# Makefile for HabanaLabs AI accelerators driver
#
-obj-m := habanalabs.o
+obj-$(CONFIG_HABANA_AI) := habanalabs.o
-habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \
- command_buffer.o hw_queue.o irq.o sysfs.o hwmon.o memory.o \
- command_submission.o mmu.o firmware_if.o pci.o
-
-habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o
+include $(src)/common/Makefile
+habanalabs-y += $(HL_COMMON_FILES)
include $(src)/goya/Makefile
habanalabs-y += $(HL_GOYA_FILES)
include $(src)/gaudi/Makefile
habanalabs-y += $(HL_GAUDI_FILES)
+
+habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-
-#include <linux/slab.h>
-
-int hl_asid_init(struct hl_device *hdev)
-{
- hdev->asid_bitmap = kcalloc(BITS_TO_LONGS(hdev->asic_prop.max_asid),
- sizeof(*hdev->asid_bitmap), GFP_KERNEL);
- if (!hdev->asid_bitmap)
- return -ENOMEM;
-
- mutex_init(&hdev->asid_mutex);
-
- /* ASID 0 is reserved for the kernel driver and device CPU */
- set_bit(0, hdev->asid_bitmap);
-
- return 0;
-}
-
-void hl_asid_fini(struct hl_device *hdev)
-{
- mutex_destroy(&hdev->asid_mutex);
- kfree(hdev->asid_bitmap);
-}
-
-unsigned long hl_asid_alloc(struct hl_device *hdev)
-{
- unsigned long found;
-
- mutex_lock(&hdev->asid_mutex);
-
- found = find_first_zero_bit(hdev->asid_bitmap,
- hdev->asic_prop.max_asid);
- if (found == hdev->asic_prop.max_asid)
- found = 0;
- else
- set_bit(found, hdev->asid_bitmap);
-
- mutex_unlock(&hdev->asid_mutex);
-
- return found;
-}
-
-void hl_asid_free(struct hl_device *hdev, unsigned long asid)
-{
- if (WARN((asid == 0 || asid >= hdev->asic_prop.max_asid),
- "Invalid ASID %lu", asid))
- return;
- clear_bit(asid, hdev->asid_bitmap);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include <uapi/misc/habanalabs.h>
-#include "habanalabs.h"
-
-#include <linux/mm.h>
-#include <linux/slab.h>
-
-static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
-{
- hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
- (void *) (uintptr_t) cb->kernel_address,
- cb->bus_address);
- kfree(cb);
-}
-
-static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb)
-{
- if (cb->is_pool) {
- spin_lock(&hdev->cb_pool_lock);
- list_add(&cb->pool_list, &hdev->cb_pool);
- spin_unlock(&hdev->cb_pool_lock);
- } else {
- cb_fini(hdev, cb);
- }
-}
-
-static void cb_release(struct kref *ref)
-{
- struct hl_device *hdev;
- struct hl_cb *cb;
-
- cb = container_of(ref, struct hl_cb, refcount);
- hdev = cb->hdev;
-
- hl_debugfs_remove_cb(cb);
-
- cb_do_release(hdev, cb);
-}
-
-static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
- int ctx_id)
-{
- struct hl_cb *cb;
- void *p;
-
- /*
- * We use of GFP_ATOMIC here because this function can be called from
- * the latency-sensitive code path for command submission. Due to H/W
- * limitations in some of the ASICs, the kernel must copy the user CB
- * that is designated for an external queue and actually enqueue
- * the kernel's copy. Hence, we must never sleep in this code section
- * and must use GFP_ATOMIC for all memory allocations.
- */
- if (ctx_id == HL_KERNEL_ASID_ID)
- cb = kzalloc(sizeof(*cb), GFP_ATOMIC);
- else
- cb = kzalloc(sizeof(*cb), GFP_KERNEL);
-
- if (!cb)
- return NULL;
-
- if (ctx_id == HL_KERNEL_ASID_ID)
- p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
- &cb->bus_address, GFP_ATOMIC);
- else
- p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
- &cb->bus_address,
- GFP_USER | __GFP_ZERO);
- if (!p) {
- dev_err(hdev->dev,
- "failed to allocate %d of dma memory for CB\n",
- cb_size);
- kfree(cb);
- return NULL;
- }
-
- cb->kernel_address = (u64) (uintptr_t) p;
- cb->size = cb_size;
-
- return cb;
-}
-
-int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
- u32 cb_size, u64 *handle, int ctx_id)
-{
- struct hl_cb *cb;
- bool alloc_new_cb = true;
- int rc;
-
- /*
- * Can't use generic function to check this because of special case
- * where we create a CB as part of the reset process
- */
- if ((hdev->disabled) || ((atomic_read(&hdev->in_reset)) &&
- (ctx_id != HL_KERNEL_ASID_ID))) {
- dev_warn_ratelimited(hdev->dev,
- "Device is disabled or in reset. Can't create new CBs\n");
- rc = -EBUSY;
- goto out_err;
- }
-
- if (cb_size > SZ_2M) {
- dev_err(hdev->dev, "CB size %d must be less than %d\n",
- cb_size, SZ_2M);
- rc = -EINVAL;
- goto out_err;
- }
-
- /* Minimum allocation must be PAGE SIZE */
- if (cb_size < PAGE_SIZE)
- cb_size = PAGE_SIZE;
-
- if (ctx_id == HL_KERNEL_ASID_ID &&
- cb_size <= hdev->asic_prop.cb_pool_cb_size) {
-
- spin_lock(&hdev->cb_pool_lock);
- if (!list_empty(&hdev->cb_pool)) {
- cb = list_first_entry(&hdev->cb_pool, typeof(*cb),
- pool_list);
- list_del(&cb->pool_list);
- spin_unlock(&hdev->cb_pool_lock);
- alloc_new_cb = false;
- } else {
- spin_unlock(&hdev->cb_pool_lock);
- dev_dbg(hdev->dev, "CB pool is empty\n");
- }
- }
-
- if (alloc_new_cb) {
- cb = hl_cb_alloc(hdev, cb_size, ctx_id);
- if (!cb) {
- rc = -ENOMEM;
- goto out_err;
- }
- }
-
- cb->hdev = hdev;
- cb->ctx_id = ctx_id;
-
- spin_lock(&mgr->cb_lock);
- rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
- spin_unlock(&mgr->cb_lock);
-
- if (rc < 0) {
- dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n");
- goto release_cb;
- }
-
- cb->id = rc;
-
- kref_init(&cb->refcount);
- spin_lock_init(&cb->lock);
-
- /*
- * idr is 32-bit so we can safely OR it with a mask that is above
- * 32 bit
- */
- *handle = cb->id | HL_MMAP_CB_MASK;
- *handle <<= PAGE_SHIFT;
-
- hl_debugfs_add_cb(cb);
-
- return 0;
-
-release_cb:
- cb_do_release(hdev, cb);
-out_err:
- *handle = 0;
-
- return rc;
-}
-
-int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle)
-{
- struct hl_cb *cb;
- u32 handle;
- int rc = 0;
-
- /*
- * handle was given to user to do mmap, I need to shift it back to
- * how the idr module gave it to me
- */
- cb_handle >>= PAGE_SHIFT;
- handle = (u32) cb_handle;
-
- spin_lock(&mgr->cb_lock);
-
- cb = idr_find(&mgr->cb_handles, handle);
- if (cb) {
- idr_remove(&mgr->cb_handles, handle);
- spin_unlock(&mgr->cb_lock);
- kref_put(&cb->refcount, cb_release);
- } else {
- spin_unlock(&mgr->cb_lock);
- dev_err(hdev->dev,
- "CB destroy failed, no match to handle 0x%x\n", handle);
- rc = -EINVAL;
- }
-
- return rc;
-}
-
-int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
-{
- union hl_cb_args *args = data;
- struct hl_device *hdev = hpriv->hdev;
- u64 handle = 0;
- int rc;
-
- if (hl_device_disabled_or_in_reset(hdev)) {
- dev_warn_ratelimited(hdev->dev,
- "Device is %s. Can't execute CB IOCTL\n",
- atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
- return -EBUSY;
- }
-
- switch (args->in.op) {
- case HL_CB_OP_CREATE:
- if (args->in.cb_size > HL_MAX_CB_SIZE) {
- dev_err(hdev->dev,
- "User requested CB size %d must be less than %d\n",
- args->in.cb_size, HL_MAX_CB_SIZE);
- rc = -EINVAL;
- } else {
- rc = hl_cb_create(hdev, &hpriv->cb_mgr,
- args->in.cb_size, &handle,
- hpriv->ctx->asid);
- }
-
- memset(args, 0, sizeof(*args));
- args->out.cb_handle = handle;
- break;
-
- case HL_CB_OP_DESTROY:
- rc = hl_cb_destroy(hdev, &hpriv->cb_mgr,
- args->in.cb_handle);
- break;
-
- default:
- rc = -ENOTTY;
- break;
- }
-
- return rc;
-}
-
-static void cb_vm_close(struct vm_area_struct *vma)
-{
- struct hl_cb *cb = (struct hl_cb *) vma->vm_private_data;
- long new_mmap_size;
-
- new_mmap_size = cb->mmap_size - (vma->vm_end - vma->vm_start);
-
- if (new_mmap_size > 0) {
- cb->mmap_size = new_mmap_size;
- return;
- }
-
- spin_lock(&cb->lock);
- cb->mmap = false;
- spin_unlock(&cb->lock);
-
- hl_cb_put(cb);
- vma->vm_private_data = NULL;
-}
-
-static const struct vm_operations_struct cb_vm_ops = {
- .close = cb_vm_close
-};
-
-int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
-{
- struct hl_device *hdev = hpriv->hdev;
- struct hl_cb *cb;
- phys_addr_t address;
- u32 handle;
- int rc;
-
- handle = vma->vm_pgoff;
-
- /* reference was taken here */
- cb = hl_cb_get(hdev, &hpriv->cb_mgr, handle);
- if (!cb) {
- dev_err(hdev->dev,
- "CB mmap failed, no match to handle 0x%x\n", handle);
- return -EINVAL;
- }
-
- /* Validation check */
- if ((vma->vm_end - vma->vm_start) != ALIGN(cb->size, PAGE_SIZE)) {
- dev_err(hdev->dev,
- "CB mmap failed, mmap size 0x%lx != 0x%x cb size\n",
- vma->vm_end - vma->vm_start, cb->size);
- rc = -EINVAL;
- goto put_cb;
- }
-
- spin_lock(&cb->lock);
-
- if (cb->mmap) {
- dev_err(hdev->dev,
- "CB mmap failed, CB already mmaped to user\n");
- rc = -EINVAL;
- goto release_lock;
- }
-
- cb->mmap = true;
-
- spin_unlock(&cb->lock);
-
- vma->vm_ops = &cb_vm_ops;
-
- /*
- * Note: We're transferring the cb reference to
- * vma->vm_private_data here.
- */
-
- vma->vm_private_data = cb;
-
- /* Calculate address for CB */
- address = virt_to_phys((void *) (uintptr_t) cb->kernel_address);
-
- rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address,
- address, cb->size);
-
- if (rc) {
- spin_lock(&cb->lock);
- cb->mmap = false;
- goto release_lock;
- }
-
- cb->mmap_size = cb->size;
-
- return 0;
-
-release_lock:
- spin_unlock(&cb->lock);
-put_cb:
- hl_cb_put(cb);
- return rc;
-}
-
-struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
- u32 handle)
-{
- struct hl_cb *cb;
-
- spin_lock(&mgr->cb_lock);
- cb = idr_find(&mgr->cb_handles, handle);
-
- if (!cb) {
- spin_unlock(&mgr->cb_lock);
- dev_warn(hdev->dev,
- "CB get failed, no match to handle 0x%x\n", handle);
- return NULL;
- }
-
- kref_get(&cb->refcount);
-
- spin_unlock(&mgr->cb_lock);
-
- return cb;
-
-}
-
-void hl_cb_put(struct hl_cb *cb)
-{
- kref_put(&cb->refcount, cb_release);
-}
-
-void hl_cb_mgr_init(struct hl_cb_mgr *mgr)
-{
- spin_lock_init(&mgr->cb_lock);
- idr_init(&mgr->cb_handles);
-}
-
-void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
-{
- struct hl_cb *cb;
- struct idr *idp;
- u32 id;
-
- idp = &mgr->cb_handles;
-
- idr_for_each_entry(idp, cb, id) {
- if (kref_put(&cb->refcount, cb_release) != 1)
- dev_err(hdev->dev,
- "CB %d for CTX ID %d is still alive\n",
- id, cb->ctx_id);
- }
-
- idr_destroy(&mgr->cb_handles);
-}
-
-struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size)
-{
- u64 cb_handle;
- struct hl_cb *cb;
- int rc;
-
- rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
- HL_KERNEL_ASID_ID);
- if (rc) {
- dev_err(hdev->dev,
- "Failed to allocate CB for the kernel driver %d\n", rc);
- return NULL;
- }
-
- cb_handle >>= PAGE_SHIFT;
- cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, (u32) cb_handle);
- /* hl_cb_get should never fail here so use kernel WARN */
- WARN(!cb, "Kernel CB handle invalid 0x%x\n", (u32) cb_handle);
- if (!cb)
- goto destroy_cb;
-
- return cb;
-
-destroy_cb:
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb_handle << PAGE_SHIFT);
-
- return NULL;
-}
-
-int hl_cb_pool_init(struct hl_device *hdev)
-{
- struct hl_cb *cb;
- int i;
-
- INIT_LIST_HEAD(&hdev->cb_pool);
- spin_lock_init(&hdev->cb_pool_lock);
-
- for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
- cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
- HL_KERNEL_ASID_ID);
- if (cb) {
- cb->is_pool = true;
- list_add(&cb->pool_list, &hdev->cb_pool);
- } else {
- hl_cb_pool_fini(hdev);
- return -ENOMEM;
- }
- }
-
- return 0;
-}
-
-int hl_cb_pool_fini(struct hl_device *hdev)
-{
- struct hl_cb *cb, *tmp;
-
- list_for_each_entry_safe(cb, tmp, &hdev->cb_pool, pool_list) {
- list_del(&cb->pool_list);
- cb_fini(hdev, cb);
- }
-
- return 0;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include <uapi/misc/habanalabs.h>
-#include "habanalabs.h"
-
-#include <linux/uaccess.h>
-#include <linux/slab.h>
-
-#define HL_CS_FLAGS_SIG_WAIT (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT)
-
-static void job_wq_completion(struct work_struct *work);
-static long _hl_cs_wait_ioctl(struct hl_device *hdev,
- struct hl_ctx *ctx, u64 timeout_us, u64 seq);
-static void cs_do_release(struct kref *ref);
-
-static void hl_sob_reset(struct kref *ref)
-{
- struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
- kref);
- struct hl_device *hdev = hw_sob->hdev;
-
- hdev->asic_funcs->reset_sob(hdev, hw_sob);
-}
-
-void hl_sob_reset_error(struct kref *ref)
-{
- struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
- kref);
- struct hl_device *hdev = hw_sob->hdev;
-
- dev_crit(hdev->dev,
- "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
- hw_sob->q_idx, hw_sob->sob_id);
-}
-
-static const char *hl_fence_get_driver_name(struct dma_fence *fence)
-{
- return "HabanaLabs";
-}
-
-static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
-{
- struct hl_cs_compl *hl_cs_compl =
- container_of(fence, struct hl_cs_compl, base_fence);
-
- return dev_name(hl_cs_compl->hdev->dev);
-}
-
-static bool hl_fence_enable_signaling(struct dma_fence *fence)
-{
- return true;
-}
-
-static void hl_fence_release(struct dma_fence *fence)
-{
- struct hl_cs_compl *hl_cs_cmpl =
- container_of(fence, struct hl_cs_compl, base_fence);
- struct hl_device *hdev = hl_cs_cmpl->hdev;
-
- /* EBUSY means the CS was never submitted and hence we don't have
- * an attached hw_sob object that we should handle here
- */
- if (fence->error == -EBUSY)
- goto free;
-
- if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
- (hl_cs_cmpl->type == CS_TYPE_WAIT)) {
-
- dev_dbg(hdev->dev,
- "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
- hl_cs_cmpl->cs_seq,
- hl_cs_cmpl->type,
- hl_cs_cmpl->hw_sob->sob_id,
- hl_cs_cmpl->sob_val);
-
- /*
- * A signal CS can get completion while the corresponding wait
- * for signal CS is on its way to the PQ. The wait for signal CS
- * will get stuck if the signal CS incremented the SOB to its
- * max value and there are no pending (submitted) waits on this
- * SOB.
- * We do the following to void this situation:
- * 1. The wait for signal CS must get a ref for the signal CS as
- * soon as possible in cs_ioctl_signal_wait() and put it
- * before being submitted to the PQ but after it incremented
- * the SOB refcnt in init_signal_wait_cs().
- * 2. Signal/Wait for signal CS will decrement the SOB refcnt
- * here.
- * These two measures guarantee that the wait for signal CS will
- * reset the SOB upon completion rather than the signal CS and
- * hence the above scenario is avoided.
- */
- kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
- }
-
-free:
- kfree_rcu(hl_cs_cmpl, base_fence.rcu);
-}
-
-static const struct dma_fence_ops hl_fence_ops = {
- .get_driver_name = hl_fence_get_driver_name,
- .get_timeline_name = hl_fence_get_timeline_name,
- .enable_signaling = hl_fence_enable_signaling,
- .release = hl_fence_release
-};
-
-static void cs_get(struct hl_cs *cs)
-{
- kref_get(&cs->refcount);
-}
-
-static int cs_get_unless_zero(struct hl_cs *cs)
-{
- return kref_get_unless_zero(&cs->refcount);
-}
-
-static void cs_put(struct hl_cs *cs)
-{
- kref_put(&cs->refcount, cs_do_release);
-}
-
-static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
-{
- /*
- * Patched CB is created for external queues jobs, and for H/W queues
- * jobs if the user CB was allocated by driver and MMU is disabled.
- */
- return (job->queue_type == QUEUE_TYPE_EXT ||
- (job->queue_type == QUEUE_TYPE_HW &&
- job->is_kernel_allocated_cb &&
- !hdev->mmu_enable));
-}
-
-/*
- * cs_parser - parse the user command submission
- *
- * @hpriv : pointer to the private data of the fd
- * @job : pointer to the job that holds the command submission info
- *
- * The function parses the command submission of the user. It calls the
- * ASIC specific parser, which returns a list of memory blocks to send
- * to the device as different command buffers
- *
- */
-static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
-{
- struct hl_device *hdev = hpriv->hdev;
- struct hl_cs_parser parser;
- int rc;
-
- parser.ctx_id = job->cs->ctx->asid;
- parser.cs_sequence = job->cs->sequence;
- parser.job_id = job->id;
-
- parser.hw_queue_id = job->hw_queue_id;
- parser.job_userptr_list = &job->userptr_list;
- parser.patched_cb = NULL;
- parser.user_cb = job->user_cb;
- parser.user_cb_size = job->user_cb_size;
- parser.queue_type = job->queue_type;
- parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
- job->patched_cb = NULL;
-
- rc = hdev->asic_funcs->cs_parser(hdev, &parser);
-
- if (is_cb_patched(hdev, job)) {
- if (!rc) {
- job->patched_cb = parser.patched_cb;
- job->job_cb_size = parser.patched_cb_size;
- job->contains_dma_pkt = parser.contains_dma_pkt;
-
- spin_lock(&job->patched_cb->lock);
- job->patched_cb->cs_cnt++;
- spin_unlock(&job->patched_cb->lock);
- }
-
- /*
- * Whether the parsing worked or not, we don't need the
- * original CB anymore because it was already parsed and
- * won't be accessed again for this CS
- */
- spin_lock(&job->user_cb->lock);
- job->user_cb->cs_cnt--;
- spin_unlock(&job->user_cb->lock);
- hl_cb_put(job->user_cb);
- job->user_cb = NULL;
- } else if (!rc) {
- job->job_cb_size = job->user_cb_size;
- }
-
- return rc;
-}
-
-static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
-{
- struct hl_cs *cs = job->cs;
-
- if (is_cb_patched(hdev, job)) {
- hl_userptr_delete_list(hdev, &job->userptr_list);
-
- /*
- * We might arrive here from rollback and patched CB wasn't
- * created, so we need to check it's not NULL
- */
- if (job->patched_cb) {
- spin_lock(&job->patched_cb->lock);
- job->patched_cb->cs_cnt--;
- spin_unlock(&job->patched_cb->lock);
-
- hl_cb_put(job->patched_cb);
- }
- }
-
- /* For H/W queue jobs, if a user CB was allocated by driver and MMU is
- * enabled, the user CB isn't released in cs_parser() and thus should be
- * released here.
- */
- if (job->queue_type == QUEUE_TYPE_HW &&
- job->is_kernel_allocated_cb && hdev->mmu_enable) {
- spin_lock(&job->user_cb->lock);
- job->user_cb->cs_cnt--;
- spin_unlock(&job->user_cb->lock);
-
- hl_cb_put(job->user_cb);
- }
-
- /*
- * This is the only place where there can be multiple threads
- * modifying the list at the same time
- */
- spin_lock(&cs->job_lock);
- list_del(&job->cs_node);
- spin_unlock(&cs->job_lock);
-
- hl_debugfs_remove_job(hdev, job);
-
- if (job->queue_type == QUEUE_TYPE_EXT ||
- job->queue_type == QUEUE_TYPE_HW)
- cs_put(cs);
-
- kfree(job);
-}
-
-static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
-{
- hdev->aggregated_cs_counters.device_in_reset_drop_cnt +=
- ctx->cs_counters.device_in_reset_drop_cnt;
- hdev->aggregated_cs_counters.out_of_mem_drop_cnt +=
- ctx->cs_counters.out_of_mem_drop_cnt;
- hdev->aggregated_cs_counters.parsing_drop_cnt +=
- ctx->cs_counters.parsing_drop_cnt;
- hdev->aggregated_cs_counters.queue_full_drop_cnt +=
- ctx->cs_counters.queue_full_drop_cnt;
-}
-
-static void cs_do_release(struct kref *ref)
-{
- struct hl_cs *cs = container_of(ref, struct hl_cs,
- refcount);
- struct hl_device *hdev = cs->ctx->hdev;
- struct hl_cs_job *job, *tmp;
-
- cs->completed = true;
-
- /*
- * Although if we reached here it means that all external jobs have
- * finished, because each one of them took refcnt to CS, we still
- * need to go over the internal jobs and free them. Otherwise, we
- * will have leaked memory and what's worse, the CS object (and
- * potentially the CTX object) could be released, while the JOB
- * still holds a pointer to them (but no reference).
- */
- list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
- free_job(hdev, job);
-
- /* We also need to update CI for internal queues */
- if (cs->submitted) {
- hdev->asic_funcs->hw_queues_lock(hdev);
-
- hdev->cs_active_cnt--;
- if (!hdev->cs_active_cnt) {
- struct hl_device_idle_busy_ts *ts;
-
- ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
- ts->busy_to_idle_ts = ktime_get();
-
- if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
- hdev->idle_busy_ts_idx = 0;
- } else if (hdev->cs_active_cnt < 0) {
- dev_crit(hdev->dev, "CS active cnt %d is negative\n",
- hdev->cs_active_cnt);
- }
-
- hdev->asic_funcs->hw_queues_unlock(hdev);
-
- hl_int_hw_queue_update_ci(cs);
-
- spin_lock(&hdev->hw_queues_mirror_lock);
- /* remove CS from hw_queues mirror list */
- list_del_init(&cs->mirror_node);
- spin_unlock(&hdev->hw_queues_mirror_lock);
-
- /*
- * Don't cancel TDR in case this CS was timedout because we
- * might be running from the TDR context
- */
- if ((!cs->timedout) &&
- (hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT)) {
- struct hl_cs *next;
-
- if (cs->tdr_active)
- cancel_delayed_work_sync(&cs->work_tdr);
-
- spin_lock(&hdev->hw_queues_mirror_lock);
-
- /* queue TDR for next CS */
- next = list_first_entry_or_null(
- &hdev->hw_queues_mirror_list,
- struct hl_cs, mirror_node);
-
- if ((next) && (!next->tdr_active)) {
- next->tdr_active = true;
- schedule_delayed_work(&next->work_tdr,
- hdev->timeout_jiffies);
- }
-
- spin_unlock(&hdev->hw_queues_mirror_lock);
- }
- } else if (cs->type == CS_TYPE_WAIT) {
- /*
- * In case the wait for signal CS was submitted, the put occurs
- * in init_signal_wait_cs() right before hanging on the PQ.
- */
- dma_fence_put(cs->signal_fence);
- }
-
- /*
- * Must be called before hl_ctx_put because inside we use ctx to get
- * the device
- */
- hl_debugfs_remove_cs(cs);
-
- hl_ctx_put(cs->ctx);
-
- /* We need to mark an error for not submitted because in that case
- * the dma fence release flow is different. Mainly, we don't need
- * to handle hw_sob for signal/wait
- */
- if (cs->timedout)
- dma_fence_set_error(cs->fence, -ETIMEDOUT);
- else if (cs->aborted)
- dma_fence_set_error(cs->fence, -EIO);
- else if (!cs->submitted)
- dma_fence_set_error(cs->fence, -EBUSY);
-
- dma_fence_signal(cs->fence);
- dma_fence_put(cs->fence);
-
- cs_counters_aggregate(hdev, cs->ctx);
-
- kfree(cs->jobs_in_queue_cnt);
- kfree(cs);
-}
-
-static void cs_timedout(struct work_struct *work)
-{
- struct hl_device *hdev;
- int ctx_asid, rc;
- struct hl_cs *cs = container_of(work, struct hl_cs,
- work_tdr.work);
- rc = cs_get_unless_zero(cs);
- if (!rc)
- return;
-
- if ((!cs->submitted) || (cs->completed)) {
- cs_put(cs);
- return;
- }
-
- /* Mark the CS is timed out so we won't try to cancel its TDR */
- cs->timedout = true;
-
- hdev = cs->ctx->hdev;
- ctx_asid = cs->ctx->asid;
-
- dev_err(hdev->dev,
- "Command submission %llu has not finished in time!\n",
- cs->sequence);
-
- cs_put(cs);
-
- if (hdev->reset_on_lockup)
- hl_device_reset(hdev, false, false);
-}
-
-static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
- enum hl_cs_type cs_type, struct hl_cs **cs_new)
-{
- struct hl_cs_compl *cs_cmpl;
- struct dma_fence *other = NULL;
- struct hl_cs *cs;
- int rc;
-
- cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
- if (!cs)
- return -ENOMEM;
-
- cs->ctx = ctx;
- cs->submitted = false;
- cs->completed = false;
- cs->type = cs_type;
- INIT_LIST_HEAD(&cs->job_list);
- INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
- kref_init(&cs->refcount);
- spin_lock_init(&cs->job_lock);
-
- cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
- if (!cs_cmpl) {
- rc = -ENOMEM;
- goto free_cs;
- }
-
- cs_cmpl->hdev = hdev;
- cs_cmpl->type = cs->type;
- spin_lock_init(&cs_cmpl->lock);
- cs->fence = &cs_cmpl->base_fence;
-
- spin_lock(&ctx->cs_lock);
-
- cs_cmpl->cs_seq = ctx->cs_sequence;
- other = ctx->cs_pending[cs_cmpl->cs_seq &
- (hdev->asic_prop.max_pending_cs - 1)];
- if ((other) && (!dma_fence_is_signaled(other))) {
- dev_dbg(hdev->dev,
- "Rejecting CS because of too many in-flights CS\n");
- rc = -EAGAIN;
- goto free_fence;
- }
-
- cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
- sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
- if (!cs->jobs_in_queue_cnt) {
- rc = -ENOMEM;
- goto free_fence;
- }
-
- dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
- ctx->asid, ctx->cs_sequence);
-
- cs->sequence = cs_cmpl->cs_seq;
-
- ctx->cs_pending[cs_cmpl->cs_seq &
- (hdev->asic_prop.max_pending_cs - 1)] =
- &cs_cmpl->base_fence;
- ctx->cs_sequence++;
-
- dma_fence_get(&cs_cmpl->base_fence);
-
- dma_fence_put(other);
-
- spin_unlock(&ctx->cs_lock);
-
- *cs_new = cs;
-
- return 0;
-
-free_fence:
- spin_unlock(&ctx->cs_lock);
- kfree(cs_cmpl);
-free_cs:
- kfree(cs);
- return rc;
-}
-
-static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
-{
- struct hl_cs_job *job, *tmp;
-
- list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
- free_job(hdev, job);
-}
-
-void hl_cs_rollback_all(struct hl_device *hdev)
-{
- int i;
- struct hl_cs *cs, *tmp;
-
- /* flush all completions */
- for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
- flush_workqueue(hdev->cq_wq[i]);
-
- /* Make sure we don't have leftovers in the H/W queues mirror list */
- list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
- mirror_node) {
- cs_get(cs);
- cs->aborted = true;
- dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
- cs->ctx->asid, cs->sequence);
- cs_rollback(hdev, cs);
- cs_put(cs);
- }
-}
-
-static void job_wq_completion(struct work_struct *work)
-{
- struct hl_cs_job *job = container_of(work, struct hl_cs_job,
- finish_work);
- struct hl_cs *cs = job->cs;
- struct hl_device *hdev = cs->ctx->hdev;
-
- /* job is no longer needed */
- free_job(hdev, job);
-}
-
-static int validate_queue_index(struct hl_device *hdev,
- struct hl_cs_chunk *chunk,
- enum hl_queue_type *queue_type,
- bool *is_kernel_allocated_cb)
-{
- struct asic_fixed_properties *asic = &hdev->asic_prop;
- struct hw_queue_properties *hw_queue_prop;
-
- /* This must be checked here to prevent out-of-bounds access to
- * hw_queues_props array
- */
- if (chunk->queue_index >= asic->max_queues) {
- dev_err(hdev->dev, "Queue index %d is invalid\n",
- chunk->queue_index);
- return -EINVAL;
- }
-
- hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
-
- if (hw_queue_prop->type == QUEUE_TYPE_NA) {
- dev_err(hdev->dev, "Queue index %d is invalid\n",
- chunk->queue_index);
- return -EINVAL;
- }
-
- if (hw_queue_prop->driver_only) {
- dev_err(hdev->dev,
- "Queue index %d is restricted for the kernel driver\n",
- chunk->queue_index);
- return -EINVAL;
- }
-
- *queue_type = hw_queue_prop->type;
- *is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb;
-
- return 0;
-}
-
-static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
- struct hl_cb_mgr *cb_mgr,
- struct hl_cs_chunk *chunk)
-{
- struct hl_cb *cb;
- u32 cb_handle;
-
- cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
-
- cb = hl_cb_get(hdev, cb_mgr, cb_handle);
- if (!cb) {
- dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
- return NULL;
- }
-
- if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
- dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
- goto release_cb;
- }
-
- spin_lock(&cb->lock);
- cb->cs_cnt++;
- spin_unlock(&cb->lock);
-
- return cb;
-
-release_cb:
- hl_cb_put(cb);
- return NULL;
-}
-
-struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
- enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
-{
- struct hl_cs_job *job;
-
- job = kzalloc(sizeof(*job), GFP_ATOMIC);
- if (!job)
- return NULL;
-
- job->queue_type = queue_type;
- job->is_kernel_allocated_cb = is_kernel_allocated_cb;
-
- if (is_cb_patched(hdev, job))
- INIT_LIST_HEAD(&job->userptr_list);
-
- if (job->queue_type == QUEUE_TYPE_EXT)
- INIT_WORK(&job->finish_work, job_wq_completion);
-
- return job;
-}
-
-static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
- u32 num_chunks, u64 *cs_seq)
-{
- struct hl_device *hdev = hpriv->hdev;
- struct hl_cs_chunk *cs_chunk_array;
- struct hl_cs_job *job;
- struct hl_cs *cs;
- struct hl_cb *cb;
- bool int_queues_only = true;
- u32 size_to_copy;
- int rc, i;
-
- *cs_seq = ULLONG_MAX;
-
- if (num_chunks > HL_MAX_JOBS_PER_CS) {
- dev_err(hdev->dev,
- "Number of chunks can NOT be larger than %d\n",
- HL_MAX_JOBS_PER_CS);
- rc = -EINVAL;
- goto out;
- }
-
- cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
- GFP_ATOMIC);
- if (!cs_chunk_array) {
- rc = -ENOMEM;
- goto out;
- }
-
- size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
- if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
- dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
- rc = -EFAULT;
- goto free_cs_chunk_array;
- }
-
- /* increment refcnt for context */
- hl_ctx_get(hdev, hpriv->ctx);
-
- rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs);
- if (rc) {
- hl_ctx_put(hpriv->ctx);
- goto free_cs_chunk_array;
- }
-
- *cs_seq = cs->sequence;
-
- hl_debugfs_add_cs(cs);
-
- /* Validate ALL the CS chunks before submitting the CS */
- for (i = 0 ; i < num_chunks ; i++) {
- struct hl_cs_chunk *chunk = &cs_chunk_array[i];
- enum hl_queue_type queue_type;
- bool is_kernel_allocated_cb;
-
- rc = validate_queue_index(hdev, chunk, &queue_type,
- &is_kernel_allocated_cb);
- if (rc) {
- hpriv->ctx->cs_counters.parsing_drop_cnt++;
- goto free_cs_object;
- }
-
- if (is_kernel_allocated_cb) {
- cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
- if (!cb) {
- hpriv->ctx->cs_counters.parsing_drop_cnt++;
- rc = -EINVAL;
- goto free_cs_object;
- }
- } else {
- cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
- }
-
- if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
- int_queues_only = false;
-
- job = hl_cs_allocate_job(hdev, queue_type,
- is_kernel_allocated_cb);
- if (!job) {
- hpriv->ctx->cs_counters.out_of_mem_drop_cnt++;
- dev_err(hdev->dev, "Failed to allocate a new job\n");
- rc = -ENOMEM;
- if (is_kernel_allocated_cb)
- goto release_cb;
- else
- goto free_cs_object;
- }
-
- job->id = i + 1;
- job->cs = cs;
- job->user_cb = cb;
- job->user_cb_size = chunk->cb_size;
- job->hw_queue_id = chunk->queue_index;
-
- cs->jobs_in_queue_cnt[job->hw_queue_id]++;
-
- list_add_tail(&job->cs_node, &cs->job_list);
-
- /*
- * Increment CS reference. When CS reference is 0, CS is
- * done and can be signaled to user and free all its resources
- * Only increment for JOB on external or H/W queues, because
- * only for those JOBs we get completion
- */
- if (job->queue_type == QUEUE_TYPE_EXT ||
- job->queue_type == QUEUE_TYPE_HW)
- cs_get(cs);
-
- hl_debugfs_add_job(hdev, job);
-
- rc = cs_parser(hpriv, job);
- if (rc) {
- hpriv->ctx->cs_counters.parsing_drop_cnt++;
- dev_err(hdev->dev,
- "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
- cs->ctx->asid, cs->sequence, job->id, rc);
- goto free_cs_object;
- }
- }
-
- if (int_queues_only) {
- hpriv->ctx->cs_counters.parsing_drop_cnt++;
- dev_err(hdev->dev,
- "Reject CS %d.%llu because only internal queues jobs are present\n",
- cs->ctx->asid, cs->sequence);
- rc = -EINVAL;
- goto free_cs_object;
- }
-
- rc = hl_hw_queue_schedule_cs(cs);
- if (rc) {
- if (rc != -EAGAIN)
- dev_err(hdev->dev,
- "Failed to submit CS %d.%llu to H/W queues, error %d\n",
- cs->ctx->asid, cs->sequence, rc);
- goto free_cs_object;
- }
-
- rc = HL_CS_STATUS_SUCCESS;
- goto put_cs;
-
-release_cb:
- spin_lock(&cb->lock);
- cb->cs_cnt--;
- spin_unlock(&cb->lock);
- hl_cb_put(cb);
-free_cs_object:
- cs_rollback(hdev, cs);
- *cs_seq = ULLONG_MAX;
- /* The path below is both for good and erroneous exits */
-put_cs:
- /* We finished with the CS in this function, so put the ref */
- cs_put(cs);
-free_cs_chunk_array:
- kfree(cs_chunk_array);
-out:
- return rc;
-}
-
-static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
- void __user *chunks, u32 num_chunks,
- u64 *cs_seq)
-{
- struct hl_device *hdev = hpriv->hdev;
- struct hl_ctx *ctx = hpriv->ctx;
- struct hl_cs_chunk *cs_chunk_array, *chunk;
- struct hw_queue_properties *hw_queue_prop;
- struct dma_fence *sig_fence = NULL;
- struct hl_cs_job *job;
- struct hl_cs *cs;
- struct hl_cb *cb;
- enum hl_queue_type q_type;
- u64 *signal_seq_arr = NULL, signal_seq;
- u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
- int rc;
-
- *cs_seq = ULLONG_MAX;
-
- if (num_chunks > HL_MAX_JOBS_PER_CS) {
- dev_err(hdev->dev,
- "Number of chunks can NOT be larger than %d\n",
- HL_MAX_JOBS_PER_CS);
- rc = -EINVAL;
- goto out;
- }
-
- cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
- GFP_ATOMIC);
- if (!cs_chunk_array) {
- rc = -ENOMEM;
- goto out;
- }
-
- size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
- if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
- dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
- rc = -EFAULT;
- goto free_cs_chunk_array;
- }
-
- /* currently it is guaranteed to have only one chunk */
- chunk = &cs_chunk_array[0];
- q_idx = chunk->queue_index;
- hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
- q_type = hw_queue_prop->type;
-
- if ((q_idx >= hdev->asic_prop.max_queues) ||
- (!hw_queue_prop->supports_sync_stream)) {
- dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
- rc = -EINVAL;
- goto free_cs_chunk_array;
- }
-
- if (cs_type == CS_TYPE_WAIT) {
- struct hl_cs_compl *sig_waitcs_cmpl;
-
- signal_seq_arr_len = chunk->num_signal_seq_arr;
-
- /* currently only one signal seq is supported */
- if (signal_seq_arr_len != 1) {
- dev_err(hdev->dev,
- "Wait for signal CS supports only one signal CS seq\n");
- rc = -EINVAL;
- goto free_cs_chunk_array;
- }
-
- signal_seq_arr = kmalloc_array(signal_seq_arr_len,
- sizeof(*signal_seq_arr),
- GFP_ATOMIC);
- if (!signal_seq_arr) {
- rc = -ENOMEM;
- goto free_cs_chunk_array;
- }
-
- size_to_copy = chunk->num_signal_seq_arr *
- sizeof(*signal_seq_arr);
- if (copy_from_user(signal_seq_arr,
- u64_to_user_ptr(chunk->signal_seq_arr),
- size_to_copy)) {
- dev_err(hdev->dev,
- "Failed to copy signal seq array from user\n");
- rc = -EFAULT;
- goto free_signal_seq_array;
- }
-
- /* currently it is guaranteed to have only one signal seq */
- signal_seq = signal_seq_arr[0];
- sig_fence = hl_ctx_get_fence(ctx, signal_seq);
- if (IS_ERR(sig_fence)) {
- dev_err(hdev->dev,
- "Failed to get signal CS with seq 0x%llx\n",
- signal_seq);
- rc = PTR_ERR(sig_fence);
- goto free_signal_seq_array;
- }
-
- if (!sig_fence) {
- /* signal CS already finished */
- rc = 0;
- goto free_signal_seq_array;
- }
-
- sig_waitcs_cmpl =
- container_of(sig_fence, struct hl_cs_compl, base_fence);
-
- if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
- dev_err(hdev->dev,
- "CS seq 0x%llx is not of a signal CS\n",
- signal_seq);
- dma_fence_put(sig_fence);
- rc = -EINVAL;
- goto free_signal_seq_array;
- }
-
- if (dma_fence_is_signaled(sig_fence)) {
- /* signal CS already finished */
- dma_fence_put(sig_fence);
- rc = 0;
- goto free_signal_seq_array;
- }
- }
-
- /* increment refcnt for context */
- hl_ctx_get(hdev, ctx);
-
- rc = allocate_cs(hdev, ctx, cs_type, &cs);
- if (rc) {
- if (cs_type == CS_TYPE_WAIT)
- dma_fence_put(sig_fence);
- hl_ctx_put(ctx);
- goto free_signal_seq_array;
- }
-
- /*
- * Save the signal CS fence for later initialization right before
- * hanging the wait CS on the queue.
- */
- if (cs->type == CS_TYPE_WAIT)
- cs->signal_fence = sig_fence;
-
- hl_debugfs_add_cs(cs);
-
- *cs_seq = cs->sequence;
-
- job = hl_cs_allocate_job(hdev, q_type, true);
- if (!job) {
- ctx->cs_counters.out_of_mem_drop_cnt++;
- dev_err(hdev->dev, "Failed to allocate a new job\n");
- rc = -ENOMEM;
- goto put_cs;
- }
-
- cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
- if (!cb) {
- ctx->cs_counters.out_of_mem_drop_cnt++;
- kfree(job);
- rc = -EFAULT;
- goto put_cs;
- }
-
- if (cs->type == CS_TYPE_WAIT)
- cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
- else
- cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
-
- job->id = 0;
- job->cs = cs;
- job->user_cb = cb;
- job->user_cb->cs_cnt++;
- job->user_cb_size = cb_size;
- job->hw_queue_id = q_idx;
-
- /*
- * No need in parsing, user CB is the patched CB.
- * We call hl_cb_destroy() out of two reasons - we don't need the CB in
- * the CB idr anymore and to decrement its refcount as it was
- * incremented inside hl_cb_kernel_create().
- */
- job->patched_cb = job->user_cb;
- job->job_cb_size = job->user_cb_size;
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
-
- cs->jobs_in_queue_cnt[job->hw_queue_id]++;
-
- list_add_tail(&job->cs_node, &cs->job_list);
-
- /* increment refcount as for external queues we get completion */
- cs_get(cs);
-
- hl_debugfs_add_job(hdev, job);
-
- rc = hl_hw_queue_schedule_cs(cs);
- if (rc) {
- if (rc != -EAGAIN)
- dev_err(hdev->dev,
- "Failed to submit CS %d.%llu to H/W queues, error %d\n",
- ctx->asid, cs->sequence, rc);
- goto free_cs_object;
- }
-
- rc = HL_CS_STATUS_SUCCESS;
- goto put_cs;
-
-free_cs_object:
- cs_rollback(hdev, cs);
- *cs_seq = ULLONG_MAX;
- /* The path below is both for good and erroneous exits */
-put_cs:
- /* We finished with the CS in this function, so put the ref */
- cs_put(cs);
-free_signal_seq_array:
- if (cs_type == CS_TYPE_WAIT)
- kfree(signal_seq_arr);
-free_cs_chunk_array:
- kfree(cs_chunk_array);
-out:
- return rc;
-}
-
-int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
-{
- struct hl_device *hdev = hpriv->hdev;
- union hl_cs_args *args = data;
- struct hl_ctx *ctx = hpriv->ctx;
- void __user *chunks_execute, *chunks_restore;
- enum hl_cs_type cs_type;
- u32 num_chunks_execute, num_chunks_restore, sig_wait_flags;
- u64 cs_seq = ULONG_MAX;
- int rc, do_ctx_switch;
- bool need_soft_reset = false;
-
- if (hl_device_disabled_or_in_reset(hdev)) {
- dev_warn_ratelimited(hdev->dev,
- "Device is %s. Can't submit new CS\n",
- atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
- rc = -EBUSY;
- goto out;
- }
-
- sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT;
-
- if (unlikely(sig_wait_flags == HL_CS_FLAGS_SIG_WAIT)) {
- dev_err(hdev->dev,
- "Signal and wait CS flags are mutually exclusive, context %d\n",
- ctx->asid);
- rc = -EINVAL;
- goto out;
- }
-
- if (unlikely((sig_wait_flags & HL_CS_FLAGS_SIG_WAIT) &&
- (!hdev->supports_sync_stream))) {
- dev_err(hdev->dev, "Sync stream CS is not supported\n");
- rc = -EINVAL;
- goto out;
- }
-
- if (args->in.cs_flags & HL_CS_FLAGS_SIGNAL)
- cs_type = CS_TYPE_SIGNAL;
- else if (args->in.cs_flags & HL_CS_FLAGS_WAIT)
- cs_type = CS_TYPE_WAIT;
- else
- cs_type = CS_TYPE_DEFAULT;
-
- chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
- num_chunks_execute = args->in.num_chunks_execute;
-
- if (cs_type == CS_TYPE_DEFAULT) {
- if (!num_chunks_execute) {
- dev_err(hdev->dev,
- "Got execute CS with 0 chunks, context %d\n",
- ctx->asid);
- rc = -EINVAL;
- goto out;
- }
- } else if (num_chunks_execute != 1) {
- dev_err(hdev->dev,
- "Sync stream CS mandates one chunk only, context %d\n",
- ctx->asid);
- rc = -EINVAL;
- goto out;
- }
-
- do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
-
- if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
- long ret;
-
- chunks_restore =
- (void __user *) (uintptr_t) args->in.chunks_restore;
- num_chunks_restore = args->in.num_chunks_restore;
-
- mutex_lock(&hpriv->restore_phase_mutex);
-
- if (do_ctx_switch) {
- rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
- if (rc) {
- dev_err_ratelimited(hdev->dev,
- "Failed to switch to context %d, rejecting CS! %d\n",
- ctx->asid, rc);
- /*
- * If we timedout, or if the device is not IDLE
- * while we want to do context-switch (-EBUSY),
- * we need to soft-reset because QMAN is
- * probably stuck. However, we can't call to
- * reset here directly because of deadlock, so
- * need to do it at the very end of this
- * function
- */
- if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
- need_soft_reset = true;
- mutex_unlock(&hpriv->restore_phase_mutex);
- goto out;
- }
- }
-
- hdev->asic_funcs->restore_phase_topology(hdev);
-
- if (!num_chunks_restore) {
- dev_dbg(hdev->dev,
- "Need to run restore phase but restore CS is empty\n");
- rc = 0;
- } else {
- rc = cs_ioctl_default(hpriv, chunks_restore,
- num_chunks_restore, &cs_seq);
- }
-
- mutex_unlock(&hpriv->restore_phase_mutex);
-
- if (rc) {
- dev_err(hdev->dev,
- "Failed to submit restore CS for context %d (%d)\n",
- ctx->asid, rc);
- goto out;
- }
-
- /* Need to wait for restore completion before execution phase */
- if (num_chunks_restore) {
- ret = _hl_cs_wait_ioctl(hdev, ctx,
- jiffies_to_usecs(hdev->timeout_jiffies),
- cs_seq);
- if (ret <= 0) {
- dev_err(hdev->dev,
- "Restore CS for context %d failed to complete %ld\n",
- ctx->asid, ret);
- rc = -ENOEXEC;
- goto out;
- }
- }
-
- ctx->thread_ctx_switch_wait_token = 1;
- } else if (!ctx->thread_ctx_switch_wait_token) {
- u32 tmp;
-
- rc = hl_poll_timeout_memory(hdev,
- &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
- 100, jiffies_to_usecs(hdev->timeout_jiffies), false);
-
- if (rc == -ETIMEDOUT) {
- dev_err(hdev->dev,
- "context switch phase timeout (%d)\n", tmp);
- goto out;
- }
- }
-
- if (cs_type == CS_TYPE_DEFAULT)
- rc = cs_ioctl_default(hpriv, chunks_execute, num_chunks_execute,
- &cs_seq);
- else
- rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks_execute,
- num_chunks_execute, &cs_seq);
-
-out:
- if (rc != -EAGAIN) {
- memset(args, 0, sizeof(*args));
- args->out.status = rc;
- args->out.seq = cs_seq;
- }
-
- if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset))
- hl_device_reset(hdev, false, false);
-
- return rc;
-}
-
-static long _hl_cs_wait_ioctl(struct hl_device *hdev,
- struct hl_ctx *ctx, u64 timeout_us, u64 seq)
-{
- struct dma_fence *fence;
- unsigned long timeout;
- long rc;
-
- if (timeout_us == MAX_SCHEDULE_TIMEOUT)
- timeout = timeout_us;
- else
- timeout = usecs_to_jiffies(timeout_us);
-
- hl_ctx_get(hdev, ctx);
-
- fence = hl_ctx_get_fence(ctx, seq);
- if (IS_ERR(fence)) {
- rc = PTR_ERR(fence);
- if (rc == -EINVAL)
- dev_notice_ratelimited(hdev->dev,
- "Can't wait on CS %llu because current CS is at seq %llu\n",
- seq, ctx->cs_sequence);
- } else if (fence) {
- rc = dma_fence_wait_timeout(fence, true, timeout);
- if (fence->error == -ETIMEDOUT)
- rc = -ETIMEDOUT;
- else if (fence->error == -EIO)
- rc = -EIO;
- dma_fence_put(fence);
- } else {
- dev_dbg(hdev->dev,
- "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
- seq, ctx->cs_sequence);
- rc = 1;
- }
-
- hl_ctx_put(ctx);
-
- return rc;
-}
-
-int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
-{
- struct hl_device *hdev = hpriv->hdev;
- union hl_wait_cs_args *args = data;
- u64 seq = args->in.seq;
- long rc;
-
- rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq);
-
- memset(args, 0, sizeof(*args));
-
- if (rc < 0) {
- if (rc == -ERESTARTSYS) {
- dev_err_ratelimited(hdev->dev,
- "user process got signal while waiting for CS handle %llu\n",
- seq);
- args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
- rc = -EINTR;
- } else if (rc == -ETIMEDOUT) {
- dev_err_ratelimited(hdev->dev,
- "CS %llu has timed-out while user process is waiting for it\n",
- seq);
- args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
- } else if (rc == -EIO) {
- dev_err_ratelimited(hdev->dev,
- "CS %llu has been aborted while user process is waiting for it\n",
- seq);
- args->out.status = HL_WAIT_CS_STATUS_ABORTED;
- }
- return rc;
- }
-
- if (rc == 0)
- args->out.status = HL_WAIT_CS_STATUS_BUSY;
- else
- args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
-
- return 0;
-}
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0-only
+subdir-ccflags-y += -I$(src)/common
+
+HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
+ common/asid.o common/habanalabs_ioctl.o \
+ common/command_buffer.o common/hw_queue.o common/irq.o \
+ common/sysfs.o common/hwmon.o common/memory.o \
+ common/command_submission.o common/mmu.o common/firmware_if.o \
+ common/pci.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+#include <linux/slab.h>
+
+int hl_asid_init(struct hl_device *hdev)
+{
+ hdev->asid_bitmap = kcalloc(BITS_TO_LONGS(hdev->asic_prop.max_asid),
+ sizeof(*hdev->asid_bitmap), GFP_KERNEL);
+ if (!hdev->asid_bitmap)
+ return -ENOMEM;
+
+ mutex_init(&hdev->asid_mutex);
+
+ /* ASID 0 is reserved for the kernel driver and device CPU */
+ set_bit(0, hdev->asid_bitmap);
+
+ return 0;
+}
+
+void hl_asid_fini(struct hl_device *hdev)
+{
+ mutex_destroy(&hdev->asid_mutex);
+ kfree(hdev->asid_bitmap);
+}
+
+unsigned long hl_asid_alloc(struct hl_device *hdev)
+{
+ unsigned long found;
+
+ mutex_lock(&hdev->asid_mutex);
+
+ found = find_first_zero_bit(hdev->asid_bitmap,
+ hdev->asic_prop.max_asid);
+ if (found == hdev->asic_prop.max_asid)
+ found = 0;
+ else
+ set_bit(found, hdev->asid_bitmap);
+
+ mutex_unlock(&hdev->asid_mutex);
+
+ return found;
+}
+
+void hl_asid_free(struct hl_device *hdev, unsigned long asid)
+{
+ if (WARN((asid == 0 || asid >= hdev->asic_prop.max_asid),
+ "Invalid ASID %lu", asid))
+ return;
+ clear_bit(asid, hdev->asid_bitmap);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include <uapi/misc/habanalabs.h>
+#include "habanalabs.h"
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
+{
+ hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
+ (void *) (uintptr_t) cb->kernel_address,
+ cb->bus_address);
+ kfree(cb);
+}
+
+static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb)
+{
+ if (cb->is_pool) {
+ spin_lock(&hdev->cb_pool_lock);
+ list_add(&cb->pool_list, &hdev->cb_pool);
+ spin_unlock(&hdev->cb_pool_lock);
+ } else {
+ cb_fini(hdev, cb);
+ }
+}
+
+static void cb_release(struct kref *ref)
+{
+ struct hl_device *hdev;
+ struct hl_cb *cb;
+
+ cb = container_of(ref, struct hl_cb, refcount);
+ hdev = cb->hdev;
+
+ hl_debugfs_remove_cb(cb);
+
+ cb_do_release(hdev, cb);
+}
+
+static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
+ int ctx_id)
+{
+ struct hl_cb *cb;
+ void *p;
+
+ /*
+ * We use of GFP_ATOMIC here because this function can be called from
+ * the latency-sensitive code path for command submission. Due to H/W
+ * limitations in some of the ASICs, the kernel must copy the user CB
+ * that is designated for an external queue and actually enqueue
+ * the kernel's copy. Hence, we must never sleep in this code section
+ * and must use GFP_ATOMIC for all memory allocations.
+ */
+ if (ctx_id == HL_KERNEL_ASID_ID)
+ cb = kzalloc(sizeof(*cb), GFP_ATOMIC);
+ else
+ cb = kzalloc(sizeof(*cb), GFP_KERNEL);
+
+ if (!cb)
+ return NULL;
+
+ if (ctx_id == HL_KERNEL_ASID_ID)
+ p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
+ &cb->bus_address, GFP_ATOMIC);
+ else
+ p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
+ &cb->bus_address,
+ GFP_USER | __GFP_ZERO);
+ if (!p) {
+ dev_err(hdev->dev,
+ "failed to allocate %d of dma memory for CB\n",
+ cb_size);
+ kfree(cb);
+ return NULL;
+ }
+
+ cb->kernel_address = (u64) (uintptr_t) p;
+ cb->size = cb_size;
+
+ return cb;
+}
+
+int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
+ u32 cb_size, u64 *handle, int ctx_id)
+{
+ struct hl_cb *cb;
+ bool alloc_new_cb = true;
+ int rc;
+
+ /*
+ * Can't use generic function to check this because of special case
+ * where we create a CB as part of the reset process
+ */
+ if ((hdev->disabled) || ((atomic_read(&hdev->in_reset)) &&
+ (ctx_id != HL_KERNEL_ASID_ID))) {
+ dev_warn_ratelimited(hdev->dev,
+ "Device is disabled or in reset. Can't create new CBs\n");
+ rc = -EBUSY;
+ goto out_err;
+ }
+
+ if (cb_size > SZ_2M) {
+ dev_err(hdev->dev, "CB size %d must be less than %d\n",
+ cb_size, SZ_2M);
+ rc = -EINVAL;
+ goto out_err;
+ }
+
+ /* Minimum allocation must be PAGE SIZE */
+ if (cb_size < PAGE_SIZE)
+ cb_size = PAGE_SIZE;
+
+ if (ctx_id == HL_KERNEL_ASID_ID &&
+ cb_size <= hdev->asic_prop.cb_pool_cb_size) {
+
+ spin_lock(&hdev->cb_pool_lock);
+ if (!list_empty(&hdev->cb_pool)) {
+ cb = list_first_entry(&hdev->cb_pool, typeof(*cb),
+ pool_list);
+ list_del(&cb->pool_list);
+ spin_unlock(&hdev->cb_pool_lock);
+ alloc_new_cb = false;
+ } else {
+ spin_unlock(&hdev->cb_pool_lock);
+ dev_dbg(hdev->dev, "CB pool is empty\n");
+ }
+ }
+
+ if (alloc_new_cb) {
+ cb = hl_cb_alloc(hdev, cb_size, ctx_id);
+ if (!cb) {
+ rc = -ENOMEM;
+ goto out_err;
+ }
+ }
+
+ cb->hdev = hdev;
+ cb->ctx_id = ctx_id;
+
+ spin_lock(&mgr->cb_lock);
+ rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
+ spin_unlock(&mgr->cb_lock);
+
+ if (rc < 0) {
+ dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n");
+ goto release_cb;
+ }
+
+ cb->id = rc;
+
+ kref_init(&cb->refcount);
+ spin_lock_init(&cb->lock);
+
+ /*
+ * idr is 32-bit so we can safely OR it with a mask that is above
+ * 32 bit
+ */
+ *handle = cb->id | HL_MMAP_CB_MASK;
+ *handle <<= PAGE_SHIFT;
+
+ hl_debugfs_add_cb(cb);
+
+ return 0;
+
+release_cb:
+ cb_do_release(hdev, cb);
+out_err:
+ *handle = 0;
+
+ return rc;
+}
+
+int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle)
+{
+ struct hl_cb *cb;
+ u32 handle;
+ int rc = 0;
+
+ /*
+ * handle was given to user to do mmap, I need to shift it back to
+ * how the idr module gave it to me
+ */
+ cb_handle >>= PAGE_SHIFT;
+ handle = (u32) cb_handle;
+
+ spin_lock(&mgr->cb_lock);
+
+ cb = idr_find(&mgr->cb_handles, handle);
+ if (cb) {
+ idr_remove(&mgr->cb_handles, handle);
+ spin_unlock(&mgr->cb_lock);
+ kref_put(&cb->refcount, cb_release);
+ } else {
+ spin_unlock(&mgr->cb_lock);
+ dev_err(hdev->dev,
+ "CB destroy failed, no match to handle 0x%x\n", handle);
+ rc = -EINVAL;
+ }
+
+ return rc;
+}
+
+int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+ union hl_cb_args *args = data;
+ struct hl_device *hdev = hpriv->hdev;
+ u64 handle = 0;
+ int rc;
+
+ if (hl_device_disabled_or_in_reset(hdev)) {
+ dev_warn_ratelimited(hdev->dev,
+ "Device is %s. Can't execute CB IOCTL\n",
+ atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+ return -EBUSY;
+ }
+
+ switch (args->in.op) {
+ case HL_CB_OP_CREATE:
+ if (args->in.cb_size > HL_MAX_CB_SIZE) {
+ dev_err(hdev->dev,
+ "User requested CB size %d must be less than %d\n",
+ args->in.cb_size, HL_MAX_CB_SIZE);
+ rc = -EINVAL;
+ } else {
+ rc = hl_cb_create(hdev, &hpriv->cb_mgr,
+ args->in.cb_size, &handle,
+ hpriv->ctx->asid);
+ }
+
+ memset(args, 0, sizeof(*args));
+ args->out.cb_handle = handle;
+ break;
+
+ case HL_CB_OP_DESTROY:
+ rc = hl_cb_destroy(hdev, &hpriv->cb_mgr,
+ args->in.cb_handle);
+ break;
+
+ default:
+ rc = -ENOTTY;
+ break;
+ }
+
+ return rc;
+}
+
+static void cb_vm_close(struct vm_area_struct *vma)
+{
+ struct hl_cb *cb = (struct hl_cb *) vma->vm_private_data;
+ long new_mmap_size;
+
+ new_mmap_size = cb->mmap_size - (vma->vm_end - vma->vm_start);
+
+ if (new_mmap_size > 0) {
+ cb->mmap_size = new_mmap_size;
+ return;
+ }
+
+ spin_lock(&cb->lock);
+ cb->mmap = false;
+ spin_unlock(&cb->lock);
+
+ hl_cb_put(cb);
+ vma->vm_private_data = NULL;
+}
+
+static const struct vm_operations_struct cb_vm_ops = {
+ .close = cb_vm_close
+};
+
+int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_cb *cb;
+ phys_addr_t address;
+ u32 handle;
+ int rc;
+
+ handle = vma->vm_pgoff;
+
+ /* reference was taken here */
+ cb = hl_cb_get(hdev, &hpriv->cb_mgr, handle);
+ if (!cb) {
+ dev_err(hdev->dev,
+ "CB mmap failed, no match to handle 0x%x\n", handle);
+ return -EINVAL;
+ }
+
+ /* Validation check */
+ if ((vma->vm_end - vma->vm_start) != ALIGN(cb->size, PAGE_SIZE)) {
+ dev_err(hdev->dev,
+ "CB mmap failed, mmap size 0x%lx != 0x%x cb size\n",
+ vma->vm_end - vma->vm_start, cb->size);
+ rc = -EINVAL;
+ goto put_cb;
+ }
+
+ spin_lock(&cb->lock);
+
+ if (cb->mmap) {
+ dev_err(hdev->dev,
+ "CB mmap failed, CB already mmaped to user\n");
+ rc = -EINVAL;
+ goto release_lock;
+ }
+
+ cb->mmap = true;
+
+ spin_unlock(&cb->lock);
+
+ vma->vm_ops = &cb_vm_ops;
+
+ /*
+ * Note: We're transferring the cb reference to
+ * vma->vm_private_data here.
+ */
+
+ vma->vm_private_data = cb;
+
+ /* Calculate address for CB */
+ address = virt_to_phys((void *) (uintptr_t) cb->kernel_address);
+
+ rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address,
+ address, cb->size);
+
+ if (rc) {
+ spin_lock(&cb->lock);
+ cb->mmap = false;
+ goto release_lock;
+ }
+
+ cb->mmap_size = cb->size;
+
+ return 0;
+
+release_lock:
+ spin_unlock(&cb->lock);
+put_cb:
+ hl_cb_put(cb);
+ return rc;
+}
+
+struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
+ u32 handle)
+{
+ struct hl_cb *cb;
+
+ spin_lock(&mgr->cb_lock);
+ cb = idr_find(&mgr->cb_handles, handle);
+
+ if (!cb) {
+ spin_unlock(&mgr->cb_lock);
+ dev_warn(hdev->dev,
+ "CB get failed, no match to handle 0x%x\n", handle);
+ return NULL;
+ }
+
+ kref_get(&cb->refcount);
+
+ spin_unlock(&mgr->cb_lock);
+
+ return cb;
+
+}
+
+void hl_cb_put(struct hl_cb *cb)
+{
+ kref_put(&cb->refcount, cb_release);
+}
+
+void hl_cb_mgr_init(struct hl_cb_mgr *mgr)
+{
+ spin_lock_init(&mgr->cb_lock);
+ idr_init(&mgr->cb_handles);
+}
+
+void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
+{
+ struct hl_cb *cb;
+ struct idr *idp;
+ u32 id;
+
+ idp = &mgr->cb_handles;
+
+ idr_for_each_entry(idp, cb, id) {
+ if (kref_put(&cb->refcount, cb_release) != 1)
+ dev_err(hdev->dev,
+ "CB %d for CTX ID %d is still alive\n",
+ id, cb->ctx_id);
+ }
+
+ idr_destroy(&mgr->cb_handles);
+}
+
+struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size)
+{
+ u64 cb_handle;
+ struct hl_cb *cb;
+ int rc;
+
+ rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
+ HL_KERNEL_ASID_ID);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to allocate CB for the kernel driver %d\n", rc);
+ return NULL;
+ }
+
+ cb_handle >>= PAGE_SHIFT;
+ cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, (u32) cb_handle);
+ /* hl_cb_get should never fail here so use kernel WARN */
+ WARN(!cb, "Kernel CB handle invalid 0x%x\n", (u32) cb_handle);
+ if (!cb)
+ goto destroy_cb;
+
+ return cb;
+
+destroy_cb:
+ hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb_handle << PAGE_SHIFT);
+
+ return NULL;
+}
+
+int hl_cb_pool_init(struct hl_device *hdev)
+{
+ struct hl_cb *cb;
+ int i;
+
+ INIT_LIST_HEAD(&hdev->cb_pool);
+ spin_lock_init(&hdev->cb_pool_lock);
+
+ for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
+ cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
+ HL_KERNEL_ASID_ID);
+ if (cb) {
+ cb->is_pool = true;
+ list_add(&cb->pool_list, &hdev->cb_pool);
+ } else {
+ hl_cb_pool_fini(hdev);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+int hl_cb_pool_fini(struct hl_device *hdev)
+{
+ struct hl_cb *cb, *tmp;
+
+ list_for_each_entry_safe(cb, tmp, &hdev->cb_pool, pool_list) {
+ list_del(&cb->pool_list);
+ cb_fini(hdev, cb);
+ }
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include <uapi/misc/habanalabs.h>
+#include "habanalabs.h"
+
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+
+#define HL_CS_FLAGS_SIG_WAIT (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT)
+
+static void job_wq_completion(struct work_struct *work);
+static long _hl_cs_wait_ioctl(struct hl_device *hdev,
+ struct hl_ctx *ctx, u64 timeout_us, u64 seq);
+static void cs_do_release(struct kref *ref);
+
+static void hl_sob_reset(struct kref *ref)
+{
+ struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
+ kref);
+ struct hl_device *hdev = hw_sob->hdev;
+
+ hdev->asic_funcs->reset_sob(hdev, hw_sob);
+}
+
+void hl_sob_reset_error(struct kref *ref)
+{
+ struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
+ kref);
+ struct hl_device *hdev = hw_sob->hdev;
+
+ dev_crit(hdev->dev,
+ "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
+ hw_sob->q_idx, hw_sob->sob_id);
+}
+
+static const char *hl_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "HabanaLabs";
+}
+
+static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
+{
+ struct hl_cs_compl *hl_cs_compl =
+ container_of(fence, struct hl_cs_compl, base_fence);
+
+ return dev_name(hl_cs_compl->hdev->dev);
+}
+
+static bool hl_fence_enable_signaling(struct dma_fence *fence)
+{
+ return true;
+}
+
+static void hl_fence_release(struct dma_fence *fence)
+{
+ struct hl_cs_compl *hl_cs_cmpl =
+ container_of(fence, struct hl_cs_compl, base_fence);
+ struct hl_device *hdev = hl_cs_cmpl->hdev;
+
+ /* EBUSY means the CS was never submitted and hence we don't have
+ * an attached hw_sob object that we should handle here
+ */
+ if (fence->error == -EBUSY)
+ goto free;
+
+ if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
+ (hl_cs_cmpl->type == CS_TYPE_WAIT)) {
+
+ dev_dbg(hdev->dev,
+ "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
+ hl_cs_cmpl->cs_seq,
+ hl_cs_cmpl->type,
+ hl_cs_cmpl->hw_sob->sob_id,
+ hl_cs_cmpl->sob_val);
+
+ /*
+ * A signal CS can get completion while the corresponding wait
+ * for signal CS is on its way to the PQ. The wait for signal CS
+ * will get stuck if the signal CS incremented the SOB to its
+ * max value and there are no pending (submitted) waits on this
+ * SOB.
+ * We do the following to void this situation:
+ * 1. The wait for signal CS must get a ref for the signal CS as
+ * soon as possible in cs_ioctl_signal_wait() and put it
+ * before being submitted to the PQ but after it incremented
+ * the SOB refcnt in init_signal_wait_cs().
+ * 2. Signal/Wait for signal CS will decrement the SOB refcnt
+ * here.
+ * These two measures guarantee that the wait for signal CS will
+ * reset the SOB upon completion rather than the signal CS and
+ * hence the above scenario is avoided.
+ */
+ kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
+ }
+
+free:
+ kfree_rcu(hl_cs_cmpl, base_fence.rcu);
+}
+
+static const struct dma_fence_ops hl_fence_ops = {
+ .get_driver_name = hl_fence_get_driver_name,
+ .get_timeline_name = hl_fence_get_timeline_name,
+ .enable_signaling = hl_fence_enable_signaling,
+ .release = hl_fence_release
+};
+
+static void cs_get(struct hl_cs *cs)
+{
+ kref_get(&cs->refcount);
+}
+
+static int cs_get_unless_zero(struct hl_cs *cs)
+{
+ return kref_get_unless_zero(&cs->refcount);
+}
+
+static void cs_put(struct hl_cs *cs)
+{
+ kref_put(&cs->refcount, cs_do_release);
+}
+
+static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
+{
+ /*
+ * Patched CB is created for external queues jobs, and for H/W queues
+ * jobs if the user CB was allocated by driver and MMU is disabled.
+ */
+ return (job->queue_type == QUEUE_TYPE_EXT ||
+ (job->queue_type == QUEUE_TYPE_HW &&
+ job->is_kernel_allocated_cb &&
+ !hdev->mmu_enable));
+}
+
+/*
+ * cs_parser - parse the user command submission
+ *
+ * @hpriv : pointer to the private data of the fd
+ * @job : pointer to the job that holds the command submission info
+ *
+ * The function parses the command submission of the user. It calls the
+ * ASIC specific parser, which returns a list of memory blocks to send
+ * to the device as different command buffers
+ *
+ */
+static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_cs_parser parser;
+ int rc;
+
+ parser.ctx_id = job->cs->ctx->asid;
+ parser.cs_sequence = job->cs->sequence;
+ parser.job_id = job->id;
+
+ parser.hw_queue_id = job->hw_queue_id;
+ parser.job_userptr_list = &job->userptr_list;
+ parser.patched_cb = NULL;
+ parser.user_cb = job->user_cb;
+ parser.user_cb_size = job->user_cb_size;
+ parser.queue_type = job->queue_type;
+ parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
+ job->patched_cb = NULL;
+
+ rc = hdev->asic_funcs->cs_parser(hdev, &parser);
+
+ if (is_cb_patched(hdev, job)) {
+ if (!rc) {
+ job->patched_cb = parser.patched_cb;
+ job->job_cb_size = parser.patched_cb_size;
+ job->contains_dma_pkt = parser.contains_dma_pkt;
+
+ spin_lock(&job->patched_cb->lock);
+ job->patched_cb->cs_cnt++;
+ spin_unlock(&job->patched_cb->lock);
+ }
+
+ /*
+ * Whether the parsing worked or not, we don't need the
+ * original CB anymore because it was already parsed and
+ * won't be accessed again for this CS
+ */
+ spin_lock(&job->user_cb->lock);
+ job->user_cb->cs_cnt--;
+ spin_unlock(&job->user_cb->lock);
+ hl_cb_put(job->user_cb);
+ job->user_cb = NULL;
+ } else if (!rc) {
+ job->job_cb_size = job->user_cb_size;
+ }
+
+ return rc;
+}
+
+static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
+{
+ struct hl_cs *cs = job->cs;
+
+ if (is_cb_patched(hdev, job)) {
+ hl_userptr_delete_list(hdev, &job->userptr_list);
+
+ /*
+ * We might arrive here from rollback and patched CB wasn't
+ * created, so we need to check it's not NULL
+ */
+ if (job->patched_cb) {
+ spin_lock(&job->patched_cb->lock);
+ job->patched_cb->cs_cnt--;
+ spin_unlock(&job->patched_cb->lock);
+
+ hl_cb_put(job->patched_cb);
+ }
+ }
+
+ /* For H/W queue jobs, if a user CB was allocated by driver and MMU is
+ * enabled, the user CB isn't released in cs_parser() and thus should be
+ * released here.
+ */
+ if (job->queue_type == QUEUE_TYPE_HW &&
+ job->is_kernel_allocated_cb && hdev->mmu_enable) {
+ spin_lock(&job->user_cb->lock);
+ job->user_cb->cs_cnt--;
+ spin_unlock(&job->user_cb->lock);
+
+ hl_cb_put(job->user_cb);
+ }
+
+ /*
+ * This is the only place where there can be multiple threads
+ * modifying the list at the same time
+ */
+ spin_lock(&cs->job_lock);
+ list_del(&job->cs_node);
+ spin_unlock(&cs->job_lock);
+
+ hl_debugfs_remove_job(hdev, job);
+
+ if (job->queue_type == QUEUE_TYPE_EXT ||
+ job->queue_type == QUEUE_TYPE_HW)
+ cs_put(cs);
+
+ kfree(job);
+}
+
+static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
+{
+ hdev->aggregated_cs_counters.device_in_reset_drop_cnt +=
+ ctx->cs_counters.device_in_reset_drop_cnt;
+ hdev->aggregated_cs_counters.out_of_mem_drop_cnt +=
+ ctx->cs_counters.out_of_mem_drop_cnt;
+ hdev->aggregated_cs_counters.parsing_drop_cnt +=
+ ctx->cs_counters.parsing_drop_cnt;
+ hdev->aggregated_cs_counters.queue_full_drop_cnt +=
+ ctx->cs_counters.queue_full_drop_cnt;
+}
+
+static void cs_do_release(struct kref *ref)
+{
+ struct hl_cs *cs = container_of(ref, struct hl_cs,
+ refcount);
+ struct hl_device *hdev = cs->ctx->hdev;
+ struct hl_cs_job *job, *tmp;
+
+ cs->completed = true;
+
+ /*
+ * Although if we reached here it means that all external jobs have
+ * finished, because each one of them took refcnt to CS, we still
+ * need to go over the internal jobs and free them. Otherwise, we
+ * will have leaked memory and what's worse, the CS object (and
+ * potentially the CTX object) could be released, while the JOB
+ * still holds a pointer to them (but no reference).
+ */
+ list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
+ free_job(hdev, job);
+
+ /* We also need to update CI for internal queues */
+ if (cs->submitted) {
+ hdev->asic_funcs->hw_queues_lock(hdev);
+
+ hdev->cs_active_cnt--;
+ if (!hdev->cs_active_cnt) {
+ struct hl_device_idle_busy_ts *ts;
+
+ ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
+ ts->busy_to_idle_ts = ktime_get();
+
+ if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
+ hdev->idle_busy_ts_idx = 0;
+ } else if (hdev->cs_active_cnt < 0) {
+ dev_crit(hdev->dev, "CS active cnt %d is negative\n",
+ hdev->cs_active_cnt);
+ }
+
+ hdev->asic_funcs->hw_queues_unlock(hdev);
+
+ hl_int_hw_queue_update_ci(cs);
+
+ spin_lock(&hdev->hw_queues_mirror_lock);
+ /* remove CS from hw_queues mirror list */
+ list_del_init(&cs->mirror_node);
+ spin_unlock(&hdev->hw_queues_mirror_lock);
+
+ /*
+ * Don't cancel TDR in case this CS was timedout because we
+ * might be running from the TDR context
+ */
+ if ((!cs->timedout) &&
+ (hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT)) {
+ struct hl_cs *next;
+
+ if (cs->tdr_active)
+ cancel_delayed_work_sync(&cs->work_tdr);
+
+ spin_lock(&hdev->hw_queues_mirror_lock);
+
+ /* queue TDR for next CS */
+ next = list_first_entry_or_null(
+ &hdev->hw_queues_mirror_list,
+ struct hl_cs, mirror_node);
+
+ if ((next) && (!next->tdr_active)) {
+ next->tdr_active = true;
+ schedule_delayed_work(&next->work_tdr,
+ hdev->timeout_jiffies);
+ }
+
+ spin_unlock(&hdev->hw_queues_mirror_lock);
+ }
+ } else if (cs->type == CS_TYPE_WAIT) {
+ /*
+ * In case the wait for signal CS was submitted, the put occurs
+ * in init_signal_wait_cs() right before hanging on the PQ.
+ */
+ dma_fence_put(cs->signal_fence);
+ }
+
+ /*
+ * Must be called before hl_ctx_put because inside we use ctx to get
+ * the device
+ */
+ hl_debugfs_remove_cs(cs);
+
+ hl_ctx_put(cs->ctx);
+
+ /* We need to mark an error for not submitted because in that case
+ * the dma fence release flow is different. Mainly, we don't need
+ * to handle hw_sob for signal/wait
+ */
+ if (cs->timedout)
+ dma_fence_set_error(cs->fence, -ETIMEDOUT);
+ else if (cs->aborted)
+ dma_fence_set_error(cs->fence, -EIO);
+ else if (!cs->submitted)
+ dma_fence_set_error(cs->fence, -EBUSY);
+
+ dma_fence_signal(cs->fence);
+ dma_fence_put(cs->fence);
+
+ cs_counters_aggregate(hdev, cs->ctx);
+
+ kfree(cs->jobs_in_queue_cnt);
+ kfree(cs);
+}
+
+static void cs_timedout(struct work_struct *work)
+{
+ struct hl_device *hdev;
+ int ctx_asid, rc;
+ struct hl_cs *cs = container_of(work, struct hl_cs,
+ work_tdr.work);
+ rc = cs_get_unless_zero(cs);
+ if (!rc)
+ return;
+
+ if ((!cs->submitted) || (cs->completed)) {
+ cs_put(cs);
+ return;
+ }
+
+ /* Mark the CS is timed out so we won't try to cancel its TDR */
+ cs->timedout = true;
+
+ hdev = cs->ctx->hdev;
+ ctx_asid = cs->ctx->asid;
+
+ dev_err(hdev->dev,
+ "Command submission %llu has not finished in time!\n",
+ cs->sequence);
+
+ cs_put(cs);
+
+ if (hdev->reset_on_lockup)
+ hl_device_reset(hdev, false, false);
+}
+
+static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
+ enum hl_cs_type cs_type, struct hl_cs **cs_new)
+{
+ struct hl_cs_compl *cs_cmpl;
+ struct dma_fence *other = NULL;
+ struct hl_cs *cs;
+ int rc;
+
+ cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
+ if (!cs)
+ return -ENOMEM;
+
+ cs->ctx = ctx;
+ cs->submitted = false;
+ cs->completed = false;
+ cs->type = cs_type;
+ INIT_LIST_HEAD(&cs->job_list);
+ INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
+ kref_init(&cs->refcount);
+ spin_lock_init(&cs->job_lock);
+
+ cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
+ if (!cs_cmpl) {
+ rc = -ENOMEM;
+ goto free_cs;
+ }
+
+ cs_cmpl->hdev = hdev;
+ cs_cmpl->type = cs->type;
+ spin_lock_init(&cs_cmpl->lock);
+ cs->fence = &cs_cmpl->base_fence;
+
+ spin_lock(&ctx->cs_lock);
+
+ cs_cmpl->cs_seq = ctx->cs_sequence;
+ other = ctx->cs_pending[cs_cmpl->cs_seq &
+ (hdev->asic_prop.max_pending_cs - 1)];
+ if ((other) && (!dma_fence_is_signaled(other))) {
+ dev_dbg(hdev->dev,
+ "Rejecting CS because of too many in-flights CS\n");
+ rc = -EAGAIN;
+ goto free_fence;
+ }
+
+ cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
+ sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
+ if (!cs->jobs_in_queue_cnt) {
+ rc = -ENOMEM;
+ goto free_fence;
+ }
+
+ dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
+ ctx->asid, ctx->cs_sequence);
+
+ cs->sequence = cs_cmpl->cs_seq;
+
+ ctx->cs_pending[cs_cmpl->cs_seq &
+ (hdev->asic_prop.max_pending_cs - 1)] =
+ &cs_cmpl->base_fence;
+ ctx->cs_sequence++;
+
+ dma_fence_get(&cs_cmpl->base_fence);
+
+ dma_fence_put(other);
+
+ spin_unlock(&ctx->cs_lock);
+
+ *cs_new = cs;
+
+ return 0;
+
+free_fence:
+ spin_unlock(&ctx->cs_lock);
+ kfree(cs_cmpl);
+free_cs:
+ kfree(cs);
+ return rc;
+}
+
+static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
+{
+ struct hl_cs_job *job, *tmp;
+
+ list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
+ free_job(hdev, job);
+}
+
+void hl_cs_rollback_all(struct hl_device *hdev)
+{
+ int i;
+ struct hl_cs *cs, *tmp;
+
+ /* flush all completions */
+ for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+ flush_workqueue(hdev->cq_wq[i]);
+
+ /* Make sure we don't have leftovers in the H/W queues mirror list */
+ list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
+ mirror_node) {
+ cs_get(cs);
+ cs->aborted = true;
+ dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
+ cs->ctx->asid, cs->sequence);
+ cs_rollback(hdev, cs);
+ cs_put(cs);
+ }
+}
+
+static void job_wq_completion(struct work_struct *work)
+{
+ struct hl_cs_job *job = container_of(work, struct hl_cs_job,
+ finish_work);
+ struct hl_cs *cs = job->cs;
+ struct hl_device *hdev = cs->ctx->hdev;
+
+ /* job is no longer needed */
+ free_job(hdev, job);
+}
+
+static int validate_queue_index(struct hl_device *hdev,
+ struct hl_cs_chunk *chunk,
+ enum hl_queue_type *queue_type,
+ bool *is_kernel_allocated_cb)
+{
+ struct asic_fixed_properties *asic = &hdev->asic_prop;
+ struct hw_queue_properties *hw_queue_prop;
+
+ /* This must be checked here to prevent out-of-bounds access to
+ * hw_queues_props array
+ */
+ if (chunk->queue_index >= asic->max_queues) {
+ dev_err(hdev->dev, "Queue index %d is invalid\n",
+ chunk->queue_index);
+ return -EINVAL;
+ }
+
+ hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
+
+ if (hw_queue_prop->type == QUEUE_TYPE_NA) {
+ dev_err(hdev->dev, "Queue index %d is invalid\n",
+ chunk->queue_index);
+ return -EINVAL;
+ }
+
+ if (hw_queue_prop->driver_only) {
+ dev_err(hdev->dev,
+ "Queue index %d is restricted for the kernel driver\n",
+ chunk->queue_index);
+ return -EINVAL;
+ }
+
+ *queue_type = hw_queue_prop->type;
+ *is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb;
+
+ return 0;
+}
+
+static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
+ struct hl_cb_mgr *cb_mgr,
+ struct hl_cs_chunk *chunk)
+{
+ struct hl_cb *cb;
+ u32 cb_handle;
+
+ cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
+
+ cb = hl_cb_get(hdev, cb_mgr, cb_handle);
+ if (!cb) {
+ dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
+ return NULL;
+ }
+
+ if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
+ dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
+ goto release_cb;
+ }
+
+ spin_lock(&cb->lock);
+ cb->cs_cnt++;
+ spin_unlock(&cb->lock);
+
+ return cb;
+
+release_cb:
+ hl_cb_put(cb);
+ return NULL;
+}
+
+struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
+ enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
+{
+ struct hl_cs_job *job;
+
+ job = kzalloc(sizeof(*job), GFP_ATOMIC);
+ if (!job)
+ return NULL;
+
+ job->queue_type = queue_type;
+ job->is_kernel_allocated_cb = is_kernel_allocated_cb;
+
+ if (is_cb_patched(hdev, job))
+ INIT_LIST_HEAD(&job->userptr_list);
+
+ if (job->queue_type == QUEUE_TYPE_EXT)
+ INIT_WORK(&job->finish_work, job_wq_completion);
+
+ return job;
+}
+
+static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
+ u32 num_chunks, u64 *cs_seq)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_cs_chunk *cs_chunk_array;
+ struct hl_cs_job *job;
+ struct hl_cs *cs;
+ struct hl_cb *cb;
+ bool int_queues_only = true;
+ u32 size_to_copy;
+ int rc, i;
+
+ *cs_seq = ULLONG_MAX;
+
+ if (num_chunks > HL_MAX_JOBS_PER_CS) {
+ dev_err(hdev->dev,
+ "Number of chunks can NOT be larger than %d\n",
+ HL_MAX_JOBS_PER_CS);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
+ GFP_ATOMIC);
+ if (!cs_chunk_array) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
+ if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
+ dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
+ rc = -EFAULT;
+ goto free_cs_chunk_array;
+ }
+
+ /* increment refcnt for context */
+ hl_ctx_get(hdev, hpriv->ctx);
+
+ rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs);
+ if (rc) {
+ hl_ctx_put(hpriv->ctx);
+ goto free_cs_chunk_array;
+ }
+
+ *cs_seq = cs->sequence;
+
+ hl_debugfs_add_cs(cs);
+
+ /* Validate ALL the CS chunks before submitting the CS */
+ for (i = 0 ; i < num_chunks ; i++) {
+ struct hl_cs_chunk *chunk = &cs_chunk_array[i];
+ enum hl_queue_type queue_type;
+ bool is_kernel_allocated_cb;
+
+ rc = validate_queue_index(hdev, chunk, &queue_type,
+ &is_kernel_allocated_cb);
+ if (rc) {
+ hpriv->ctx->cs_counters.parsing_drop_cnt++;
+ goto free_cs_object;
+ }
+
+ if (is_kernel_allocated_cb) {
+ cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
+ if (!cb) {
+ hpriv->ctx->cs_counters.parsing_drop_cnt++;
+ rc = -EINVAL;
+ goto free_cs_object;
+ }
+ } else {
+ cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
+ }
+
+ if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
+ int_queues_only = false;
+
+ job = hl_cs_allocate_job(hdev, queue_type,
+ is_kernel_allocated_cb);
+ if (!job) {
+ hpriv->ctx->cs_counters.out_of_mem_drop_cnt++;
+ dev_err(hdev->dev, "Failed to allocate a new job\n");
+ rc = -ENOMEM;
+ if (is_kernel_allocated_cb)
+ goto release_cb;
+ else
+ goto free_cs_object;
+ }
+
+ job->id = i + 1;
+ job->cs = cs;
+ job->user_cb = cb;
+ job->user_cb_size = chunk->cb_size;
+ job->hw_queue_id = chunk->queue_index;
+
+ cs->jobs_in_queue_cnt[job->hw_queue_id]++;
+
+ list_add_tail(&job->cs_node, &cs->job_list);
+
+ /*
+ * Increment CS reference. When CS reference is 0, CS is
+ * done and can be signaled to user and free all its resources
+ * Only increment for JOB on external or H/W queues, because
+ * only for those JOBs we get completion
+ */
+ if (job->queue_type == QUEUE_TYPE_EXT ||
+ job->queue_type == QUEUE_TYPE_HW)
+ cs_get(cs);
+
+ hl_debugfs_add_job(hdev, job);
+
+ rc = cs_parser(hpriv, job);
+ if (rc) {
+ hpriv->ctx->cs_counters.parsing_drop_cnt++;
+ dev_err(hdev->dev,
+ "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
+ cs->ctx->asid, cs->sequence, job->id, rc);
+ goto free_cs_object;
+ }
+ }
+
+ if (int_queues_only) {
+ hpriv->ctx->cs_counters.parsing_drop_cnt++;
+ dev_err(hdev->dev,
+ "Reject CS %d.%llu because only internal queues jobs are present\n",
+ cs->ctx->asid, cs->sequence);
+ rc = -EINVAL;
+ goto free_cs_object;
+ }
+
+ rc = hl_hw_queue_schedule_cs(cs);
+ if (rc) {
+ if (rc != -EAGAIN)
+ dev_err(hdev->dev,
+ "Failed to submit CS %d.%llu to H/W queues, error %d\n",
+ cs->ctx->asid, cs->sequence, rc);
+ goto free_cs_object;
+ }
+
+ rc = HL_CS_STATUS_SUCCESS;
+ goto put_cs;
+
+release_cb:
+ spin_lock(&cb->lock);
+ cb->cs_cnt--;
+ spin_unlock(&cb->lock);
+ hl_cb_put(cb);
+free_cs_object:
+ cs_rollback(hdev, cs);
+ *cs_seq = ULLONG_MAX;
+ /* The path below is both for good and erroneous exits */
+put_cs:
+ /* We finished with the CS in this function, so put the ref */
+ cs_put(cs);
+free_cs_chunk_array:
+ kfree(cs_chunk_array);
+out:
+ return rc;
+}
+
+static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
+ void __user *chunks, u32 num_chunks,
+ u64 *cs_seq)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_ctx *ctx = hpriv->ctx;
+ struct hl_cs_chunk *cs_chunk_array, *chunk;
+ struct hw_queue_properties *hw_queue_prop;
+ struct dma_fence *sig_fence = NULL;
+ struct hl_cs_job *job;
+ struct hl_cs *cs;
+ struct hl_cb *cb;
+ enum hl_queue_type q_type;
+ u64 *signal_seq_arr = NULL, signal_seq;
+ u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
+ int rc;
+
+ *cs_seq = ULLONG_MAX;
+
+ if (num_chunks > HL_MAX_JOBS_PER_CS) {
+ dev_err(hdev->dev,
+ "Number of chunks can NOT be larger than %d\n",
+ HL_MAX_JOBS_PER_CS);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
+ GFP_ATOMIC);
+ if (!cs_chunk_array) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
+ if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
+ dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
+ rc = -EFAULT;
+ goto free_cs_chunk_array;
+ }
+
+ /* currently it is guaranteed to have only one chunk */
+ chunk = &cs_chunk_array[0];
+ q_idx = chunk->queue_index;
+ hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
+ q_type = hw_queue_prop->type;
+
+ if ((q_idx >= hdev->asic_prop.max_queues) ||
+ (!hw_queue_prop->supports_sync_stream)) {
+ dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
+ rc = -EINVAL;
+ goto free_cs_chunk_array;
+ }
+
+ if (cs_type == CS_TYPE_WAIT) {
+ struct hl_cs_compl *sig_waitcs_cmpl;
+
+ signal_seq_arr_len = chunk->num_signal_seq_arr;
+
+ /* currently only one signal seq is supported */
+ if (signal_seq_arr_len != 1) {
+ dev_err(hdev->dev,
+ "Wait for signal CS supports only one signal CS seq\n");
+ rc = -EINVAL;
+ goto free_cs_chunk_array;
+ }
+
+ signal_seq_arr = kmalloc_array(signal_seq_arr_len,
+ sizeof(*signal_seq_arr),
+ GFP_ATOMIC);
+ if (!signal_seq_arr) {
+ rc = -ENOMEM;
+ goto free_cs_chunk_array;
+ }
+
+ size_to_copy = chunk->num_signal_seq_arr *
+ sizeof(*signal_seq_arr);
+ if (copy_from_user(signal_seq_arr,
+ u64_to_user_ptr(chunk->signal_seq_arr),
+ size_to_copy)) {
+ dev_err(hdev->dev,
+ "Failed to copy signal seq array from user\n");
+ rc = -EFAULT;
+ goto free_signal_seq_array;
+ }
+
+ /* currently it is guaranteed to have only one signal seq */
+ signal_seq = signal_seq_arr[0];
+ sig_fence = hl_ctx_get_fence(ctx, signal_seq);
+ if (IS_ERR(sig_fence)) {
+ dev_err(hdev->dev,
+ "Failed to get signal CS with seq 0x%llx\n",
+ signal_seq);
+ rc = PTR_ERR(sig_fence);
+ goto free_signal_seq_array;
+ }
+
+ if (!sig_fence) {
+ /* signal CS already finished */
+ rc = 0;
+ goto free_signal_seq_array;
+ }
+
+ sig_waitcs_cmpl =
+ container_of(sig_fence, struct hl_cs_compl, base_fence);
+
+ if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
+ dev_err(hdev->dev,
+ "CS seq 0x%llx is not of a signal CS\n",
+ signal_seq);
+ dma_fence_put(sig_fence);
+ rc = -EINVAL;
+ goto free_signal_seq_array;
+ }
+
+ if (dma_fence_is_signaled(sig_fence)) {
+ /* signal CS already finished */
+ dma_fence_put(sig_fence);
+ rc = 0;
+ goto free_signal_seq_array;
+ }
+ }
+
+ /* increment refcnt for context */
+ hl_ctx_get(hdev, ctx);
+
+ rc = allocate_cs(hdev, ctx, cs_type, &cs);
+ if (rc) {
+ if (cs_type == CS_TYPE_WAIT)
+ dma_fence_put(sig_fence);
+ hl_ctx_put(ctx);
+ goto free_signal_seq_array;
+ }
+
+ /*
+ * Save the signal CS fence for later initialization right before
+ * hanging the wait CS on the queue.
+ */
+ if (cs->type == CS_TYPE_WAIT)
+ cs->signal_fence = sig_fence;
+
+ hl_debugfs_add_cs(cs);
+
+ *cs_seq = cs->sequence;
+
+ job = hl_cs_allocate_job(hdev, q_type, true);
+ if (!job) {
+ ctx->cs_counters.out_of_mem_drop_cnt++;
+ dev_err(hdev->dev, "Failed to allocate a new job\n");
+ rc = -ENOMEM;
+ goto put_cs;
+ }
+
+ cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
+ if (!cb) {
+ ctx->cs_counters.out_of_mem_drop_cnt++;
+ kfree(job);
+ rc = -EFAULT;
+ goto put_cs;
+ }
+
+ if (cs->type == CS_TYPE_WAIT)
+ cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
+ else
+ cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
+
+ job->id = 0;
+ job->cs = cs;
+ job->user_cb = cb;
+ job->user_cb->cs_cnt++;
+ job->user_cb_size = cb_size;
+ job->hw_queue_id = q_idx;
+
+ /*
+ * No need in parsing, user CB is the patched CB.
+ * We call hl_cb_destroy() out of two reasons - we don't need the CB in
+ * the CB idr anymore and to decrement its refcount as it was
+ * incremented inside hl_cb_kernel_create().
+ */
+ job->patched_cb = job->user_cb;
+ job->job_cb_size = job->user_cb_size;
+ hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+
+ cs->jobs_in_queue_cnt[job->hw_queue_id]++;
+
+ list_add_tail(&job->cs_node, &cs->job_list);
+
+ /* increment refcount as for external queues we get completion */
+ cs_get(cs);
+
+ hl_debugfs_add_job(hdev, job);
+
+ rc = hl_hw_queue_schedule_cs(cs);
+ if (rc) {
+ if (rc != -EAGAIN)
+ dev_err(hdev->dev,
+ "Failed to submit CS %d.%llu to H/W queues, error %d\n",
+ ctx->asid, cs->sequence, rc);
+ goto free_cs_object;
+ }
+
+ rc = HL_CS_STATUS_SUCCESS;
+ goto put_cs;
+
+free_cs_object:
+ cs_rollback(hdev, cs);
+ *cs_seq = ULLONG_MAX;
+ /* The path below is both for good and erroneous exits */
+put_cs:
+ /* We finished with the CS in this function, so put the ref */
+ cs_put(cs);
+free_signal_seq_array:
+ if (cs_type == CS_TYPE_WAIT)
+ kfree(signal_seq_arr);
+free_cs_chunk_array:
+ kfree(cs_chunk_array);
+out:
+ return rc;
+}
+
+int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ union hl_cs_args *args = data;
+ struct hl_ctx *ctx = hpriv->ctx;
+ void __user *chunks_execute, *chunks_restore;
+ enum hl_cs_type cs_type;
+ u32 num_chunks_execute, num_chunks_restore, sig_wait_flags;
+ u64 cs_seq = ULONG_MAX;
+ int rc, do_ctx_switch;
+ bool need_soft_reset = false;
+
+ if (hl_device_disabled_or_in_reset(hdev)) {
+ dev_warn_ratelimited(hdev->dev,
+ "Device is %s. Can't submit new CS\n",
+ atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+ rc = -EBUSY;
+ goto out;
+ }
+
+ sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT;
+
+ if (unlikely(sig_wait_flags == HL_CS_FLAGS_SIG_WAIT)) {
+ dev_err(hdev->dev,
+ "Signal and wait CS flags are mutually exclusive, context %d\n",
+ ctx->asid);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (unlikely((sig_wait_flags & HL_CS_FLAGS_SIG_WAIT) &&
+ (!hdev->supports_sync_stream))) {
+ dev_err(hdev->dev, "Sync stream CS is not supported\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (args->in.cs_flags & HL_CS_FLAGS_SIGNAL)
+ cs_type = CS_TYPE_SIGNAL;
+ else if (args->in.cs_flags & HL_CS_FLAGS_WAIT)
+ cs_type = CS_TYPE_WAIT;
+ else
+ cs_type = CS_TYPE_DEFAULT;
+
+ chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
+ num_chunks_execute = args->in.num_chunks_execute;
+
+ if (cs_type == CS_TYPE_DEFAULT) {
+ if (!num_chunks_execute) {
+ dev_err(hdev->dev,
+ "Got execute CS with 0 chunks, context %d\n",
+ ctx->asid);
+ rc = -EINVAL;
+ goto out;
+ }
+ } else if (num_chunks_execute != 1) {
+ dev_err(hdev->dev,
+ "Sync stream CS mandates one chunk only, context %d\n",
+ ctx->asid);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
+
+ if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
+ long ret;
+
+ chunks_restore =
+ (void __user *) (uintptr_t) args->in.chunks_restore;
+ num_chunks_restore = args->in.num_chunks_restore;
+
+ mutex_lock(&hpriv->restore_phase_mutex);
+
+ if (do_ctx_switch) {
+ rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
+ if (rc) {
+ dev_err_ratelimited(hdev->dev,
+ "Failed to switch to context %d, rejecting CS! %d\n",
+ ctx->asid, rc);
+ /*
+ * If we timedout, or if the device is not IDLE
+ * while we want to do context-switch (-EBUSY),
+ * we need to soft-reset because QMAN is
+ * probably stuck. However, we can't call to
+ * reset here directly because of deadlock, so
+ * need to do it at the very end of this
+ * function
+ */
+ if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
+ need_soft_reset = true;
+ mutex_unlock(&hpriv->restore_phase_mutex);
+ goto out;
+ }
+ }
+
+ hdev->asic_funcs->restore_phase_topology(hdev);
+
+ if (!num_chunks_restore) {
+ dev_dbg(hdev->dev,
+ "Need to run restore phase but restore CS is empty\n");
+ rc = 0;
+ } else {
+ rc = cs_ioctl_default(hpriv, chunks_restore,
+ num_chunks_restore, &cs_seq);
+ }
+
+ mutex_unlock(&hpriv->restore_phase_mutex);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to submit restore CS for context %d (%d)\n",
+ ctx->asid, rc);
+ goto out;
+ }
+
+ /* Need to wait for restore completion before execution phase */
+ if (num_chunks_restore) {
+ ret = _hl_cs_wait_ioctl(hdev, ctx,
+ jiffies_to_usecs(hdev->timeout_jiffies),
+ cs_seq);
+ if (ret <= 0) {
+ dev_err(hdev->dev,
+ "Restore CS for context %d failed to complete %ld\n",
+ ctx->asid, ret);
+ rc = -ENOEXEC;
+ goto out;
+ }
+ }
+
+ ctx->thread_ctx_switch_wait_token = 1;
+ } else if (!ctx->thread_ctx_switch_wait_token) {
+ u32 tmp;
+
+ rc = hl_poll_timeout_memory(hdev,
+ &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
+ 100, jiffies_to_usecs(hdev->timeout_jiffies), false);
+
+ if (rc == -ETIMEDOUT) {
+ dev_err(hdev->dev,
+ "context switch phase timeout (%d)\n", tmp);
+ goto out;
+ }
+ }
+
+ if (cs_type == CS_TYPE_DEFAULT)
+ rc = cs_ioctl_default(hpriv, chunks_execute, num_chunks_execute,
+ &cs_seq);
+ else
+ rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks_execute,
+ num_chunks_execute, &cs_seq);
+
+out:
+ if (rc != -EAGAIN) {
+ memset(args, 0, sizeof(*args));
+ args->out.status = rc;
+ args->out.seq = cs_seq;
+ }
+
+ if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset))
+ hl_device_reset(hdev, false, false);
+
+ return rc;
+}
+
+static long _hl_cs_wait_ioctl(struct hl_device *hdev,
+ struct hl_ctx *ctx, u64 timeout_us, u64 seq)
+{
+ struct dma_fence *fence;
+ unsigned long timeout;
+ long rc;
+
+ if (timeout_us == MAX_SCHEDULE_TIMEOUT)
+ timeout = timeout_us;
+ else
+ timeout = usecs_to_jiffies(timeout_us);
+
+ hl_ctx_get(hdev, ctx);
+
+ fence = hl_ctx_get_fence(ctx, seq);
+ if (IS_ERR(fence)) {
+ rc = PTR_ERR(fence);
+ if (rc == -EINVAL)
+ dev_notice_ratelimited(hdev->dev,
+ "Can't wait on CS %llu because current CS is at seq %llu\n",
+ seq, ctx->cs_sequence);
+ } else if (fence) {
+ rc = dma_fence_wait_timeout(fence, true, timeout);
+ if (fence->error == -ETIMEDOUT)
+ rc = -ETIMEDOUT;
+ else if (fence->error == -EIO)
+ rc = -EIO;
+ dma_fence_put(fence);
+ } else {
+ dev_dbg(hdev->dev,
+ "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
+ seq, ctx->cs_sequence);
+ rc = 1;
+ }
+
+ hl_ctx_put(ctx);
+
+ return rc;
+}
+
+int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ union hl_wait_cs_args *args = data;
+ u64 seq = args->in.seq;
+ long rc;
+
+ rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq);
+
+ memset(args, 0, sizeof(*args));
+
+ if (rc < 0) {
+ if (rc == -ERESTARTSYS) {
+ dev_err_ratelimited(hdev->dev,
+ "user process got signal while waiting for CS handle %llu\n",
+ seq);
+ args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
+ rc = -EINTR;
+ } else if (rc == -ETIMEDOUT) {
+ dev_err_ratelimited(hdev->dev,
+ "CS %llu has timed-out while user process is waiting for it\n",
+ seq);
+ args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
+ } else if (rc == -EIO) {
+ dev_err_ratelimited(hdev->dev,
+ "CS %llu has been aborted while user process is waiting for it\n",
+ seq);
+ args->out.status = HL_WAIT_CS_STATUS_ABORTED;
+ }
+ return rc;
+ }
+
+ if (rc == 0)
+ args->out.status = HL_WAIT_CS_STATUS_BUSY;
+ else
+ args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
+
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+#include <linux/slab.h>
+
+static void hl_ctx_fini(struct hl_ctx *ctx)
+{
+ struct hl_device *hdev = ctx->hdev;
+ int i;
+
+ /*
+ * If we arrived here, there are no jobs waiting for this context
+ * on its queues so we can safely remove it.
+ * This is because for each CS, we increment the ref count and for
+ * every CS that was finished we decrement it and we won't arrive
+ * to this function unless the ref count is 0
+ */
+
+ for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++)
+ dma_fence_put(ctx->cs_pending[i]);
+
+ kfree(ctx->cs_pending);
+
+ if (ctx->asid != HL_KERNEL_ASID_ID) {
+ /* The engines are stopped as there is no executing CS, but the
+ * Coresight might be still working by accessing addresses
+ * related to the stopped engines. Hence stop it explicitly.
+ * Stop only if this is the compute context, as there can be
+ * only one compute context
+ */
+ if ((hdev->in_debug) && (hdev->compute_ctx == ctx))
+ hl_device_set_debug_mode(hdev, false);
+
+ hl_vm_ctx_fini(ctx);
+ hl_asid_free(hdev, ctx->asid);
+ } else {
+ hl_mmu_ctx_fini(ctx);
+ }
+}
+
+void hl_ctx_do_release(struct kref *ref)
+{
+ struct hl_ctx *ctx;
+
+ ctx = container_of(ref, struct hl_ctx, refcount);
+
+ hl_ctx_fini(ctx);
+
+ if (ctx->hpriv)
+ hl_hpriv_put(ctx->hpriv);
+
+ kfree(ctx);
+}
+
+int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
+{
+ struct hl_ctx_mgr *mgr = &hpriv->ctx_mgr;
+ struct hl_ctx *ctx;
+ int rc;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx) {
+ rc = -ENOMEM;
+ goto out_err;
+ }
+
+ mutex_lock(&mgr->ctx_lock);
+ rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
+ mutex_unlock(&mgr->ctx_lock);
+
+ if (rc < 0) {
+ dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n");
+ goto free_ctx;
+ }
+
+ ctx->handle = rc;
+
+ rc = hl_ctx_init(hdev, ctx, false);
+ if (rc)
+ goto remove_from_idr;
+
+ hl_hpriv_get(hpriv);
+ ctx->hpriv = hpriv;
+
+ /* TODO: remove for multiple contexts per process */
+ hpriv->ctx = ctx;
+
+ /* TODO: remove the following line for multiple process support */
+ hdev->compute_ctx = ctx;
+
+ return 0;
+
+remove_from_idr:
+ mutex_lock(&mgr->ctx_lock);
+ idr_remove(&mgr->ctx_handles, ctx->handle);
+ mutex_unlock(&mgr->ctx_lock);
+free_ctx:
+ kfree(ctx);
+out_err:
+ return rc;
+}
+
+void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
+{
+ if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1)
+ return;
+
+ dev_warn(hdev->dev,
+ "user process released device but its command submissions are still executing\n");
+}
+
+int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
+{
+ int rc = 0;
+
+ ctx->hdev = hdev;
+
+ kref_init(&ctx->refcount);
+
+ ctx->cs_sequence = 1;
+ spin_lock_init(&ctx->cs_lock);
+ atomic_set(&ctx->thread_ctx_switch_token, 1);
+ ctx->thread_ctx_switch_wait_token = 0;
+ ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
+ sizeof(struct dma_fence *),
+ GFP_KERNEL);
+ if (!ctx->cs_pending)
+ return -ENOMEM;
+
+ if (is_kernel_ctx) {
+ ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
+ rc = hl_mmu_ctx_init(ctx);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to init mmu ctx module\n");
+ goto mem_ctx_err;
+ }
+ } else {
+ ctx->asid = hl_asid_alloc(hdev);
+ if (!ctx->asid) {
+ dev_err(hdev->dev, "No free ASID, failed to create context\n");
+ return -ENOMEM;
+ }
+
+ rc = hl_vm_ctx_init(ctx);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to init mem ctx module\n");
+ rc = -ENOMEM;
+ goto mem_ctx_err;
+ }
+ }
+
+ return 0;
+
+mem_ctx_err:
+ if (ctx->asid != HL_KERNEL_ASID_ID)
+ hl_asid_free(hdev, ctx->asid);
+
+ return rc;
+}
+
+void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx)
+{
+ kref_get(&ctx->refcount);
+}
+
+int hl_ctx_put(struct hl_ctx *ctx)
+{
+ return kref_put(&ctx->refcount, hl_ctx_do_release);
+}
+
+struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
+{
+ struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
+ struct dma_fence *fence;
+
+ spin_lock(&ctx->cs_lock);
+
+ if (seq >= ctx->cs_sequence) {
+ spin_unlock(&ctx->cs_lock);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) {
+ spin_unlock(&ctx->cs_lock);
+ return NULL;
+ }
+
+ fence = dma_fence_get(
+ ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]);
+ spin_unlock(&ctx->cs_lock);
+
+ return fence;
+}
+
+/*
+ * hl_ctx_mgr_init - initialize the context manager
+ *
+ * @mgr: pointer to context manager structure
+ *
+ * This manager is an object inside the hpriv object of the user process.
+ * The function is called when a user process opens the FD.
+ */
+void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr)
+{
+ mutex_init(&mgr->ctx_lock);
+ idr_init(&mgr->ctx_handles);
+}
+
+/*
+ * hl_ctx_mgr_fini - finalize the context manager
+ *
+ * @hdev: pointer to device structure
+ * @mgr: pointer to context manager structure
+ *
+ * This function goes over all the contexts in the manager and frees them.
+ * It is called when a process closes the FD.
+ */
+void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr)
+{
+ struct hl_ctx *ctx;
+ struct idr *idp;
+ u32 id;
+
+ idp = &mgr->ctx_handles;
+
+ idr_for_each_entry(idp, ctx, id)
+ hl_ctx_free(hdev, ctx);
+
+ idr_destroy(&mgr->ctx_handles);
+ mutex_destroy(&mgr->ctx_lock);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+#include "include/hw_ip/mmu/mmu_general.h"
+
+#include <linux/pci.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+
+#define MMU_ADDR_BUF_SIZE 40
+#define MMU_ASID_BUF_SIZE 10
+#define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE)
+
+static struct dentry *hl_debug_root;
+
+static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
+ u8 i2c_reg, u32 *val)
+{
+ struct armcp_packet pkt;
+ int rc;
+
+ if (hl_device_disabled_or_in_reset(hdev))
+ return -EBUSY;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_RD <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.i2c_bus = i2c_bus;
+ pkt.i2c_addr = i2c_addr;
+ pkt.i2c_reg = i2c_reg;
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_DEVICE_TIMEOUT_USEC, (long *) val);
+
+ if (rc)
+ dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc);
+
+ return rc;
+}
+
+static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
+ u8 i2c_reg, u32 val)
+{
+ struct armcp_packet pkt;
+ int rc;
+
+ if (hl_device_disabled_or_in_reset(hdev))
+ return -EBUSY;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_WR <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.i2c_bus = i2c_bus;
+ pkt.i2c_addr = i2c_addr;
+ pkt.i2c_reg = i2c_reg;
+ pkt.value = cpu_to_le64(val);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_DEVICE_TIMEOUT_USEC, NULL);
+
+ if (rc)
+ dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc);
+
+ return rc;
+}
+
+static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
+{
+ struct armcp_packet pkt;
+ int rc;
+
+ if (hl_device_disabled_or_in_reset(hdev))
+ return;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_LED_SET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.led_index = cpu_to_le32(led);
+ pkt.value = cpu_to_le64(state);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_DEVICE_TIMEOUT_USEC, NULL);
+
+ if (rc)
+ dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc);
+}
+
+static int command_buffers_show(struct seq_file *s, void *data)
+{
+ struct hl_debugfs_entry *entry = s->private;
+ struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ struct hl_cb *cb;
+ bool first = true;
+
+ spin_lock(&dev_entry->cb_spinlock);
+
+ list_for_each_entry(cb, &dev_entry->cb_list, debugfs_list) {
+ if (first) {
+ first = false;
+ seq_puts(s, "\n");
+ seq_puts(s, " CB ID CTX ID CB size CB RefCnt mmap? CS counter\n");
+ seq_puts(s, "---------------------------------------------------------------\n");
+ }
+ seq_printf(s,
+ " %03d %d 0x%08x %d %d %d\n",
+ cb->id, cb->ctx_id, cb->size,
+ kref_read(&cb->refcount),
+ cb->mmap, cb->cs_cnt);
+ }
+
+ spin_unlock(&dev_entry->cb_spinlock);
+
+ if (!first)
+ seq_puts(s, "\n");
+
+ return 0;
+}
+
+static int command_submission_show(struct seq_file *s, void *data)
+{
+ struct hl_debugfs_entry *entry = s->private;
+ struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ struct hl_cs *cs;
+ bool first = true;
+
+ spin_lock(&dev_entry->cs_spinlock);
+
+ list_for_each_entry(cs, &dev_entry->cs_list, debugfs_list) {
+ if (first) {
+ first = false;
+ seq_puts(s, "\n");
+ seq_puts(s, " CS ID CTX ASID CS RefCnt Submitted Completed\n");
+ seq_puts(s, "------------------------------------------------------\n");
+ }
+ seq_printf(s,
+ " %llu %d %d %d %d\n",
+ cs->sequence, cs->ctx->asid,
+ kref_read(&cs->refcount),
+ cs->submitted, cs->completed);
+ }
+
+ spin_unlock(&dev_entry->cs_spinlock);
+
+ if (!first)
+ seq_puts(s, "\n");
+
+ return 0;
+}
+
+static int command_submission_jobs_show(struct seq_file *s, void *data)
+{
+ struct hl_debugfs_entry *entry = s->private;
+ struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ struct hl_cs_job *job;
+ bool first = true;
+
+ spin_lock(&dev_entry->cs_job_spinlock);
+
+ list_for_each_entry(job, &dev_entry->cs_job_list, debugfs_list) {
+ if (first) {
+ first = false;
+ seq_puts(s, "\n");
+ seq_puts(s, " JOB ID CS ID CTX ASID H/W Queue\n");
+ seq_puts(s, "---------------------------------------\n");
+ }
+ if (job->cs)
+ seq_printf(s,
+ " %02d %llu %d %d\n",
+ job->id, job->cs->sequence, job->cs->ctx->asid,
+ job->hw_queue_id);
+ else
+ seq_printf(s,
+ " %02d 0 %d %d\n",
+ job->id, HL_KERNEL_ASID_ID, job->hw_queue_id);
+ }
+
+ spin_unlock(&dev_entry->cs_job_spinlock);
+
+ if (!first)
+ seq_puts(s, "\n");
+
+ return 0;
+}
+
+static int userptr_show(struct seq_file *s, void *data)
+{
+ struct hl_debugfs_entry *entry = s->private;
+ struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ struct hl_userptr *userptr;
+ char dma_dir[4][30] = {"DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
+ "DMA_FROM_DEVICE", "DMA_NONE"};
+ bool first = true;
+
+ spin_lock(&dev_entry->userptr_spinlock);
+
+ list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) {
+ if (first) {
+ first = false;
+ seq_puts(s, "\n");
+ seq_puts(s, " user virtual address size dma dir\n");
+ seq_puts(s, "----------------------------------------------------------\n");
+ }
+ seq_printf(s,
+ " 0x%-14llx %-10u %-30s\n",
+ userptr->addr, userptr->size, dma_dir[userptr->dir]);
+ }
+
+ spin_unlock(&dev_entry->userptr_spinlock);
+
+ if (!first)
+ seq_puts(s, "\n");
+
+ return 0;
+}
+
+static int vm_show(struct seq_file *s, void *data)
+{
+ struct hl_debugfs_entry *entry = s->private;
+ struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ struct hl_ctx *ctx;
+ struct hl_vm *vm;
+ struct hl_vm_hash_node *hnode;
+ struct hl_userptr *userptr;
+ struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
+ enum vm_type_t *vm_type;
+ bool once = true;
+ u64 j;
+ int i;
+
+ if (!dev_entry->hdev->mmu_enable)
+ return 0;
+
+ spin_lock(&dev_entry->ctx_mem_hash_spinlock);
+
+ list_for_each_entry(ctx, &dev_entry->ctx_mem_hash_list, debugfs_list) {
+ once = false;
+ seq_puts(s, "\n\n----------------------------------------------------");
+ seq_puts(s, "\n----------------------------------------------------\n\n");
+ seq_printf(s, "ctx asid: %u\n", ctx->asid);
+
+ seq_puts(s, "\nmappings:\n\n");
+ seq_puts(s, " virtual address size handle\n");
+ seq_puts(s, "----------------------------------------------------\n");
+ mutex_lock(&ctx->mem_hash_lock);
+ hash_for_each(ctx->mem_hash, i, hnode, node) {
+ vm_type = hnode->ptr;
+
+ if (*vm_type == VM_TYPE_USERPTR) {
+ userptr = hnode->ptr;
+ seq_printf(s,
+ " 0x%-14llx %-10u\n",
+ hnode->vaddr, userptr->size);
+ } else {
+ phys_pg_pack = hnode->ptr;
+ seq_printf(s,
+ " 0x%-14llx %-10llu %-4u\n",
+ hnode->vaddr, phys_pg_pack->total_size,
+ phys_pg_pack->handle);
+ }
+ }
+ mutex_unlock(&ctx->mem_hash_lock);
+
+ vm = &ctx->hdev->vm;
+ spin_lock(&vm->idr_lock);
+
+ if (!idr_is_empty(&vm->phys_pg_pack_handles))
+ seq_puts(s, "\n\nallocations:\n");
+
+ idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_pack, i) {
+ if (phys_pg_pack->asid != ctx->asid)
+ continue;
+
+ seq_printf(s, "\nhandle: %u\n", phys_pg_pack->handle);
+ seq_printf(s, "page size: %u\n\n",
+ phys_pg_pack->page_size);
+ seq_puts(s, " physical address\n");
+ seq_puts(s, "---------------------\n");
+ for (j = 0 ; j < phys_pg_pack->npages ; j++) {
+ seq_printf(s, " 0x%-14llx\n",
+ phys_pg_pack->pages[j]);
+ }
+ }
+ spin_unlock(&vm->idr_lock);
+
+ }
+
+ spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
+
+ if (!once)
+ seq_puts(s, "\n");
+
+ return 0;
+}
+
+/* these inline functions are copied from mmu.c */
+static inline u64 get_hop0_addr(struct hl_ctx *ctx)
+{
+ return ctx->hdev->asic_prop.mmu_pgt_addr +
+ (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
+}
+
+static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
+ u64 virt_addr, u64 mask, u64 shift)
+{
+ return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
+ ((virt_addr & mask) >> shift);
+}
+
+static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_specs,
+ u64 hop_addr, u64 vaddr)
+{
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask,
+ mmu_specs->hop0_shift);
+}
+
+static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_specs,
+ u64 hop_addr, u64 vaddr)
+{
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask,
+ mmu_specs->hop1_shift);
+}
+
+static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_specs,
+ u64 hop_addr, u64 vaddr)
+{
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask,
+ mmu_specs->hop2_shift);
+}
+
+static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_specs,
+ u64 hop_addr, u64 vaddr)
+{
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask,
+ mmu_specs->hop3_shift);
+}
+
+static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_specs,
+ u64 hop_addr, u64 vaddr)
+{
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask,
+ mmu_specs->hop4_shift);
+}
+
+static inline u64 get_next_hop_addr(u64 curr_pte)
+{
+ if (curr_pte & PAGE_PRESENT_MASK)
+ return curr_pte & HOP_PHYS_ADDR_MASK;
+ else
+ return ULLONG_MAX;
+}
+
+static int mmu_show(struct seq_file *s, void *data)
+{
+ struct hl_debugfs_entry *entry = s->private;
+ struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ struct hl_device *hdev = dev_entry->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_mmu_properties *mmu_prop;
+ struct hl_ctx *ctx;
+ bool is_dram_addr;
+
+ u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0,
+ hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0,
+ hop2_addr = 0, hop2_pte_addr = 0, hop2_pte = 0,
+ hop3_addr = 0, hop3_pte_addr = 0, hop3_pte = 0,
+ hop4_addr = 0, hop4_pte_addr = 0, hop4_pte = 0,
+ virt_addr = dev_entry->mmu_addr;
+
+ if (!hdev->mmu_enable)
+ return 0;
+
+ if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID)
+ ctx = hdev->kernel_ctx;
+ else
+ ctx = hdev->compute_ctx;
+
+ if (!ctx) {
+ dev_err(hdev->dev, "no ctx available\n");
+ return 0;
+ }
+
+ is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+ prop->dmmu.start_addr,
+ prop->dmmu.end_addr);
+
+ /* shifts and masks are the same in PMMU and HPMMU, use one of them */
+ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
+ mutex_lock(&ctx->mmu_lock);
+
+ /* the following lookup is copied from unmap() in mmu.c */
+
+ hop0_addr = get_hop0_addr(ctx);
+ hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
+ hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
+ hop1_addr = get_next_hop_addr(hop0_pte);
+
+ if (hop1_addr == ULLONG_MAX)
+ goto not_mapped;
+
+ hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
+ hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
+ hop2_addr = get_next_hop_addr(hop1_pte);
+
+ if (hop2_addr == ULLONG_MAX)
+ goto not_mapped;
+
+ hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
+ hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
+ hop3_addr = get_next_hop_addr(hop2_pte);
+
+ if (hop3_addr == ULLONG_MAX)
+ goto not_mapped;
+
+ hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
+ hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
+
+ if (!(hop3_pte & LAST_MASK)) {
+ hop4_addr = get_next_hop_addr(hop3_pte);
+
+ if (hop4_addr == ULLONG_MAX)
+ goto not_mapped;
+
+ hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
+ virt_addr);
+ hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
+ if (!(hop4_pte & PAGE_PRESENT_MASK))
+ goto not_mapped;
+ } else {
+ if (!(hop3_pte & PAGE_PRESENT_MASK))
+ goto not_mapped;
+ }
+
+ seq_printf(s, "asid: %u, virt_addr: 0x%llx\n",
+ dev_entry->mmu_asid, dev_entry->mmu_addr);
+
+ seq_printf(s, "hop0_addr: 0x%llx\n", hop0_addr);
+ seq_printf(s, "hop0_pte_addr: 0x%llx\n", hop0_pte_addr);
+ seq_printf(s, "hop0_pte: 0x%llx\n", hop0_pte);
+
+ seq_printf(s, "hop1_addr: 0x%llx\n", hop1_addr);
+ seq_printf(s, "hop1_pte_addr: 0x%llx\n", hop1_pte_addr);
+ seq_printf(s, "hop1_pte: 0x%llx\n", hop1_pte);
+
+ seq_printf(s, "hop2_addr: 0x%llx\n", hop2_addr);
+ seq_printf(s, "hop2_pte_addr: 0x%llx\n", hop2_pte_addr);
+ seq_printf(s, "hop2_pte: 0x%llx\n", hop2_pte);
+
+ seq_printf(s, "hop3_addr: 0x%llx\n", hop3_addr);
+ seq_printf(s, "hop3_pte_addr: 0x%llx\n", hop3_pte_addr);
+ seq_printf(s, "hop3_pte: 0x%llx\n", hop3_pte);
+
+ if (!(hop3_pte & LAST_MASK)) {
+ seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr);
+ seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr);
+ seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte);
+ }
+
+ goto out;
+
+not_mapped:
+ dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
+ virt_addr);
+out:
+ mutex_unlock(&ctx->mmu_lock);
+
+ return 0;
+}
+
+static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *f_pos)
+{
+ struct seq_file *s = file->private_data;
+ struct hl_debugfs_entry *entry = s->private;
+ struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ struct hl_device *hdev = dev_entry->hdev;
+ char kbuf[MMU_KBUF_SIZE];
+ char *c;
+ ssize_t rc;
+
+ if (!hdev->mmu_enable)
+ return count;
+
+ if (count > sizeof(kbuf) - 1)
+ goto err;
+ if (copy_from_user(kbuf, buf, count))
+ goto err;
+ kbuf[count] = 0;
+
+ c = strchr(kbuf, ' ');
+ if (!c)
+ goto err;
+ *c = '\0';
+
+ rc = kstrtouint(kbuf, 10, &dev_entry->mmu_asid);
+ if (rc)
+ goto err;
+
+ if (strncmp(c+1, "0x", 2))
+ goto err;
+ rc = kstrtoull(c+3, 16, &dev_entry->mmu_addr);
+ if (rc)
+ goto err;
+
+ return count;
+
+err:
+ dev_err(hdev->dev, "usage: echo <asid> <0xaddr> > mmu\n");
+
+ return -EINVAL;
+}
+
+static int engines_show(struct seq_file *s, void *data)
+{
+ struct hl_debugfs_entry *entry = s->private;
+ struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ struct hl_device *hdev = dev_entry->hdev;
+
+ if (atomic_read(&hdev->in_reset)) {
+ dev_warn_ratelimited(hdev->dev,
+ "Can't check device idle during reset\n");
+ return 0;
+ }
+
+ hdev->asic_funcs->is_device_idle(hdev, NULL, s);
+
+ return 0;
+}
+
+static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+ if (!hdev->mmu_enable)
+ goto out;
+
+ if (hdev->dram_supports_virtual_memory &&
+ (addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
+ return true;
+
+ if (addr >= prop->pmmu.start_addr &&
+ addr < prop->pmmu.end_addr)
+ return true;
+
+ if (addr >= prop->pmmu_huge.start_addr &&
+ addr < prop->pmmu_huge.end_addr)
+ return true;
+out:
+ return false;
+}
+
+static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
+ u64 *phys_addr)
+{
+ struct hl_ctx *ctx = hdev->compute_ctx;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_mmu_properties *mmu_prop;
+ u64 hop_addr, hop_pte_addr, hop_pte;
+ u64 offset_mask = HOP4_MASK | FLAGS_MASK;
+ int rc = 0;
+ bool is_dram_addr;
+
+ if (!ctx) {
+ dev_err(hdev->dev, "no ctx available\n");
+ return -EINVAL;
+ }
+
+ is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+ prop->dmmu.start_addr,
+ prop->dmmu.end_addr);
+
+ /* shifts and masks are the same in PMMU and HPMMU, use one of them */
+ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
+ mutex_lock(&ctx->mmu_lock);
+
+ /* hop 0 */
+ hop_addr = get_hop0_addr(ctx);
+ hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
+ hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
+
+ /* hop 1 */
+ hop_addr = get_next_hop_addr(hop_pte);
+ if (hop_addr == ULLONG_MAX)
+ goto not_mapped;
+ hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
+ hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
+
+ /* hop 2 */
+ hop_addr = get_next_hop_addr(hop_pte);
+ if (hop_addr == ULLONG_MAX)
+ goto not_mapped;
+ hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
+ hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
+
+ /* hop 3 */
+ hop_addr = get_next_hop_addr(hop_pte);
+ if (hop_addr == ULLONG_MAX)
+ goto not_mapped;
+ hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
+ hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
+
+ if (!(hop_pte & LAST_MASK)) {
+ /* hop 4 */
+ hop_addr = get_next_hop_addr(hop_pte);
+ if (hop_addr == ULLONG_MAX)
+ goto not_mapped;
+ hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr,
+ virt_addr);
+ hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
+
+ offset_mask = FLAGS_MASK;
+ }
+
+ if (!(hop_pte & PAGE_PRESENT_MASK))
+ goto not_mapped;
+
+ *phys_addr = (hop_pte & ~offset_mask) | (virt_addr & offset_mask);
+
+ goto out;
+
+not_mapped:
+ dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
+ virt_addr);
+ rc = -EINVAL;
+out:
+ mutex_unlock(&ctx->mmu_lock);
+ return rc;
+}
+
+static ssize_t hl_data_read32(struct file *f, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ char tmp_buf[32];
+ u64 addr = entry->addr;
+ u32 val;
+ ssize_t rc;
+
+ if (atomic_read(&hdev->in_reset)) {
+ dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");
+ return 0;
+ }
+
+ if (*ppos)
+ return 0;
+
+ if (hl_is_device_va(hdev, addr)) {
+ rc = device_va_to_pa(hdev, addr, &addr);
+ if (rc)
+ return rc;
+ }
+
+ rc = hdev->asic_funcs->debugfs_read32(hdev, addr, &val);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
+ return rc;
+ }
+
+ sprintf(tmp_buf, "0x%08x\n", val);
+ return simple_read_from_buffer(buf, count, ppos, tmp_buf,
+ strlen(tmp_buf));
+}
+
+static ssize_t hl_data_write32(struct file *f, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ u64 addr = entry->addr;
+ u32 value;
+ ssize_t rc;
+
+ if (atomic_read(&hdev->in_reset)) {
+ dev_warn_ratelimited(hdev->dev, "Can't write during reset\n");
+ return 0;
+ }
+
+ rc = kstrtouint_from_user(buf, count, 16, &value);
+ if (rc)
+ return rc;
+
+ if (hl_is_device_va(hdev, addr)) {
+ rc = device_va_to_pa(hdev, addr, &addr);
+ if (rc)
+ return rc;
+ }
+
+ rc = hdev->asic_funcs->debugfs_write32(hdev, addr, value);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to write 0x%08x to 0x%010llx\n",
+ value, addr);
+ return rc;
+ }
+
+ return count;
+}
+
+static ssize_t hl_data_read64(struct file *f, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ char tmp_buf[32];
+ u64 addr = entry->addr;
+ u64 val;
+ ssize_t rc;
+
+ if (*ppos)
+ return 0;
+
+ if (hl_is_device_va(hdev, addr)) {
+ rc = device_va_to_pa(hdev, addr, &addr);
+ if (rc)
+ return rc;
+ }
+
+ rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
+ return rc;
+ }
+
+ sprintf(tmp_buf, "0x%016llx\n", val);
+ return simple_read_from_buffer(buf, count, ppos, tmp_buf,
+ strlen(tmp_buf));
+}
+
+static ssize_t hl_data_write64(struct file *f, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ u64 addr = entry->addr;
+ u64 value;
+ ssize_t rc;
+
+ rc = kstrtoull_from_user(buf, count, 16, &value);
+ if (rc)
+ return rc;
+
+ if (hl_is_device_va(hdev, addr)) {
+ rc = device_va_to_pa(hdev, addr, &addr);
+ if (rc)
+ return rc;
+ }
+
+ rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n",
+ value, addr);
+ return rc;
+ }
+
+ return count;
+}
+
+static ssize_t hl_get_power_state(struct file *f, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ char tmp_buf[200];
+ int i;
+
+ if (*ppos)
+ return 0;
+
+ if (hdev->pdev->current_state == PCI_D0)
+ i = 1;
+ else if (hdev->pdev->current_state == PCI_D3hot)
+ i = 2;
+ else
+ i = 3;
+
+ sprintf(tmp_buf,
+ "current power state: %d\n1 - D0\n2 - D3hot\n3 - Unknown\n", i);
+ return simple_read_from_buffer(buf, count, ppos, tmp_buf,
+ strlen(tmp_buf));
+}
+
+static ssize_t hl_set_power_state(struct file *f, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ u32 value;
+ ssize_t rc;
+
+ rc = kstrtouint_from_user(buf, count, 10, &value);
+ if (rc)
+ return rc;
+
+ if (value == 1) {
+ pci_set_power_state(hdev->pdev, PCI_D0);
+ pci_restore_state(hdev->pdev);
+ rc = pci_enable_device(hdev->pdev);
+ } else if (value == 2) {
+ pci_save_state(hdev->pdev);
+ pci_disable_device(hdev->pdev);
+ pci_set_power_state(hdev->pdev, PCI_D3hot);
+ } else {
+ dev_dbg(hdev->dev, "invalid power state value %u\n", value);
+ return -EINVAL;
+ }
+
+ return count;
+}
+
+static ssize_t hl_i2c_data_read(struct file *f, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ char tmp_buf[32];
+ u32 val;
+ ssize_t rc;
+
+ if (*ppos)
+ return 0;
+
+ rc = hl_debugfs_i2c_read(hdev, entry->i2c_bus, entry->i2c_addr,
+ entry->i2c_reg, &val);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to read from I2C bus %d, addr %d, reg %d\n",
+ entry->i2c_bus, entry->i2c_addr, entry->i2c_reg);
+ return rc;
+ }
+
+ sprintf(tmp_buf, "0x%02x\n", val);
+ rc = simple_read_from_buffer(buf, count, ppos, tmp_buf,
+ strlen(tmp_buf));
+
+ return rc;
+}
+
+static ssize_t hl_i2c_data_write(struct file *f, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ u32 value;
+ ssize_t rc;
+
+ rc = kstrtouint_from_user(buf, count, 16, &value);
+ if (rc)
+ return rc;
+
+ rc = hl_debugfs_i2c_write(hdev, entry->i2c_bus, entry->i2c_addr,
+ entry->i2c_reg, value);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to write 0x%02x to I2C bus %d, addr %d, reg %d\n",
+ value, entry->i2c_bus, entry->i2c_addr, entry->i2c_reg);
+ return rc;
+ }
+
+ return count;
+}
+
+static ssize_t hl_led0_write(struct file *f, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ u32 value;
+ ssize_t rc;
+
+ rc = kstrtouint_from_user(buf, count, 10, &value);
+ if (rc)
+ return rc;
+
+ value = value ? 1 : 0;
+
+ hl_debugfs_led_set(hdev, 0, value);
+
+ return count;
+}
+
+static ssize_t hl_led1_write(struct file *f, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ u32 value;
+ ssize_t rc;
+
+ rc = kstrtouint_from_user(buf, count, 10, &value);
+ if (rc)
+ return rc;
+
+ value = value ? 1 : 0;
+
+ hl_debugfs_led_set(hdev, 1, value);
+
+ return count;
+}
+
+static ssize_t hl_led2_write(struct file *f, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ u32 value;
+ ssize_t rc;
+
+ rc = kstrtouint_from_user(buf, count, 10, &value);
+ if (rc)
+ return rc;
+
+ value = value ? 1 : 0;
+
+ hl_debugfs_led_set(hdev, 2, value);
+
+ return count;
+}
+
+static ssize_t hl_device_read(struct file *f, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ static const char *help =
+ "Valid values: disable, enable, suspend, resume, cpu_timeout\n";
+ return simple_read_from_buffer(buf, count, ppos, help, strlen(help));
+}
+
+static ssize_t hl_device_write(struct file *f, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ char data[30] = {0};
+
+ /* don't allow partial writes */
+ if (*ppos != 0)
+ return 0;
+
+ simple_write_to_buffer(data, 29, ppos, buf, count);
+
+ if (strncmp("disable", data, strlen("disable")) == 0) {
+ hdev->disabled = true;
+ } else if (strncmp("enable", data, strlen("enable")) == 0) {
+ hdev->disabled = false;
+ } else if (strncmp("suspend", data, strlen("suspend")) == 0) {
+ hdev->asic_funcs->suspend(hdev);
+ } else if (strncmp("resume", data, strlen("resume")) == 0) {
+ hdev->asic_funcs->resume(hdev);
+ } else if (strncmp("cpu_timeout", data, strlen("cpu_timeout")) == 0) {
+ hdev->device_cpu_disabled = true;
+ } else {
+ dev_err(hdev->dev,
+ "Valid values: disable, enable, suspend, resume, cpu_timeout\n");
+ count = -EINVAL;
+ }
+
+ return count;
+}
+
+static ssize_t hl_clk_gate_read(struct file *f, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ char tmp_buf[200];
+ ssize_t rc;
+
+ if (*ppos)
+ return 0;
+
+ sprintf(tmp_buf, "%d\n", hdev->clock_gating);
+ rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
+ strlen(tmp_buf) + 1);
+
+ return rc;
+}
+
+static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ u32 value;
+ ssize_t rc;
+
+ if (atomic_read(&hdev->in_reset)) {
+ dev_warn_ratelimited(hdev->dev,
+ "Can't change clock gating during reset\n");
+ return 0;
+ }
+
+ rc = kstrtouint_from_user(buf, count, 10, &value);
+ if (rc)
+ return rc;
+
+ if (value) {
+ hdev->clock_gating = 1;
+ if (hdev->asic_funcs->enable_clock_gating)
+ hdev->asic_funcs->enable_clock_gating(hdev);
+ } else {
+ if (hdev->asic_funcs->disable_clock_gating)
+ hdev->asic_funcs->disable_clock_gating(hdev);
+ hdev->clock_gating = 0;
+ }
+
+ return count;
+}
+
+static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ char tmp_buf[200];
+ ssize_t rc;
+
+ if (*ppos)
+ return 0;
+
+ sprintf(tmp_buf, "%d\n", hdev->stop_on_err);
+ rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
+ strlen(tmp_buf) + 1);
+
+ return rc;
+}
+
+static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ struct hl_device *hdev = entry->hdev;
+ u32 value;
+ ssize_t rc;
+
+ if (atomic_read(&hdev->in_reset)) {
+ dev_warn_ratelimited(hdev->dev,
+ "Can't change stop on error during reset\n");
+ return 0;
+ }
+
+ rc = kstrtouint_from_user(buf, count, 10, &value);
+ if (rc)
+ return rc;
+
+ hdev->stop_on_err = value ? 1 : 0;
+
+ hl_device_reset(hdev, false, false);
+
+ return count;
+}
+
+static const struct file_operations hl_data32b_fops = {
+ .owner = THIS_MODULE,
+ .read = hl_data_read32,
+ .write = hl_data_write32
+};
+
+static const struct file_operations hl_data64b_fops = {
+ .owner = THIS_MODULE,
+ .read = hl_data_read64,
+ .write = hl_data_write64
+};
+
+static const struct file_operations hl_i2c_data_fops = {
+ .owner = THIS_MODULE,
+ .read = hl_i2c_data_read,
+ .write = hl_i2c_data_write
+};
+
+static const struct file_operations hl_power_fops = {
+ .owner = THIS_MODULE,
+ .read = hl_get_power_state,
+ .write = hl_set_power_state
+};
+
+static const struct file_operations hl_led0_fops = {
+ .owner = THIS_MODULE,
+ .write = hl_led0_write
+};
+
+static const struct file_operations hl_led1_fops = {
+ .owner = THIS_MODULE,
+ .write = hl_led1_write
+};
+
+static const struct file_operations hl_led2_fops = {
+ .owner = THIS_MODULE,
+ .write = hl_led2_write
+};
+
+static const struct file_operations hl_device_fops = {
+ .owner = THIS_MODULE,
+ .read = hl_device_read,
+ .write = hl_device_write
+};
+
+static const struct file_operations hl_clk_gate_fops = {
+ .owner = THIS_MODULE,
+ .read = hl_clk_gate_read,
+ .write = hl_clk_gate_write
+};
+
+static const struct file_operations hl_stop_on_err_fops = {
+ .owner = THIS_MODULE,
+ .read = hl_stop_on_err_read,
+ .write = hl_stop_on_err_write
+};
+
+static const struct hl_info_list hl_debugfs_list[] = {
+ {"command_buffers", command_buffers_show, NULL},
+ {"command_submission", command_submission_show, NULL},
+ {"command_submission_jobs", command_submission_jobs_show, NULL},
+ {"userptr", userptr_show, NULL},
+ {"vm", vm_show, NULL},
+ {"mmu", mmu_show, mmu_asid_va_write},
+ {"engines", engines_show, NULL}
+};
+
+static int hl_debugfs_open(struct inode *inode, struct file *file)
+{
+ struct hl_debugfs_entry *node = inode->i_private;
+
+ return single_open(file, node->info_ent->show, node);
+}
+
+static ssize_t hl_debugfs_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *f_pos)
+{
+ struct hl_debugfs_entry *node = file->f_inode->i_private;
+
+ if (node->info_ent->write)
+ return node->info_ent->write(file, buf, count, f_pos);
+ else
+ return -EINVAL;
+
+}
+
+static const struct file_operations hl_debugfs_fops = {
+ .owner = THIS_MODULE,
+ .open = hl_debugfs_open,
+ .read = seq_read,
+ .write = hl_debugfs_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void hl_debugfs_add_device(struct hl_device *hdev)
+{
+ struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
+ int count = ARRAY_SIZE(hl_debugfs_list);
+ struct hl_debugfs_entry *entry;
+ struct dentry *ent;
+ int i;
+
+ dev_entry->hdev = hdev;
+ dev_entry->entry_arr = kmalloc_array(count,
+ sizeof(struct hl_debugfs_entry),
+ GFP_KERNEL);
+ if (!dev_entry->entry_arr)
+ return;
+
+ INIT_LIST_HEAD(&dev_entry->file_list);
+ INIT_LIST_HEAD(&dev_entry->cb_list);
+ INIT_LIST_HEAD(&dev_entry->cs_list);
+ INIT_LIST_HEAD(&dev_entry->cs_job_list);
+ INIT_LIST_HEAD(&dev_entry->userptr_list);
+ INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list);
+ mutex_init(&dev_entry->file_mutex);
+ spin_lock_init(&dev_entry->cb_spinlock);
+ spin_lock_init(&dev_entry->cs_spinlock);
+ spin_lock_init(&dev_entry->cs_job_spinlock);
+ spin_lock_init(&dev_entry->userptr_spinlock);
+ spin_lock_init(&dev_entry->ctx_mem_hash_spinlock);
+
+ dev_entry->root = debugfs_create_dir(dev_name(hdev->dev),
+ hl_debug_root);
+
+ debugfs_create_x64("addr",
+ 0644,
+ dev_entry->root,
+ &dev_entry->addr);
+
+ debugfs_create_file("data32",
+ 0644,
+ dev_entry->root,
+ dev_entry,
+ &hl_data32b_fops);
+
+ debugfs_create_file("data64",
+ 0644,
+ dev_entry->root,
+ dev_entry,
+ &hl_data64b_fops);
+
+ debugfs_create_file("set_power_state",
+ 0200,
+ dev_entry->root,
+ dev_entry,
+ &hl_power_fops);
+
+ debugfs_create_u8("i2c_bus",
+ 0644,
+ dev_entry->root,
+ &dev_entry->i2c_bus);
+
+ debugfs_create_u8("i2c_addr",
+ 0644,
+ dev_entry->root,
+ &dev_entry->i2c_addr);
+
+ debugfs_create_u8("i2c_reg",
+ 0644,
+ dev_entry->root,
+ &dev_entry->i2c_reg);
+
+ debugfs_create_file("i2c_data",
+ 0644,
+ dev_entry->root,
+ dev_entry,
+ &hl_i2c_data_fops);
+
+ debugfs_create_file("led0",
+ 0200,
+ dev_entry->root,
+ dev_entry,
+ &hl_led0_fops);
+
+ debugfs_create_file("led1",
+ 0200,
+ dev_entry->root,
+ dev_entry,
+ &hl_led1_fops);
+
+ debugfs_create_file("led2",
+ 0200,
+ dev_entry->root,
+ dev_entry,
+ &hl_led2_fops);
+
+ debugfs_create_file("device",
+ 0200,
+ dev_entry->root,
+ dev_entry,
+ &hl_device_fops);
+
+ debugfs_create_file("clk_gate",
+ 0200,
+ dev_entry->root,
+ dev_entry,
+ &hl_clk_gate_fops);
+
+ debugfs_create_file("stop_on_err",
+ 0644,
+ dev_entry->root,
+ dev_entry,
+ &hl_stop_on_err_fops);
+
+ for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
+
+ ent = debugfs_create_file(hl_debugfs_list[i].name,
+ 0444,
+ dev_entry->root,
+ entry,
+ &hl_debugfs_fops);
+ entry->dent = ent;
+ entry->info_ent = &hl_debugfs_list[i];
+ entry->dev_entry = dev_entry;
+ }
+}
+
+void hl_debugfs_remove_device(struct hl_device *hdev)
+{
+ struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
+
+ debugfs_remove_recursive(entry->root);
+
+ mutex_destroy(&entry->file_mutex);
+ kfree(entry->entry_arr);
+}
+
+void hl_debugfs_add_file(struct hl_fpriv *hpriv)
+{
+ struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
+
+ mutex_lock(&dev_entry->file_mutex);
+ list_add(&hpriv->debugfs_list, &dev_entry->file_list);
+ mutex_unlock(&dev_entry->file_mutex);
+}
+
+void hl_debugfs_remove_file(struct hl_fpriv *hpriv)
+{
+ struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
+
+ mutex_lock(&dev_entry->file_mutex);
+ list_del(&hpriv->debugfs_list);
+ mutex_unlock(&dev_entry->file_mutex);
+}
+
+void hl_debugfs_add_cb(struct hl_cb *cb)
+{
+ struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs;
+
+ spin_lock(&dev_entry->cb_spinlock);
+ list_add(&cb->debugfs_list, &dev_entry->cb_list);
+ spin_unlock(&dev_entry->cb_spinlock);
+}
+
+void hl_debugfs_remove_cb(struct hl_cb *cb)
+{
+ struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs;
+
+ spin_lock(&dev_entry->cb_spinlock);
+ list_del(&cb->debugfs_list);
+ spin_unlock(&dev_entry->cb_spinlock);
+}
+
+void hl_debugfs_add_cs(struct hl_cs *cs)
+{
+ struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs;
+
+ spin_lock(&dev_entry->cs_spinlock);
+ list_add(&cs->debugfs_list, &dev_entry->cs_list);
+ spin_unlock(&dev_entry->cs_spinlock);
+}
+
+void hl_debugfs_remove_cs(struct hl_cs *cs)
+{
+ struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs;
+
+ spin_lock(&dev_entry->cs_spinlock);
+ list_del(&cs->debugfs_list);
+ spin_unlock(&dev_entry->cs_spinlock);
+}
+
+void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job)
+{
+ struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
+
+ spin_lock(&dev_entry->cs_job_spinlock);
+ list_add(&job->debugfs_list, &dev_entry->cs_job_list);
+ spin_unlock(&dev_entry->cs_job_spinlock);
+}
+
+void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job)
+{
+ struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
+
+ spin_lock(&dev_entry->cs_job_spinlock);
+ list_del(&job->debugfs_list);
+ spin_unlock(&dev_entry->cs_job_spinlock);
+}
+
+void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr)
+{
+ struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
+
+ spin_lock(&dev_entry->userptr_spinlock);
+ list_add(&userptr->debugfs_list, &dev_entry->userptr_list);
+ spin_unlock(&dev_entry->userptr_spinlock);
+}
+
+void hl_debugfs_remove_userptr(struct hl_device *hdev,
+ struct hl_userptr *userptr)
+{
+ struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
+
+ spin_lock(&dev_entry->userptr_spinlock);
+ list_del(&userptr->debugfs_list);
+ spin_unlock(&dev_entry->userptr_spinlock);
+}
+
+void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
+{
+ struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
+
+ spin_lock(&dev_entry->ctx_mem_hash_spinlock);
+ list_add(&ctx->debugfs_list, &dev_entry->ctx_mem_hash_list);
+ spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
+}
+
+void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
+{
+ struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
+
+ spin_lock(&dev_entry->ctx_mem_hash_spinlock);
+ list_del(&ctx->debugfs_list);
+ spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
+}
+
+void __init hl_debugfs_init(void)
+{
+ hl_debug_root = debugfs_create_dir("habanalabs", NULL);
+}
+
+void hl_debugfs_fini(void)
+{
+ debugfs_remove_recursive(hl_debug_root);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#define pr_fmt(fmt) "habanalabs: " fmt
+
+#include "habanalabs.h"
+
+#include <linux/pci.h>
+#include <linux/sched/signal.h>
+#include <linux/hwmon.h>
+#include <uapi/misc/habanalabs.h>
+
+#define HL_PLDM_PENDING_RESET_PER_SEC (HL_PENDING_RESET_PER_SEC * 10)
+
+bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
+{
+ if ((hdev->disabled) || (atomic_read(&hdev->in_reset)))
+ return true;
+ else
+ return false;
+}
+
+enum hl_device_status hl_device_status(struct hl_device *hdev)
+{
+ enum hl_device_status status;
+
+ if (hdev->disabled)
+ status = HL_DEVICE_STATUS_MALFUNCTION;
+ else if (atomic_read(&hdev->in_reset))
+ status = HL_DEVICE_STATUS_IN_RESET;
+ else
+ status = HL_DEVICE_STATUS_OPERATIONAL;
+
+ return status;
+}
+
+static void hpriv_release(struct kref *ref)
+{
+ struct hl_fpriv *hpriv;
+ struct hl_device *hdev;
+
+ hpriv = container_of(ref, struct hl_fpriv, refcount);
+
+ hdev = hpriv->hdev;
+
+ put_pid(hpriv->taskpid);
+
+ hl_debugfs_remove_file(hpriv);
+
+ mutex_destroy(&hpriv->restore_phase_mutex);
+
+ mutex_lock(&hdev->fpriv_list_lock);
+ list_del(&hpriv->dev_node);
+ hdev->compute_ctx = NULL;
+ mutex_unlock(&hdev->fpriv_list_lock);
+
+ kfree(hpriv);
+}
+
+void hl_hpriv_get(struct hl_fpriv *hpriv)
+{
+ kref_get(&hpriv->refcount);
+}
+
+void hl_hpriv_put(struct hl_fpriv *hpriv)
+{
+ kref_put(&hpriv->refcount, hpriv_release);
+}
+
+/*
+ * hl_device_release - release function for habanalabs device
+ *
+ * @inode: pointer to inode structure
+ * @filp: pointer to file structure
+ *
+ * Called when process closes an habanalabs device
+ */
+static int hl_device_release(struct inode *inode, struct file *filp)
+{
+ struct hl_fpriv *hpriv = filp->private_data;
+
+ hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
+ hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
+
+ filp->private_data = NULL;
+
+ hl_hpriv_put(hpriv);
+
+ return 0;
+}
+
+static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
+{
+ struct hl_fpriv *hpriv = filp->private_data;
+ struct hl_device *hdev;
+
+ filp->private_data = NULL;
+
+ hdev = hpriv->hdev;
+
+ mutex_lock(&hdev->fpriv_list_lock);
+ list_del(&hpriv->dev_node);
+ mutex_unlock(&hdev->fpriv_list_lock);
+
+ kfree(hpriv);
+
+ return 0;
+}
+
+/*
+ * hl_mmap - mmap function for habanalabs device
+ *
+ * @*filp: pointer to file structure
+ * @*vma: pointer to vm_area_struct of the process
+ *
+ * Called when process does an mmap on habanalabs device. Call the device's mmap
+ * function at the end of the common code.
+ */
+static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct hl_fpriv *hpriv = filp->private_data;
+
+ if ((vma->vm_pgoff & HL_MMAP_CB_MASK) == HL_MMAP_CB_MASK) {
+ vma->vm_pgoff ^= HL_MMAP_CB_MASK;
+ return hl_cb_mmap(hpriv, vma);
+ }
+
+ return -EINVAL;
+}
+
+static const struct file_operations hl_ops = {
+ .owner = THIS_MODULE,
+ .open = hl_device_open,
+ .release = hl_device_release,
+ .mmap = hl_mmap,
+ .unlocked_ioctl = hl_ioctl,
+ .compat_ioctl = hl_ioctl
+};
+
+static const struct file_operations hl_ctrl_ops = {
+ .owner = THIS_MODULE,
+ .open = hl_device_open_ctrl,
+ .release = hl_device_release_ctrl,
+ .unlocked_ioctl = hl_ioctl_control,
+ .compat_ioctl = hl_ioctl_control
+};
+
+static void device_release_func(struct device *dev)
+{
+ kfree(dev);
+}
+
+/*
+ * device_init_cdev - Initialize cdev and device for habanalabs device
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @hclass: pointer to the class object of the device
+ * @minor: minor number of the specific device
+ * @fpos: file operations to install for this device
+ * @name: name of the device as it will appear in the filesystem
+ * @cdev: pointer to the char device object that will be initialized
+ * @dev: pointer to the device object that will be initialized
+ *
+ * Initialize a cdev and a Linux device for habanalabs's device.
+ */
+static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
+ int minor, const struct file_operations *fops,
+ char *name, struct cdev *cdev,
+ struct device **dev)
+{
+ cdev_init(cdev, fops);
+ cdev->owner = THIS_MODULE;
+
+ *dev = kzalloc(sizeof(**dev), GFP_KERNEL);
+ if (!*dev)
+ return -ENOMEM;
+
+ device_initialize(*dev);
+ (*dev)->devt = MKDEV(hdev->major, minor);
+ (*dev)->class = hclass;
+ (*dev)->release = device_release_func;
+ dev_set_drvdata(*dev, hdev);
+ dev_set_name(*dev, "%s", name);
+
+ return 0;
+}
+
+static int device_cdev_sysfs_add(struct hl_device *hdev)
+{
+ int rc;
+
+ rc = cdev_device_add(&hdev->cdev, hdev->dev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "failed to add a char device to the system\n");
+ return rc;
+ }
+
+ rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
+ if (rc) {
+ dev_err(hdev->dev,
+ "failed to add a control char device to the system\n");
+ goto delete_cdev_device;
+ }
+
+ /* hl_sysfs_init() must be done after adding the device to the system */
+ rc = hl_sysfs_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "failed to initialize sysfs\n");
+ goto delete_ctrl_cdev_device;
+ }
+
+ hdev->cdev_sysfs_created = true;
+
+ return 0;
+
+delete_ctrl_cdev_device:
+ cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
+delete_cdev_device:
+ cdev_device_del(&hdev->cdev, hdev->dev);
+ return rc;
+}
+
+static void device_cdev_sysfs_del(struct hl_device *hdev)
+{
+ /* device_release() won't be called so must free devices explicitly */
+ if (!hdev->cdev_sysfs_created) {
+ kfree(hdev->dev_ctrl);
+ kfree(hdev->dev);
+ return;
+ }
+
+ hl_sysfs_fini(hdev);
+ cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
+ cdev_device_del(&hdev->cdev, hdev->dev);
+}
+
+/*
+ * device_early_init - do some early initialization for the habanalabs device
+ *
+ * @hdev: pointer to habanalabs device structure
+ *
+ * Install the relevant function pointers and call the early_init function,
+ * if such a function exists
+ */
+static int device_early_init(struct hl_device *hdev)
+{
+ int i, rc;
+ char workq_name[32];
+
+ switch (hdev->asic_type) {
+ case ASIC_GOYA:
+ goya_set_asic_funcs(hdev);
+ strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
+ break;
+ case ASIC_GAUDI:
+ gaudi_set_asic_funcs(hdev);
+ sprintf(hdev->asic_name, "GAUDI");
+ break;
+ default:
+ dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
+ hdev->asic_type);
+ return -EINVAL;
+ }
+
+ rc = hdev->asic_funcs->early_init(hdev);
+ if (rc)
+ return rc;
+
+ rc = hl_asid_init(hdev);
+ if (rc)
+ goto early_fini;
+
+ if (hdev->asic_prop.completion_queues_count) {
+ hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
+ sizeof(*hdev->cq_wq),
+ GFP_ATOMIC);
+ if (!hdev->cq_wq) {
+ rc = -ENOMEM;
+ goto asid_fini;
+ }
+ }
+
+ for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
+ snprintf(workq_name, 32, "hl-free-jobs-%u", i);
+ hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
+ if (hdev->cq_wq == NULL) {
+ dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
+ rc = -ENOMEM;
+ goto free_cq_wq;
+ }
+ }
+
+ hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
+ if (hdev->eq_wq == NULL) {
+ dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
+ rc = -ENOMEM;
+ goto free_cq_wq;
+ }
+
+ hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
+ GFP_KERNEL);
+ if (!hdev->hl_chip_info) {
+ rc = -ENOMEM;
+ goto free_eq_wq;
+ }
+
+ hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
+ sizeof(struct hl_device_idle_busy_ts),
+ (GFP_KERNEL | __GFP_ZERO));
+ if (!hdev->idle_busy_ts_arr) {
+ rc = -ENOMEM;
+ goto free_chip_info;
+ }
+
+ hl_cb_mgr_init(&hdev->kernel_cb_mgr);
+
+ mutex_init(&hdev->send_cpu_message_lock);
+ mutex_init(&hdev->debug_lock);
+ mutex_init(&hdev->mmu_cache_lock);
+ INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
+ spin_lock_init(&hdev->hw_queues_mirror_lock);
+ INIT_LIST_HEAD(&hdev->fpriv_list);
+ mutex_init(&hdev->fpriv_list_lock);
+ atomic_set(&hdev->in_reset, 0);
+
+ return 0;
+
+free_chip_info:
+ kfree(hdev->hl_chip_info);
+free_eq_wq:
+ destroy_workqueue(hdev->eq_wq);
+free_cq_wq:
+ for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+ if (hdev->cq_wq[i])
+ destroy_workqueue(hdev->cq_wq[i]);
+ kfree(hdev->cq_wq);
+asid_fini:
+ hl_asid_fini(hdev);
+early_fini:
+ if (hdev->asic_funcs->early_fini)
+ hdev->asic_funcs->early_fini(hdev);
+
+ return rc;
+}
+
+/*
+ * device_early_fini - finalize all that was done in device_early_init
+ *
+ * @hdev: pointer to habanalabs device structure
+ *
+ */
+static void device_early_fini(struct hl_device *hdev)
+{
+ int i;
+
+ mutex_destroy(&hdev->mmu_cache_lock);
+ mutex_destroy(&hdev->debug_lock);
+ mutex_destroy(&hdev->send_cpu_message_lock);
+
+ mutex_destroy(&hdev->fpriv_list_lock);
+
+ hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
+
+ kfree(hdev->idle_busy_ts_arr);
+ kfree(hdev->hl_chip_info);
+
+ destroy_workqueue(hdev->eq_wq);
+
+ for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+ destroy_workqueue(hdev->cq_wq[i]);
+ kfree(hdev->cq_wq);
+
+ hl_asid_fini(hdev);
+
+ if (hdev->asic_funcs->early_fini)
+ hdev->asic_funcs->early_fini(hdev);
+}
+
+static void set_freq_to_low_job(struct work_struct *work)
+{
+ struct hl_device *hdev = container_of(work, struct hl_device,
+ work_freq.work);
+
+ mutex_lock(&hdev->fpriv_list_lock);
+
+ if (!hdev->compute_ctx)
+ hl_device_set_frequency(hdev, PLL_LOW);
+
+ mutex_unlock(&hdev->fpriv_list_lock);
+
+ schedule_delayed_work(&hdev->work_freq,
+ usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
+}
+
+static void hl_device_heartbeat(struct work_struct *work)
+{
+ struct hl_device *hdev = container_of(work, struct hl_device,
+ work_heartbeat.work);
+
+ if (hl_device_disabled_or_in_reset(hdev))
+ goto reschedule;
+
+ if (!hdev->asic_funcs->send_heartbeat(hdev))
+ goto reschedule;
+
+ dev_err(hdev->dev, "Device heartbeat failed!\n");
+ hl_device_reset(hdev, true, false);
+
+ return;
+
+reschedule:
+ schedule_delayed_work(&hdev->work_heartbeat,
+ usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
+}
+
+/*
+ * device_late_init - do late stuff initialization for the habanalabs device
+ *
+ * @hdev: pointer to habanalabs device structure
+ *
+ * Do stuff that either needs the device H/W queues to be active or needs
+ * to happen after all the rest of the initialization is finished
+ */
+static int device_late_init(struct hl_device *hdev)
+{
+ int rc;
+
+ if (hdev->asic_funcs->late_init) {
+ rc = hdev->asic_funcs->late_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "failed late initialization for the H/W\n");
+ return rc;
+ }
+ }
+
+ hdev->high_pll = hdev->asic_prop.high_pll;
+
+ /* force setting to low frequency */
+ hdev->curr_pll_profile = PLL_LOW;
+
+ if (hdev->pm_mng_profile == PM_AUTO)
+ hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
+ else
+ hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
+
+ INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
+ schedule_delayed_work(&hdev->work_freq,
+ usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
+
+ if (hdev->heartbeat) {
+ INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
+ schedule_delayed_work(&hdev->work_heartbeat,
+ usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
+ }
+
+ hdev->late_init_done = true;
+
+ return 0;
+}
+
+/*
+ * device_late_fini - finalize all that was done in device_late_init
+ *
+ * @hdev: pointer to habanalabs device structure
+ *
+ */
+static void device_late_fini(struct hl_device *hdev)
+{
+ if (!hdev->late_init_done)
+ return;
+
+ cancel_delayed_work_sync(&hdev->work_freq);
+ if (hdev->heartbeat)
+ cancel_delayed_work_sync(&hdev->work_heartbeat);
+
+ if (hdev->asic_funcs->late_fini)
+ hdev->asic_funcs->late_fini(hdev);
+
+ hdev->late_init_done = false;
+}
+
+uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms)
+{
+ struct hl_device_idle_busy_ts *ts;
+ ktime_t zero_ktime, curr = ktime_get();
+ u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx;
+ s64 period_us, last_start_us, last_end_us, last_busy_time_us,
+ total_busy_time_us = 0, total_busy_time_ms;
+
+ zero_ktime = ktime_set(0, 0);
+ period_us = period_ms * USEC_PER_MSEC;
+ ts = &hdev->idle_busy_ts_arr[last_index];
+
+ /* check case that device is currently in idle */
+ if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) &&
+ !ktime_compare(ts->idle_to_busy_ts, zero_ktime)) {
+
+ last_index--;
+ /* Handle case idle_busy_ts_idx was 0 */
+ if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
+ last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
+
+ ts = &hdev->idle_busy_ts_arr[last_index];
+ }
+
+ while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) {
+ /* Check if we are in last sample case. i.e. if the sample
+ * begun before the sampling period. This could be a real
+ * sample or 0 so need to handle both cases
+ */
+ last_start_us = ktime_to_us(
+ ktime_sub(curr, ts->idle_to_busy_ts));
+
+ if (last_start_us > period_us) {
+
+ /* First check two cases:
+ * 1. If the device is currently busy
+ * 2. If the device was idle during the whole sampling
+ * period
+ */
+
+ if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) {
+ /* Check if the device is currently busy */
+ if (ktime_compare(ts->idle_to_busy_ts,
+ zero_ktime))
+ return 100;
+
+ /* We either didn't have any activity or we
+ * reached an entry which is 0. Either way,
+ * exit and return what was accumulated so far
+ */
+ break;
+ }
+
+ /* If sample has finished, check it is relevant */
+ last_end_us = ktime_to_us(
+ ktime_sub(curr, ts->busy_to_idle_ts));
+
+ if (last_end_us > period_us)
+ break;
+
+ /* It is relevant so add it but with adjustment */
+ last_busy_time_us = ktime_to_us(
+ ktime_sub(ts->busy_to_idle_ts,
+ ts->idle_to_busy_ts));
+ total_busy_time_us += last_busy_time_us -
+ (last_start_us - period_us);
+ break;
+ }
+
+ /* Check if the sample is finished or still open */
+ if (ktime_compare(ts->busy_to_idle_ts, zero_ktime))
+ last_busy_time_us = ktime_to_us(
+ ktime_sub(ts->busy_to_idle_ts,
+ ts->idle_to_busy_ts));
+ else
+ last_busy_time_us = ktime_to_us(
+ ktime_sub(curr, ts->idle_to_busy_ts));
+
+ total_busy_time_us += last_busy_time_us;
+
+ last_index--;
+ /* Handle case idle_busy_ts_idx was 0 */
+ if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
+ last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
+
+ ts = &hdev->idle_busy_ts_arr[last_index];
+
+ overlap_cnt++;
+ }
+
+ total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us,
+ USEC_PER_MSEC);
+
+ return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms);
+}
+
+/*
+ * hl_device_set_frequency - set the frequency of the device
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @freq: the new frequency value
+ *
+ * Change the frequency if needed. This function has no protection against
+ * concurrency, therefore it is assumed that the calling function has protected
+ * itself against the case of calling this function from multiple threads with
+ * different values
+ *
+ * Returns 0 if no change was done, otherwise returns 1
+ */
+int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
+{
+ if ((hdev->pm_mng_profile == PM_MANUAL) ||
+ (hdev->curr_pll_profile == freq))
+ return 0;
+
+ dev_dbg(hdev->dev, "Changing device frequency to %s\n",
+ freq == PLL_HIGH ? "high" : "low");
+
+ hdev->asic_funcs->set_pll_profile(hdev, freq);
+
+ hdev->curr_pll_profile = freq;
+
+ return 1;
+}
+
+int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
+{
+ int rc = 0;
+
+ mutex_lock(&hdev->debug_lock);
+
+ if (!enable) {
+ if (!hdev->in_debug) {
+ dev_err(hdev->dev,
+ "Failed to disable debug mode because device was not in debug mode\n");
+ rc = -EFAULT;
+ goto out;
+ }
+
+ if (!hdev->hard_reset_pending)
+ hdev->asic_funcs->halt_coresight(hdev);
+
+ hdev->in_debug = 0;
+
+ if (!hdev->hard_reset_pending)
+ hdev->asic_funcs->enable_clock_gating(hdev);
+
+ goto out;
+ }
+
+ if (hdev->in_debug) {
+ dev_err(hdev->dev,
+ "Failed to enable debug mode because device is already in debug mode\n");
+ rc = -EFAULT;
+ goto out;
+ }
+
+ hdev->asic_funcs->disable_clock_gating(hdev);
+ hdev->in_debug = 1;
+
+out:
+ mutex_unlock(&hdev->debug_lock);
+
+ return rc;
+}
+
+/*
+ * hl_device_suspend - initiate device suspend
+ *
+ * @hdev: pointer to habanalabs device structure
+ *
+ * Puts the hw in the suspend state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver suspend.
+ */
+int hl_device_suspend(struct hl_device *hdev)
+{
+ int rc;
+
+ pci_save_state(hdev->pdev);
+
+ /* Block future CS/VM/JOB completion operations */
+ rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
+ if (rc) {
+ dev_err(hdev->dev, "Can't suspend while in reset\n");
+ return -EIO;
+ }
+
+ /* This blocks all other stuff that is not blocked by in_reset */
+ hdev->disabled = true;
+
+ /*
+ * Flush anyone that is inside the critical section of enqueue
+ * jobs to the H/W
+ */
+ hdev->asic_funcs->hw_queues_lock(hdev);
+ hdev->asic_funcs->hw_queues_unlock(hdev);
+
+ /* Flush processes that are sending message to CPU */
+ mutex_lock(&hdev->send_cpu_message_lock);
+ mutex_unlock(&hdev->send_cpu_message_lock);
+
+ rc = hdev->asic_funcs->suspend(hdev);
+ if (rc)
+ dev_err(hdev->dev,
+ "Failed to disable PCI access of device CPU\n");
+
+ /* Shut down the device */
+ pci_disable_device(hdev->pdev);
+ pci_set_power_state(hdev->pdev, PCI_D3hot);
+
+ return 0;
+}
+
+/*
+ * hl_device_resume - initiate device resume
+ *
+ * @hdev: pointer to habanalabs device structure
+ *
+ * Bring the hw back to operating state (all asics).
+ * Returns 0 for success or an error on failure.
+ * Called at driver resume.
+ */
+int hl_device_resume(struct hl_device *hdev)
+{
+ int rc;
+
+ pci_set_power_state(hdev->pdev, PCI_D0);
+ pci_restore_state(hdev->pdev);
+ rc = pci_enable_device_mem(hdev->pdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to enable PCI device in resume\n");
+ return rc;
+ }
+
+ pci_set_master(hdev->pdev);
+
+ rc = hdev->asic_funcs->resume(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to resume device after suspend\n");
+ goto disable_device;
+ }
+
+
+ hdev->disabled = false;
+ atomic_set(&hdev->in_reset, 0);
+
+ rc = hl_device_reset(hdev, true, false);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to reset device during resume\n");
+ goto disable_device;
+ }
+
+ return 0;
+
+disable_device:
+ pci_clear_master(hdev->pdev);
+ pci_disable_device(hdev->pdev);
+
+ return rc;
+}
+
+static int device_kill_open_processes(struct hl_device *hdev)
+{
+ u16 pending_total, pending_cnt;
+ struct hl_fpriv *hpriv;
+ struct task_struct *task = NULL;
+
+ if (hdev->pldm)
+ pending_total = HL_PLDM_PENDING_RESET_PER_SEC;
+ else
+ pending_total = HL_PENDING_RESET_PER_SEC;
+
+ /* Giving time for user to close FD, and for processes that are inside
+ * hl_device_open to finish
+ */
+ if (!list_empty(&hdev->fpriv_list))
+ ssleep(1);
+
+ mutex_lock(&hdev->fpriv_list_lock);
+
+ /* This section must be protected because we are dereferencing
+ * pointers that are freed if the process exits
+ */
+ list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) {
+ task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
+ if (task) {
+ dev_info(hdev->dev, "Killing user process pid=%d\n",
+ task_pid_nr(task));
+ send_sig(SIGKILL, task, 1);
+ usleep_range(1000, 10000);
+
+ put_task_struct(task);
+ }
+ }
+
+ mutex_unlock(&hdev->fpriv_list_lock);
+
+ /* We killed the open users, but because the driver cleans up after the
+ * user contexts are closed (e.g. mmu mappings), we need to wait again
+ * to make sure the cleaning phase is finished before continuing with
+ * the reset
+ */
+
+ pending_cnt = pending_total;
+
+ while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) {
+ dev_info(hdev->dev,
+ "Waiting for all unmap operations to finish before hard reset\n");
+
+ pending_cnt--;
+
+ ssleep(1);
+ }
+
+ return list_empty(&hdev->fpriv_list) ? 0 : -EBUSY;
+}
+
+static void device_hard_reset_pending(struct work_struct *work)
+{
+ struct hl_device_reset_work *device_reset_work =
+ container_of(work, struct hl_device_reset_work, reset_work);
+ struct hl_device *hdev = device_reset_work->hdev;
+
+ hl_device_reset(hdev, true, true);
+
+ kfree(device_reset_work);
+}
+
+/*
+ * hl_device_reset - reset the device
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @hard_reset: should we do hard reset to all engines or just reset the
+ * compute/dma engines
+ * @from_hard_reset_thread: is the caller the hard-reset thread
+ *
+ * Block future CS and wait for pending CS to be enqueued
+ * Call ASIC H/W fini
+ * Flush all completions
+ * Re-initialize all internal data structures
+ * Call ASIC H/W init, late_init
+ * Test queues
+ * Enable device
+ *
+ * Returns 0 for success or an error on failure.
+ */
+int hl_device_reset(struct hl_device *hdev, bool hard_reset,
+ bool from_hard_reset_thread)
+{
+ int i, rc;
+
+ if (!hdev->init_done) {
+ dev_err(hdev->dev,
+ "Can't reset before initialization is done\n");
+ return 0;
+ }
+
+ if ((!hard_reset) && (!hdev->supports_soft_reset)) {
+ dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");
+ hard_reset = true;
+ }
+
+ /*
+ * Prevent concurrency in this function - only one reset should be
+ * done at any given time. Only need to perform this if we didn't
+ * get from the dedicated hard reset thread
+ */
+ if (!from_hard_reset_thread) {
+ /* Block future CS/VM/JOB completion operations */
+ rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
+ if (rc)
+ return 0;
+
+ if (hard_reset) {
+ /* Disable PCI access from device F/W so he won't send
+ * us additional interrupts. We disable MSI/MSI-X at
+ * the halt_engines function and we can't have the F/W
+ * sending us interrupts after that. We need to disable
+ * the access here because if the device is marked
+ * disable, the message won't be send. Also, in case
+ * of heartbeat, the device CPU is marked as disable
+ * so this message won't be sent
+ */
+ if (hl_fw_send_pci_access_msg(hdev,
+ ARMCP_PACKET_DISABLE_PCI_ACCESS))
+ dev_warn(hdev->dev,
+ "Failed to disable PCI access by F/W\n");
+ }
+
+ /* This also blocks future CS/VM/JOB completion operations */
+ hdev->disabled = true;
+
+ /* Flush anyone that is inside the critical section of enqueue
+ * jobs to the H/W
+ */
+ hdev->asic_funcs->hw_queues_lock(hdev);
+ hdev->asic_funcs->hw_queues_unlock(hdev);
+
+ /* Flush anyone that is inside device open */
+ mutex_lock(&hdev->fpriv_list_lock);
+ mutex_unlock(&hdev->fpriv_list_lock);
+
+ dev_err(hdev->dev, "Going to RESET device!\n");
+ }
+
+again:
+ if ((hard_reset) && (!from_hard_reset_thread)) {
+ struct hl_device_reset_work *device_reset_work;
+
+ hdev->hard_reset_pending = true;
+
+ device_reset_work = kzalloc(sizeof(*device_reset_work),
+ GFP_ATOMIC);
+ if (!device_reset_work) {
+ rc = -ENOMEM;
+ goto out_err;
+ }
+
+ /*
+ * Because the reset function can't run from interrupt or
+ * from heartbeat work, we need to call the reset function
+ * from a dedicated work
+ */
+ INIT_WORK(&device_reset_work->reset_work,
+ device_hard_reset_pending);
+ device_reset_work->hdev = hdev;
+ schedule_work(&device_reset_work->reset_work);
+
+ return 0;
+ }
+
+ if (hard_reset) {
+ device_late_fini(hdev);
+
+ /*
+ * Now that the heartbeat thread is closed, flush processes
+ * which are sending messages to CPU
+ */
+ mutex_lock(&hdev->send_cpu_message_lock);
+ mutex_unlock(&hdev->send_cpu_message_lock);
+ }
+
+ /*
+ * Halt the engines and disable interrupts so we won't get any more
+ * completions from H/W and we won't have any accesses from the
+ * H/W to the host machine
+ */
+ hdev->asic_funcs->halt_engines(hdev, hard_reset);
+
+ /* Go over all the queues, release all CS and their jobs */
+ hl_cs_rollback_all(hdev);
+
+ if (hard_reset) {
+ /* Kill processes here after CS rollback. This is because the
+ * process can't really exit until all its CSs are done, which
+ * is what we do in cs rollback
+ */
+ rc = device_kill_open_processes(hdev);
+ if (rc) {
+ dev_crit(hdev->dev,
+ "Failed to kill all open processes, stopping hard reset\n");
+ goto out_err;
+ }
+
+ /* Flush the Event queue workers to make sure no other thread is
+ * reading or writing to registers during the reset
+ */
+ flush_workqueue(hdev->eq_wq);
+ }
+
+ /* Release kernel context */
+ if ((hard_reset) && (hl_ctx_put(hdev->kernel_ctx) == 1))
+ hdev->kernel_ctx = NULL;
+
+ /* Reset the H/W. It will be in idle state after this returns */
+ hdev->asic_funcs->hw_fini(hdev, hard_reset);
+
+ if (hard_reset) {
+ hl_vm_fini(hdev);
+ hl_mmu_fini(hdev);
+ hl_eq_reset(hdev, &hdev->event_queue);
+ }
+
+ /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
+ hl_hw_queue_reset(hdev, hard_reset);
+ for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+ hl_cq_reset(hdev, &hdev->completion_queue[i]);
+
+ hdev->idle_busy_ts_idx = 0;
+ hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0);
+ hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0);
+
+ if (hdev->cs_active_cnt)
+ dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n",
+ hdev->cs_active_cnt);
+
+ mutex_lock(&hdev->fpriv_list_lock);
+
+ /* Make sure the context switch phase will run again */
+ if (hdev->compute_ctx) {
+ atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1);
+ hdev->compute_ctx->thread_ctx_switch_wait_token = 0;
+ }
+
+ mutex_unlock(&hdev->fpriv_list_lock);
+
+ /* Finished tear-down, starting to re-initialize */
+
+ if (hard_reset) {
+ hdev->device_cpu_disabled = false;
+ hdev->hard_reset_pending = false;
+
+ if (hdev->kernel_ctx) {
+ dev_crit(hdev->dev,
+ "kernel ctx was alive during hard reset, something is terribly wrong\n");
+ rc = -EBUSY;
+ goto out_err;
+ }
+
+ rc = hl_mmu_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to initialize MMU S/W after hard reset\n");
+ goto out_err;
+ }
+
+ /* Allocate the kernel context */
+ hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
+ GFP_KERNEL);
+ if (!hdev->kernel_ctx) {
+ rc = -ENOMEM;
+ goto out_err;
+ }
+
+ hdev->compute_ctx = NULL;
+
+ rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
+ if (rc) {
+ dev_err(hdev->dev,
+ "failed to init kernel ctx in hard reset\n");
+ kfree(hdev->kernel_ctx);
+ hdev->kernel_ctx = NULL;
+ goto out_err;
+ }
+ }
+
+ rc = hdev->asic_funcs->hw_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "failed to initialize the H/W after reset\n");
+ goto out_err;
+ }
+
+ hdev->disabled = false;
+
+ /* Check that the communication with the device is working */
+ rc = hdev->asic_funcs->test_queues(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to detect if device is alive after reset\n");
+ goto out_err;
+ }
+
+ if (hard_reset) {
+ rc = device_late_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed late init after hard reset\n");
+ goto out_err;
+ }
+
+ rc = hl_vm_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to init memory module after hard reset\n");
+ goto out_err;
+ }
+
+ hl_set_max_power(hdev, hdev->max_power);
+ } else {
+ rc = hdev->asic_funcs->soft_reset_late_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed late init after soft reset\n");
+ goto out_err;
+ }
+ }
+
+ atomic_set(&hdev->in_reset, 0);
+
+ if (hard_reset)
+ hdev->hard_reset_cnt++;
+ else
+ hdev->soft_reset_cnt++;
+
+ dev_warn(hdev->dev, "Successfully finished resetting the device\n");
+
+ return 0;
+
+out_err:
+ hdev->disabled = true;
+
+ if (hard_reset) {
+ dev_err(hdev->dev,
+ "Failed to reset! Device is NOT usable\n");
+ hdev->hard_reset_cnt++;
+ } else {
+ dev_err(hdev->dev,
+ "Failed to do soft-reset, trying hard reset\n");
+ hdev->soft_reset_cnt++;
+ hard_reset = true;
+ goto again;
+ }
+
+ atomic_set(&hdev->in_reset, 0);
+
+ return rc;
+}
+
+/*
+ * hl_device_init - main initialization function for habanalabs device
+ *
+ * @hdev: pointer to habanalabs device structure
+ *
+ * Allocate an id for the device, do early initialization and then call the
+ * ASIC specific initialization functions. Finally, create the cdev and the
+ * Linux device to expose it to the user
+ */
+int hl_device_init(struct hl_device *hdev, struct class *hclass)
+{
+ int i, rc, cq_cnt, cq_ready_cnt;
+ char *name;
+ bool add_cdev_sysfs_on_err = false;
+
+ name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
+ if (!name) {
+ rc = -ENOMEM;
+ goto out_disabled;
+ }
+
+ /* Initialize cdev and device structures */
+ rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
+ &hdev->cdev, &hdev->dev);
+
+ kfree(name);
+
+ if (rc)
+ goto out_disabled;
+
+ name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
+ if (!name) {
+ rc = -ENOMEM;
+ goto free_dev;
+ }
+
+ /* Initialize cdev and device structures for control device */
+ rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
+ name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
+
+ kfree(name);
+
+ if (rc)
+ goto free_dev;
+
+ /* Initialize ASIC function pointers and perform early init */
+ rc = device_early_init(hdev);
+ if (rc)
+ goto free_dev_ctrl;
+
+ /*
+ * Start calling ASIC initialization. First S/W then H/W and finally
+ * late init
+ */
+ rc = hdev->asic_funcs->sw_init(hdev);
+ if (rc)
+ goto early_fini;
+
+ /*
+ * Initialize the H/W queues. Must be done before hw_init, because
+ * there the addresses of the kernel queue are being written to the
+ * registers of the device
+ */
+ rc = hl_hw_queues_create(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "failed to initialize kernel queues\n");
+ goto sw_fini;
+ }
+
+ cq_cnt = hdev->asic_prop.completion_queues_count;
+
+ /*
+ * Initialize the completion queues. Must be done before hw_init,
+ * because there the addresses of the completion queues are being
+ * passed as arguments to request_irq
+ */
+ if (cq_cnt) {
+ hdev->completion_queue = kcalloc(cq_cnt,
+ sizeof(*hdev->completion_queue),
+ GFP_KERNEL);
+
+ if (!hdev->completion_queue) {
+ dev_err(hdev->dev,
+ "failed to allocate completion queues\n");
+ rc = -ENOMEM;
+ goto hw_queues_destroy;
+ }
+ }
+
+ for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
+ rc = hl_cq_init(hdev, &hdev->completion_queue[i],
+ hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
+ if (rc) {
+ dev_err(hdev->dev,
+ "failed to initialize completion queue\n");
+ goto cq_fini;
+ }
+ hdev->completion_queue[i].cq_idx = i;
+ }
+
+ /*
+ * Initialize the event queue. Must be done before hw_init,
+ * because there the address of the event queue is being
+ * passed as argument to request_irq
+ */
+ rc = hl_eq_init(hdev, &hdev->event_queue);
+ if (rc) {
+ dev_err(hdev->dev, "failed to initialize event queue\n");
+ goto cq_fini;
+ }
+
+ /* MMU S/W must be initialized before kernel context is created */
+ rc = hl_mmu_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
+ goto eq_fini;
+ }
+
+ /* Allocate the kernel context */
+ hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
+ if (!hdev->kernel_ctx) {
+ rc = -ENOMEM;
+ goto mmu_fini;
+ }
+
+ hdev->compute_ctx = NULL;
+
+ rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
+ if (rc) {
+ dev_err(hdev->dev, "failed to initialize kernel context\n");
+ kfree(hdev->kernel_ctx);
+ goto mmu_fini;
+ }
+
+ rc = hl_cb_pool_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "failed to initialize CB pool\n");
+ goto release_ctx;
+ }
+
+ hl_debugfs_add_device(hdev);
+
+ if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
+ dev_info(hdev->dev,
+ "H/W state is dirty, must reset before initializing\n");
+ hdev->asic_funcs->halt_engines(hdev, true);
+ hdev->asic_funcs->hw_fini(hdev, true);
+ }
+
+ /*
+ * From this point, in case of an error, add char devices and create
+ * sysfs nodes as part of the error flow, to allow debugging.
+ */
+ add_cdev_sysfs_on_err = true;
+
+ rc = hdev->asic_funcs->hw_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "failed to initialize the H/W\n");
+ rc = 0;
+ goto out_disabled;
+ }
+
+ hdev->disabled = false;
+
+ /* Check that the communication with the device is working */
+ rc = hdev->asic_funcs->test_queues(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to detect if device is alive\n");
+ rc = 0;
+ goto out_disabled;
+ }
+
+ rc = device_late_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed late initialization\n");
+ rc = 0;
+ goto out_disabled;
+ }
+
+ dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
+ hdev->asic_name,
+ hdev->asic_prop.dram_size / 1024 / 1024 / 1024);
+
+ rc = hl_vm_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to initialize memory module\n");
+ rc = 0;
+ goto out_disabled;
+ }
+
+ /*
+ * Expose devices and sysfs nodes to user.
+ * From here there is no need to add char devices and create sysfs nodes
+ * in case of an error.
+ */
+ add_cdev_sysfs_on_err = false;
+ rc = device_cdev_sysfs_add(hdev);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to add char devices and sysfs nodes\n");
+ rc = 0;
+ goto out_disabled;
+ }
+
+ /*
+ * hl_hwmon_init() must be called after device_late_init(), because only
+ * there we get the information from the device about which
+ * hwmon-related sensors the device supports.
+ * Furthermore, it must be done after adding the device to the system.
+ */
+ rc = hl_hwmon_init(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to initialize hwmon\n");
+ rc = 0;
+ goto out_disabled;
+ }
+
+ dev_notice(hdev->dev,
+ "Successfully added device to habanalabs driver\n");
+
+ hdev->init_done = true;
+
+ return 0;
+
+release_ctx:
+ if (hl_ctx_put(hdev->kernel_ctx) != 1)
+ dev_err(hdev->dev,
+ "kernel ctx is still alive on initialization failure\n");
+mmu_fini:
+ hl_mmu_fini(hdev);
+eq_fini:
+ hl_eq_fini(hdev, &hdev->event_queue);
+cq_fini:
+ for (i = 0 ; i < cq_ready_cnt ; i++)
+ hl_cq_fini(hdev, &hdev->completion_queue[i]);
+ kfree(hdev->completion_queue);
+hw_queues_destroy:
+ hl_hw_queues_destroy(hdev);
+sw_fini:
+ hdev->asic_funcs->sw_fini(hdev);
+early_fini:
+ device_early_fini(hdev);
+free_dev_ctrl:
+ kfree(hdev->dev_ctrl);
+free_dev:
+ kfree(hdev->dev);
+out_disabled:
+ hdev->disabled = true;
+ if (add_cdev_sysfs_on_err)
+ device_cdev_sysfs_add(hdev);
+ if (hdev->pdev)
+ dev_err(&hdev->pdev->dev,
+ "Failed to initialize hl%d. Device is NOT usable !\n",
+ hdev->id / 2);
+ else
+ pr_err("Failed to initialize hl%d. Device is NOT usable !\n",
+ hdev->id / 2);
+
+ return rc;
+}
+
+/*
+ * hl_device_fini - main tear-down function for habanalabs device
+ *
+ * @hdev: pointer to habanalabs device structure
+ *
+ * Destroy the device, call ASIC fini functions and release the id
+ */
+void hl_device_fini(struct hl_device *hdev)
+{
+ int i, rc;
+ ktime_t timeout;
+
+ dev_info(hdev->dev, "Removing device\n");
+
+ /*
+ * This function is competing with the reset function, so try to
+ * take the reset atomic and if we are already in middle of reset,
+ * wait until reset function is finished. Reset function is designed
+ * to always finish. However, in Gaudi, because of all the network
+ * ports, the hard reset could take between 10-30 seconds
+ */
+
+ timeout = ktime_add_us(ktime_get(),
+ HL_HARD_RESET_MAX_TIMEOUT * 1000 * 1000);
+ rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
+ while (rc) {
+ usleep_range(50, 200);
+ rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
+ if (ktime_compare(ktime_get(), timeout) > 0) {
+ WARN(1, "Failed to remove device because reset function did not finish\n");
+ return;
+ }
+ }
+
+ /* Mark device as disabled */
+ hdev->disabled = true;
+
+ /* Flush anyone that is inside the critical section of enqueue
+ * jobs to the H/W
+ */
+ hdev->asic_funcs->hw_queues_lock(hdev);
+ hdev->asic_funcs->hw_queues_unlock(hdev);
+
+ /* Flush anyone that is inside device open */
+ mutex_lock(&hdev->fpriv_list_lock);
+ mutex_unlock(&hdev->fpriv_list_lock);
+
+ hdev->hard_reset_pending = true;
+
+ hl_hwmon_fini(hdev);
+
+ device_late_fini(hdev);
+
+ hl_debugfs_remove_device(hdev);
+
+ /*
+ * Halt the engines and disable interrupts so we won't get any more
+ * completions from H/W and we won't have any accesses from the
+ * H/W to the host machine
+ */
+ hdev->asic_funcs->halt_engines(hdev, true);
+
+ /* Go over all the queues, release all CS and their jobs */
+ hl_cs_rollback_all(hdev);
+
+ /* Kill processes here after CS rollback. This is because the process
+ * can't really exit until all its CSs are done, which is what we
+ * do in cs rollback
+ */
+ rc = device_kill_open_processes(hdev);
+ if (rc)
+ dev_crit(hdev->dev, "Failed to kill all open processes\n");
+
+ hl_cb_pool_fini(hdev);
+
+ /* Release kernel context */
+ if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
+ dev_err(hdev->dev, "kernel ctx is still alive\n");
+
+ /* Reset the H/W. It will be in idle state after this returns */
+ hdev->asic_funcs->hw_fini(hdev, true);
+
+ hl_vm_fini(hdev);
+
+ hl_mmu_fini(hdev);
+
+ hl_eq_fini(hdev, &hdev->event_queue);
+
+ for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+ hl_cq_fini(hdev, &hdev->completion_queue[i]);
+ kfree(hdev->completion_queue);
+
+ hl_hw_queues_destroy(hdev);
+
+ /* Call ASIC S/W finalize function */
+ hdev->asic_funcs->sw_fini(hdev);
+
+ device_early_fini(hdev);
+
+ /* Hide devices and sysfs nodes from user */
+ device_cdev_sysfs_del(hdev);
+
+ pr_info("removed device successfully\n");
+}
+
+/*
+ * MMIO register access helper functions.
+ */
+
+/*
+ * hl_rreg - Read an MMIO register
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @reg: MMIO register offset (in bytes)
+ *
+ * Returns the value of the MMIO register we are asked to read
+ *
+ */
+inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
+{
+ return readl(hdev->rmmio + reg);
+}
+
+/*
+ * hl_wreg - Write to an MMIO register
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @reg: MMIO register offset (in bytes)
+ * @val: 32-bit value
+ *
+ * Writes the 32-bit value into the MMIO register
+ *
+ */
+inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
+{
+ writel(val, hdev->rmmio + reg);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+#include "include/common/hl_boot_if.h"
+
+#include <linux/firmware.h>
+#include <linux/genalloc.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/slab.h>
+
+/**
+ * hl_fw_load_fw_to_device() - Load F/W code to device's memory.
+ *
+ * @hdev: pointer to hl_device structure.
+ * @fw_name: the firmware image name
+ * @dst: IO memory mapped address space to copy firmware to
+ *
+ * Copy fw code from firmware file to device memory.
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
+ void __iomem *dst)
+{
+ const struct firmware *fw;
+ const u64 *fw_data;
+ size_t fw_size;
+ int rc;
+
+ rc = request_firmware(&fw, fw_name, hdev->dev);
+ if (rc) {
+ dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name);
+ goto out;
+ }
+
+ fw_size = fw->size;
+ if ((fw_size % 4) != 0) {
+ dev_err(hdev->dev, "Illegal %s firmware size %zu\n",
+ fw_name, fw_size);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
+
+ fw_data = (const u64 *) fw->data;
+
+ memcpy_toio(dst, fw_data, fw_size);
+
+out:
+ release_firmware(fw);
+ return rc;
+}
+
+int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
+{
+ struct armcp_packet pkt = {};
+
+ pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT);
+
+ return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
+ sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL);
+}
+
+int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
+ u16 len, u32 timeout, long *result)
+{
+ struct armcp_packet *pkt;
+ dma_addr_t pkt_dma_addr;
+ u32 tmp;
+ int rc = 0;
+
+ pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
+ &pkt_dma_addr);
+ if (!pkt) {
+ dev_err(hdev->dev,
+ "Failed to allocate DMA memory for packet to CPU\n");
+ return -ENOMEM;
+ }
+
+ memcpy(pkt, msg, len);
+
+ mutex_lock(&hdev->send_cpu_message_lock);
+
+ if (hdev->disabled)
+ goto out;
+
+ if (hdev->device_cpu_disabled) {
+ rc = -EIO;
+ goto out;
+ }
+
+ rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
+ goto out;
+ }
+
+ rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
+ (tmp == ARMCP_PACKET_FENCE_VAL), 1000,
+ timeout, true);
+
+ hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
+
+ if (rc == -ETIMEDOUT) {
+ dev_err(hdev->dev, "Device CPU packet timeout (0x%x)\n", tmp);
+ hdev->device_cpu_disabled = true;
+ goto out;
+ }
+
+ tmp = le32_to_cpu(pkt->ctl);
+
+ rc = (tmp & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT;
+ if (rc) {
+ dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
+ rc,
+ (tmp & ARMCP_PKT_CTL_OPCODE_MASK)
+ >> ARMCP_PKT_CTL_OPCODE_SHIFT);
+ rc = -EIO;
+ } else if (result) {
+ *result = (long) le64_to_cpu(pkt->result);
+ }
+
+out:
+ mutex_unlock(&hdev->send_cpu_message_lock);
+
+ hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt);
+
+ return rc;
+}
+
+int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
+{
+ struct armcp_packet pkt;
+ long result;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.value = cpu_to_le64(event_type);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_DEVICE_TIMEOUT_USEC, &result);
+
+ if (rc)
+ dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
+
+ return rc;
+}
+
+int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
+ size_t irq_arr_size)
+{
+ struct armcp_unmask_irq_arr_packet *pkt;
+ size_t total_pkt_size;
+ long result;
+ int rc;
+
+ total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
+ irq_arr_size;
+
+ /* data should be aligned to 8 bytes in order to ArmCP to copy it */
+ total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
+
+ /* total_pkt_size is casted to u16 later on */
+ if (total_pkt_size > USHRT_MAX) {
+ dev_err(hdev->dev, "too many elements in IRQ array\n");
+ return -EINVAL;
+ }
+
+ pkt = kzalloc(total_pkt_size, GFP_KERNEL);
+ if (!pkt)
+ return -ENOMEM;
+
+ pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
+ memcpy(&pkt->irqs, irq_arr, irq_arr_size);
+
+ pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
+ total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result);
+
+ if (rc)
+ dev_err(hdev->dev, "failed to unmask IRQ array\n");
+
+ kfree(pkt);
+
+ return rc;
+}
+
+int hl_fw_test_cpu_queue(struct hl_device *hdev)
+{
+ struct armcp_packet test_pkt = {};
+ long result;
+ int rc;
+
+ test_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
+ sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
+
+ if (!rc) {
+ if (result != ARMCP_PACKET_FENCE_VAL)
+ dev_err(hdev->dev,
+ "CPU queue test failed (0x%08lX)\n", result);
+ } else {
+ dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc);
+ }
+
+ return rc;
+}
+
+void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
+ dma_addr_t *dma_handle)
+{
+ u64 kernel_addr;
+
+ kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
+
+ *dma_handle = hdev->cpu_accessible_dma_address +
+ (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem);
+
+ return (void *) (uintptr_t) kernel_addr;
+}
+
+void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
+ void *vaddr)
+{
+ gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
+ size);
+}
+
+int hl_fw_send_heartbeat(struct hl_device *hdev)
+{
+ struct armcp_packet hb_pkt = {};
+ long result;
+ int rc;
+
+ hb_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
+ sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
+
+ if ((rc) || (result != ARMCP_PACKET_FENCE_VAL))
+ rc = -EIO;
+
+ return rc;
+}
+
+int hl_fw_armcp_info_get(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct armcp_packet pkt = {};
+ void *armcp_info_cpu_addr;
+ dma_addr_t armcp_info_dma_addr;
+ long result;
+ int rc;
+
+ armcp_info_cpu_addr =
+ hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
+ sizeof(struct armcp_info),
+ &armcp_info_dma_addr);
+ if (!armcp_info_cpu_addr) {
+ dev_err(hdev->dev,
+ "Failed to allocate DMA memory for ArmCP info packet\n");
+ return -ENOMEM;
+ }
+
+ memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.addr = cpu_to_le64(armcp_info_dma_addr);
+ pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info));
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to handle ArmCP info pkt, error %d\n", rc);
+ goto out;
+ }
+
+ memcpy(&prop->armcp_info, armcp_info_cpu_addr,
+ sizeof(prop->armcp_info));
+
+ rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to build hwmon channel info, error %d\n", rc);
+ rc = -EFAULT;
+ goto out;
+ }
+
+out:
+ hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
+ sizeof(struct armcp_info), armcp_info_cpu_addr);
+
+ return rc;
+}
+
+int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
+{
+ struct armcp_packet pkt = {};
+ void *eeprom_info_cpu_addr;
+ dma_addr_t eeprom_info_dma_addr;
+ long result;
+ int rc;
+
+ eeprom_info_cpu_addr =
+ hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
+ max_size, &eeprom_info_dma_addr);
+ if (!eeprom_info_cpu_addr) {
+ dev_err(hdev->dev,
+ "Failed to allocate DMA memory for ArmCP EEPROM packet\n");
+ return -ENOMEM;
+ }
+
+ memset(eeprom_info_cpu_addr, 0, max_size);
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.addr = cpu_to_le64(eeprom_info_dma_addr);
+ pkt.data_max_size = cpu_to_le32(max_size);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_ARMCP_EEPROM_TIMEOUT_USEC, &result);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to handle ArmCP EEPROM packet, error %d\n", rc);
+ goto out;
+ }
+
+ /* result contains the actual size */
+ memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size));
+
+out:
+ hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size,
+ eeprom_info_cpu_addr);
+
+ return rc;
+}
+
+static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
+{
+ u32 err_val;
+
+ /* Some of the firmware status codes are deprecated in newer f/w
+ * versions. In those versions, the errors are reported
+ * in different registers. Therefore, we need to check those
+ * registers and print the exact errors. Moreover, there
+ * may be multiple errors, so we need to report on each error
+ * separately. Some of the error codes might indicate a state
+ * that is not an error per-se, but it is an error in production
+ * environment
+ */
+ err_val = RREG32(boot_err0_reg);
+ if (!(err_val & CPU_BOOT_ERR0_ENABLED))
+ return;
+
+ if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
+ dev_err(hdev->dev,
+ "Device boot error - DRAM initialization failed\n");
+ if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
+ dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
+ if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
+ dev_err(hdev->dev,
+ "Device boot error - Thermal Sensor initialization failed\n");
+ if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
+ dev_warn(hdev->dev,
+ "Device boot warning - Skipped DRAM initialization\n");
+ if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED)
+ dev_warn(hdev->dev,
+ "Device boot error - Skipped waiting for BMC\n");
+ if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
+ dev_err(hdev->dev,
+ "Device boot error - Serdes data from BMC not available\n");
+ if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
+ dev_err(hdev->dev,
+ "Device boot error - NIC F/W initialization failed\n");
+}
+
+static void hl_detect_cpu_boot_status(struct hl_device *hdev, u32 status)
+{
+ switch (status) {
+ case CPU_BOOT_STATUS_NA:
+ dev_err(hdev->dev,
+ "Device boot error - BTL did NOT run\n");
+ break;
+ case CPU_BOOT_STATUS_IN_WFE:
+ dev_err(hdev->dev,
+ "Device boot error - Stuck inside WFE loop\n");
+ break;
+ case CPU_BOOT_STATUS_IN_BTL:
+ dev_err(hdev->dev,
+ "Device boot error - Stuck in BTL\n");
+ break;
+ case CPU_BOOT_STATUS_IN_PREBOOT:
+ dev_err(hdev->dev,
+ "Device boot error - Stuck in Preboot\n");
+ break;
+ case CPU_BOOT_STATUS_IN_SPL:
+ dev_err(hdev->dev,
+ "Device boot error - Stuck in SPL\n");
+ break;
+ case CPU_BOOT_STATUS_IN_UBOOT:
+ dev_err(hdev->dev,
+ "Device boot error - Stuck in u-boot\n");
+ break;
+ case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
+ dev_err(hdev->dev,
+ "Device boot error - DRAM initialization failed\n");
+ break;
+ case CPU_BOOT_STATUS_UBOOT_NOT_READY:
+ dev_err(hdev->dev,
+ "Device boot error - u-boot stopped by user\n");
+ break;
+ case CPU_BOOT_STATUS_TS_INIT_FAIL:
+ dev_err(hdev->dev,
+ "Device boot error - Thermal Sensor initialization failed\n");
+ break;
+ default:
+ dev_err(hdev->dev,
+ "Device boot error - Invalid status code %d\n",
+ status);
+ break;
+ }
+}
+
+int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
+ u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
+ u32 boot_err0_reg, bool skip_bmc,
+ u32 cpu_timeout, u32 boot_fit_timeout)
+{
+ u32 status;
+ int rc;
+
+ dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
+ cpu_timeout / USEC_PER_SEC);
+
+ /* Wait for boot FIT request */
+ rc = hl_poll_timeout(
+ hdev,
+ cpu_boot_status_reg,
+ status,
+ status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
+ 10000,
+ boot_fit_timeout);
+
+ if (rc) {
+ dev_dbg(hdev->dev,
+ "No boot fit request received, resuming boot\n");
+ } else {
+ rc = hdev->asic_funcs->load_boot_fit_to_device(hdev);
+ if (rc)
+ goto out;
+
+ /* Clear device CPU message status */
+ WREG32(cpu_msg_status_reg, CPU_MSG_CLR);
+
+ /* Signal device CPU that boot loader is ready */
+ WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
+
+ /* Poll for CPU device ack */
+ rc = hl_poll_timeout(
+ hdev,
+ cpu_msg_status_reg,
+ status,
+ status == CPU_MSG_OK,
+ 10000,
+ boot_fit_timeout);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Timeout waiting for boot fit load ack\n");
+ goto out;
+ }
+
+ /* Clear message */
+ WREG32(msg_to_cpu_reg, KMD_MSG_NA);
+ }
+
+ /* Make sure CPU boot-loader is running */
+ rc = hl_poll_timeout(
+ hdev,
+ cpu_boot_status_reg,
+ status,
+ (status == CPU_BOOT_STATUS_DRAM_RDY) ||
+ (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
+ (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
+ (status == CPU_BOOT_STATUS_SRAM_AVAIL),
+ 10000,
+ cpu_timeout);
+
+ /* Read U-Boot, preboot versions now in case we will later fail */
+ hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
+ hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
+
+ /* Some of the status codes below are deprecated in newer f/w
+ * versions but we keep them here for backward compatibility
+ */
+ if (rc) {
+ hl_detect_cpu_boot_status(hdev, status);
+ rc = -EIO;
+ goto out;
+ }
+
+ if (!hdev->fw_loading) {
+ dev_info(hdev->dev, "Skip loading FW\n");
+ goto out;
+ }
+
+ if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
+ goto out;
+
+ dev_info(hdev->dev,
+ "Loading firmware to device, may take some time...\n");
+
+ rc = hdev->asic_funcs->load_firmware_to_device(hdev);
+ if (rc)
+ goto out;
+
+ if (skip_bmc) {
+ WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC);
+
+ rc = hl_poll_timeout(
+ hdev,
+ cpu_boot_status_reg,
+ status,
+ (status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED),
+ 10000,
+ cpu_timeout);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to get ACK on skipping BMC, %d\n",
+ status);
+ WREG32(msg_to_cpu_reg, KMD_MSG_NA);
+ rc = -EIO;
+ goto out;
+ }
+ }
+
+ WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
+
+ rc = hl_poll_timeout(
+ hdev,
+ cpu_boot_status_reg,
+ status,
+ (status == CPU_BOOT_STATUS_SRAM_AVAIL),
+ 10000,
+ cpu_timeout);
+
+ /* Clear message */
+ WREG32(msg_to_cpu_reg, KMD_MSG_NA);
+
+ if (rc) {
+ if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
+ dev_err(hdev->dev,
+ "Device reports FIT image is corrupted\n");
+ else
+ dev_err(hdev->dev,
+ "Failed to load firmware to device, %d\n",
+ status);
+
+ rc = -EIO;
+ goto out;
+ }
+
+ dev_info(hdev->dev, "Successfully loaded firmware to device\n");
+
+out:
+ fw_read_errors(hdev, boot_err0_reg);
+
+ return rc;
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef HABANALABSP_H_
+#define HABANALABSP_H_
+
+#include "include/common/armcp_if.h"
+#include "include/common/qman_if.h"
+#include <uapi/misc/habanalabs.h>
+
+#include <linux/cdev.h>
+#include <linux/iopoll.h>
+#include <linux/irqreturn.h>
+#include <linux/dma-fence.h>
+#include <linux/dma-direction.h>
+#include <linux/scatterlist.h>
+#include <linux/hashtable.h>
+
+#define HL_NAME "habanalabs"
+
+#define HL_MMAP_CB_MASK (0x8000000000000000ull >> PAGE_SHIFT)
+
+#define HL_PENDING_RESET_PER_SEC 30
+
+#define HL_HARD_RESET_MAX_TIMEOUT 120
+
+#define HL_DEVICE_TIMEOUT_USEC 1000000 /* 1 s */
+
+#define HL_HEARTBEAT_PER_USEC 5000000 /* 5 s */
+
+#define HL_PLL_LOW_JOB_FREQ_USEC 5000000 /* 5 s */
+
+#define HL_ARMCP_INFO_TIMEOUT_USEC 10000000 /* 10s */
+#define HL_ARMCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */
+
+#define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */
+
+#define HL_SIM_MAX_TIMEOUT_US 10000000 /* 10s */
+
+#define HL_IDLE_BUSY_TS_ARR_SIZE 4096
+
+/* Memory */
+#define MEM_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
+
+/* MMU */
+#define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
+
+/*
+ * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream
+ * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream
+ */
+#define HL_RSVD_SOBS 4
+#define HL_RSVD_MONS 2
+
+#define HL_RSVD_SOBS_IN_USE 2
+#define HL_RSVD_MONS_IN_USE 1
+
+#define HL_MAX_SOB_VAL (1 << 15)
+
+#define IS_POWER_OF_2(n) (n != 0 && ((n & (n - 1)) == 0))
+#define IS_MAX_PENDING_CS_VALID(n) (IS_POWER_OF_2(n) && (n > 1))
+
+#define HL_PCI_NUM_BARS 6
+
+/**
+ * struct pgt_info - MMU hop page info.
+ * @node: hash linked-list node for the pgts shadow hash of pgts.
+ * @phys_addr: physical address of the pgt.
+ * @shadow_addr: shadow hop in the host.
+ * @ctx: pointer to the owner ctx.
+ * @num_of_ptes: indicates how many ptes are used in the pgt.
+ *
+ * The MMU page tables hierarchy is placed on the DRAM. When a new level (hop)
+ * is needed during mapping, a new page is allocated and this structure holds
+ * its essential information. During unmapping, if no valid PTEs remained in the
+ * page, it is freed with its pgt_info structure.
+ */
+struct pgt_info {
+ struct hlist_node node;
+ u64 phys_addr;
+ u64 shadow_addr;
+ struct hl_ctx *ctx;
+ int num_of_ptes;
+};
+
+struct hl_device;
+struct hl_fpriv;
+
+/**
+ * enum hl_pci_match_mode - pci match mode per region
+ * @PCI_ADDRESS_MATCH_MODE: address match mode
+ * @PCI_BAR_MATCH_MODE: bar match mode
+ */
+enum hl_pci_match_mode {
+ PCI_ADDRESS_MATCH_MODE,
+ PCI_BAR_MATCH_MODE
+};
+
+/**
+ * enum hl_fw_component - F/W components to read version through registers.
+ * @FW_COMP_UBOOT: u-boot.
+ * @FW_COMP_PREBOOT: preboot.
+ */
+enum hl_fw_component {
+ FW_COMP_UBOOT,
+ FW_COMP_PREBOOT
+};
+
+/**
+ * enum hl_queue_type - Supported QUEUE types.
+ * @QUEUE_TYPE_NA: queue is not available.
+ * @QUEUE_TYPE_EXT: external queue which is a DMA channel that may access the
+ * host.
+ * @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's
+ * memories and/or operates the compute engines.
+ * @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU.
+ * @QUEUE_TYPE_HW: queue of DMA and compute engines jobs, for which completion
+ * notifications are sent by H/W.
+ */
+enum hl_queue_type {
+ QUEUE_TYPE_NA,
+ QUEUE_TYPE_EXT,
+ QUEUE_TYPE_INT,
+ QUEUE_TYPE_CPU,
+ QUEUE_TYPE_HW
+};
+
+enum hl_cs_type {
+ CS_TYPE_DEFAULT,
+ CS_TYPE_SIGNAL,
+ CS_TYPE_WAIT
+};
+
+/*
+ * struct hl_inbound_pci_region - inbound region descriptor
+ * @mode: pci match mode for this region
+ * @addr: region target address
+ * @size: region size in bytes
+ * @offset_in_bar: offset within bar (address match mode)
+ * @bar: bar id
+ */
+struct hl_inbound_pci_region {
+ enum hl_pci_match_mode mode;
+ u64 addr;
+ u64 size;
+ u64 offset_in_bar;
+ u8 bar;
+};
+
+/*
+ * struct hl_outbound_pci_region - outbound region descriptor
+ * @addr: region target address
+ * @size: region size in bytes
+ */
+struct hl_outbound_pci_region {
+ u64 addr;
+ u64 size;
+};
+
+/*
+ * struct hl_hw_sob - H/W SOB info.
+ * @hdev: habanalabs device structure.
+ * @kref: refcount of this SOB. The SOB will reset once the refcount is zero.
+ * @sob_id: id of this SOB.
+ * @q_idx: the H/W queue that uses this SOB.
+ */
+struct hl_hw_sob {
+ struct hl_device *hdev;
+ struct kref kref;
+ u32 sob_id;
+ u32 q_idx;
+};
+
+/**
+ * struct hw_queue_properties - queue information.
+ * @type: queue type.
+ * @driver_only: true if only the driver is allowed to send a job to this queue,
+ * false otherwise.
+ * @requires_kernel_cb: true if a CB handle must be provided for jobs on this
+ * queue, false otherwise (a CB address must be provided).
+ * @supports_sync_stream: True if queue supports sync stream
+ */
+struct hw_queue_properties {
+ enum hl_queue_type type;
+ u8 driver_only;
+ u8 requires_kernel_cb;
+ u8 supports_sync_stream;
+};
+
+/**
+ * enum vm_type_t - virtual memory mapping request information.
+ * @VM_TYPE_USERPTR: mapping of user memory to device virtual address.
+ * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address.
+ */
+enum vm_type_t {
+ VM_TYPE_USERPTR = 0x1,
+ VM_TYPE_PHYS_PACK = 0x2
+};
+
+/**
+ * enum hl_device_hw_state - H/W device state. use this to understand whether
+ * to do reset before hw_init or not
+ * @HL_DEVICE_HW_STATE_CLEAN: H/W state is clean. i.e. after hard reset
+ * @HL_DEVICE_HW_STATE_DIRTY: H/W state is dirty. i.e. we started to execute
+ * hw_init
+ */
+enum hl_device_hw_state {
+ HL_DEVICE_HW_STATE_CLEAN = 0,
+ HL_DEVICE_HW_STATE_DIRTY
+};
+
+/**
+ * struct hl_mmu_properties - ASIC specific MMU address translation properties.
+ * @start_addr: virtual start address of the memory region.
+ * @end_addr: virtual end address of the memory region.
+ * @hop0_shift: shift of hop 0 mask.
+ * @hop1_shift: shift of hop 1 mask.
+ * @hop2_shift: shift of hop 2 mask.
+ * @hop3_shift: shift of hop 3 mask.
+ * @hop4_shift: shift of hop 4 mask.
+ * @hop0_mask: mask to get the PTE address in hop 0.
+ * @hop1_mask: mask to get the PTE address in hop 1.
+ * @hop2_mask: mask to get the PTE address in hop 2.
+ * @hop3_mask: mask to get the PTE address in hop 3.
+ * @hop4_mask: mask to get the PTE address in hop 4.
+ * @page_size: default page size used to allocate memory.
+ */
+struct hl_mmu_properties {
+ u64 start_addr;
+ u64 end_addr;
+ u64 hop0_shift;
+ u64 hop1_shift;
+ u64 hop2_shift;
+ u64 hop3_shift;
+ u64 hop4_shift;
+ u64 hop0_mask;
+ u64 hop1_mask;
+ u64 hop2_mask;
+ u64 hop3_mask;
+ u64 hop4_mask;
+ u32 page_size;
+};
+
+/**
+ * struct asic_fixed_properties - ASIC specific immutable properties.
+ * @hw_queues_props: H/W queues properties.
+ * @armcp_info: received various information from ArmCP regarding the H/W, e.g.
+ * available sensors.
+ * @uboot_ver: F/W U-boot version.
+ * @preboot_ver: F/W Preboot version.
+ * @dmmu: DRAM MMU address translation properties.
+ * @pmmu: PCI (host) MMU address translation properties.
+ * @pmmu_huge: PCI (host) MMU address translation properties for memory
+ * allocated with huge pages.
+ * @sram_base_address: SRAM physical start address.
+ * @sram_end_address: SRAM physical end address.
+ * @sram_user_base_address - SRAM physical start address for user access.
+ * @dram_base_address: DRAM physical start address.
+ * @dram_end_address: DRAM physical end address.
+ * @dram_user_base_address: DRAM physical start address for user access.
+ * @dram_size: DRAM total size.
+ * @dram_pci_bar_size: size of PCI bar towards DRAM.
+ * @max_power_default: max power of the device after reset
+ * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
+ * fault.
+ * @pcie_dbi_base_address: Base address of the PCIE_DBI block.
+ * @pcie_aux_dbi_reg_addr: Address of the PCIE_AUX DBI register.
+ * @mmu_pgt_addr: base physical address in DRAM of MMU page tables.
+ * @mmu_dram_default_page_addr: DRAM default page physical address.
+ * @mmu_pgt_size: MMU page tables total size.
+ * @mmu_pte_size: PTE size in MMU page tables.
+ * @mmu_hop_table_size: MMU hop table size.
+ * @mmu_hop0_tables_total_size: total size of MMU hop0 tables.
+ * @dram_page_size: page size for MMU DRAM allocation.
+ * @cfg_size: configuration space size on SRAM.
+ * @sram_size: total size of SRAM.
+ * @max_asid: maximum number of open contexts (ASIDs).
+ * @num_of_events: number of possible internal H/W IRQs.
+ * @psoc_pci_pll_nr: PCI PLL NR value.
+ * @psoc_pci_pll_nf: PCI PLL NF value.
+ * @psoc_pci_pll_od: PCI PLL OD value.
+ * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value.
+ * @psoc_timestamp_frequency: frequency of the psoc timestamp clock.
+ * @high_pll: high PLL frequency used by the device.
+ * @cb_pool_cb_cnt: number of CBs in the CB pool.
+ * @cb_pool_cb_size: size of each CB in the CB pool.
+ * @max_pending_cs: maximum of concurrent pending command submissions
+ * @max_queues: maximum amount of queues in the system
+ * @sync_stream_first_sob: first sync object available for sync stream use
+ * @sync_stream_first_mon: first monitor available for sync stream use
+ * @tpc_enabled_mask: which TPCs are enabled.
+ * @completion_queues_count: number of completion queues.
+ */
+struct asic_fixed_properties {
+ struct hw_queue_properties *hw_queues_props;
+ struct armcp_info armcp_info;
+ char uboot_ver[VERSION_MAX_LEN];
+ char preboot_ver[VERSION_MAX_LEN];
+ struct hl_mmu_properties dmmu;
+ struct hl_mmu_properties pmmu;
+ struct hl_mmu_properties pmmu_huge;
+ u64 sram_base_address;
+ u64 sram_end_address;
+ u64 sram_user_base_address;
+ u64 dram_base_address;
+ u64 dram_end_address;
+ u64 dram_user_base_address;
+ u64 dram_size;
+ u64 dram_pci_bar_size;
+ u64 max_power_default;
+ u64 dram_size_for_default_page_mapping;
+ u64 pcie_dbi_base_address;
+ u64 pcie_aux_dbi_reg_addr;
+ u64 mmu_pgt_addr;
+ u64 mmu_dram_default_page_addr;
+ u32 mmu_pgt_size;
+ u32 mmu_pte_size;
+ u32 mmu_hop_table_size;
+ u32 mmu_hop0_tables_total_size;
+ u32 dram_page_size;
+ u32 cfg_size;
+ u32 sram_size;
+ u32 max_asid;
+ u32 num_of_events;
+ u32 psoc_pci_pll_nr;
+ u32 psoc_pci_pll_nf;
+ u32 psoc_pci_pll_od;
+ u32 psoc_pci_pll_div_factor;
+ u32 psoc_timestamp_frequency;
+ u32 high_pll;
+ u32 cb_pool_cb_cnt;
+ u32 cb_pool_cb_size;
+ u32 max_pending_cs;
+ u32 max_queues;
+ u16 sync_stream_first_sob;
+ u16 sync_stream_first_mon;
+ u8 tpc_enabled_mask;
+ u8 completion_queues_count;
+};
+
+/**
+ * struct hl_cs_compl - command submission completion object.
+ * @base_fence: kernel fence object.
+ * @lock: spinlock to protect fence.
+ * @hdev: habanalabs device structure.
+ * @hw_sob: the H/W SOB used in this signal/wait CS.
+ * @cs_seq: command submission sequence number.
+ * @type: type of the CS - signal/wait.
+ * @sob_val: the SOB value that is used in this signal/wait CS.
+ */
+struct hl_cs_compl {
+ struct dma_fence base_fence;
+ spinlock_t lock;
+ struct hl_device *hdev;
+ struct hl_hw_sob *hw_sob;
+ u64 cs_seq;
+ enum hl_cs_type type;
+ u16 sob_val;
+};
+
+/*
+ * Command Buffers
+ */
+
+/**
+ * struct hl_cb_mgr - describes a Command Buffer Manager.
+ * @cb_lock: protects cb_handles.
+ * @cb_handles: an idr to hold all command buffer handles.
+ */
+struct hl_cb_mgr {
+ spinlock_t cb_lock;
+ struct idr cb_handles; /* protected by cb_lock */
+};
+
+/**
+ * struct hl_cb - describes a Command Buffer.
+ * @refcount: reference counter for usage of the CB.
+ * @hdev: pointer to device this CB belongs to.
+ * @lock: spinlock to protect mmap/cs flows.
+ * @debugfs_list: node in debugfs list of command buffers.
+ * @pool_list: node in pool list of command buffers.
+ * @kernel_address: Holds the CB's kernel virtual address.
+ * @bus_address: Holds the CB's DMA address.
+ * @mmap_size: Holds the CB's size that was mmaped.
+ * @size: holds the CB's size.
+ * @id: the CB's ID.
+ * @cs_cnt: holds number of CS that this CB participates in.
+ * @ctx_id: holds the ID of the owner's context.
+ * @mmap: true if the CB is currently mmaped to user.
+ * @is_pool: true if CB was acquired from the pool, false otherwise.
+ */
+struct hl_cb {
+ struct kref refcount;
+ struct hl_device *hdev;
+ spinlock_t lock;
+ struct list_head debugfs_list;
+ struct list_head pool_list;
+ u64 kernel_address;
+ dma_addr_t bus_address;
+ u32 mmap_size;
+ u32 size;
+ u32 id;
+ u32 cs_cnt;
+ u32 ctx_id;
+ u8 mmap;
+ u8 is_pool;
+};
+
+
+/*
+ * QUEUES
+ */
+
+struct hl_cs_job;
+
+/* Queue length of external and HW queues */
+#define HL_QUEUE_LENGTH 4096
+#define HL_QUEUE_SIZE_IN_BYTES (HL_QUEUE_LENGTH * HL_BD_SIZE)
+
+#if (HL_MAX_JOBS_PER_CS > HL_QUEUE_LENGTH)
+#error "HL_QUEUE_LENGTH must be greater than HL_MAX_JOBS_PER_CS"
+#endif
+
+/* HL_CQ_LENGTH is in units of struct hl_cq_entry */
+#define HL_CQ_LENGTH HL_QUEUE_LENGTH
+#define HL_CQ_SIZE_IN_BYTES (HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE)
+
+/* Must be power of 2 */
+#define HL_EQ_LENGTH 64
+#define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
+
+/* Host <-> ArmCP shared memory size */
+#define HL_CPU_ACCESSIBLE_MEM_SIZE SZ_2M
+
+/**
+ * struct hl_hw_queue - describes a H/W transport queue.
+ * @hw_sob: array of the used H/W SOBs by this H/W queue.
+ * @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
+ * @queue_type: type of queue.
+ * @kernel_address: holds the queue's kernel virtual address.
+ * @bus_address: holds the queue's DMA address.
+ * @pi: holds the queue's pi value.
+ * @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci).
+ * @hw_queue_id: the id of the H/W queue.
+ * @cq_id: the id for the corresponding CQ for this H/W queue.
+ * @msi_vec: the IRQ number of the H/W queue.
+ * @int_queue_len: length of internal queue (number of entries).
+ * @next_sob_val: the next value to use for the currently used SOB.
+ * @base_sob_id: the base SOB id of the SOBs used by this queue.
+ * @base_mon_id: the base MON id of the MONs used by this queue.
+ * @valid: is the queue valid (we have array of 32 queues, not all of them
+ * exist).
+ * @curr_sob_offset: the id offset to the currently used SOB from the
+ * HL_RSVD_SOBS that are being used by this queue.
+ * @supports_sync_stream: True if queue supports sync stream
+ */
+struct hl_hw_queue {
+ struct hl_hw_sob hw_sob[HL_RSVD_SOBS];
+ struct hl_cs_job **shadow_queue;
+ enum hl_queue_type queue_type;
+ u64 kernel_address;
+ dma_addr_t bus_address;
+ u32 pi;
+ atomic_t ci;
+ u32 hw_queue_id;
+ u32 cq_id;
+ u32 msi_vec;
+ u16 int_queue_len;
+ u16 next_sob_val;
+ u16 base_sob_id;
+ u16 base_mon_id;
+ u8 valid;
+ u8 curr_sob_offset;
+ u8 supports_sync_stream;
+};
+
+/**
+ * struct hl_cq - describes a completion queue
+ * @hdev: pointer to the device structure
+ * @kernel_address: holds the queue's kernel virtual address
+ * @bus_address: holds the queue's DMA address
+ * @cq_idx: completion queue index in array
+ * @hw_queue_id: the id of the matching H/W queue
+ * @ci: ci inside the queue
+ * @pi: pi inside the queue
+ * @free_slots_cnt: counter of free slots in queue
+ */
+struct hl_cq {
+ struct hl_device *hdev;
+ u64 kernel_address;
+ dma_addr_t bus_address;
+ u32 cq_idx;
+ u32 hw_queue_id;
+ u32 ci;
+ u32 pi;
+ atomic_t free_slots_cnt;
+};
+
+/**
+ * struct hl_eq - describes the event queue (single one per device)
+ * @hdev: pointer to the device structure
+ * @kernel_address: holds the queue's kernel virtual address
+ * @bus_address: holds the queue's DMA address
+ * @ci: ci inside the queue
+ */
+struct hl_eq {
+ struct hl_device *hdev;
+ u64 kernel_address;
+ dma_addr_t bus_address;
+ u32 ci;
+};
+
+
+/*
+ * ASICs
+ */
+
+/**
+ * enum hl_asic_type - supported ASIC types.
+ * @ASIC_INVALID: Invalid ASIC type.
+ * @ASIC_GOYA: Goya device.
+ * @ASIC_GAUDI: Gaudi device.
+ */
+enum hl_asic_type {
+ ASIC_INVALID,
+ ASIC_GOYA,
+ ASIC_GAUDI
+};
+
+struct hl_cs_parser;
+
+/**
+ * enum hl_pm_mng_profile - power management profile.
+ * @PM_AUTO: internal clock is set by the Linux driver.
+ * @PM_MANUAL: internal clock is set by the user.
+ * @PM_LAST: last power management type.
+ */
+enum hl_pm_mng_profile {
+ PM_AUTO = 1,
+ PM_MANUAL,
+ PM_LAST
+};
+
+/**
+ * enum hl_pll_frequency - PLL frequency.
+ * @PLL_HIGH: high frequency.
+ * @PLL_LOW: low frequency.
+ * @PLL_LAST: last frequency values that were configured by the user.
+ */
+enum hl_pll_frequency {
+ PLL_HIGH = 1,
+ PLL_LOW,
+ PLL_LAST
+};
+
+#define PLL_REF_CLK 50
+
+enum div_select_defs {
+ DIV_SEL_REF_CLK = 0,
+ DIV_SEL_PLL_CLK = 1,
+ DIV_SEL_DIVIDED_REF = 2,
+ DIV_SEL_DIVIDED_PLL = 3,
+};
+
+/**
+ * struct hl_asic_funcs - ASIC specific functions that are can be called from
+ * common code.
+ * @early_init: sets up early driver state (pre sw_init), doesn't configure H/W.
+ * @early_fini: tears down what was done in early_init.
+ * @late_init: sets up late driver/hw state (post hw_init) - Optional.
+ * @late_fini: tears down what was done in late_init (pre hw_fini) - Optional.
+ * @sw_init: sets up driver state, does not configure H/W.
+ * @sw_fini: tears down driver state, does not configure H/W.
+ * @hw_init: sets up the H/W state.
+ * @hw_fini: tears down the H/W state.
+ * @halt_engines: halt engines, needed for reset sequence. This also disables
+ * interrupts from the device. Should be called before
+ * hw_fini and before CS rollback.
+ * @suspend: handles IP specific H/W or SW changes for suspend.
+ * @resume: handles IP specific H/W or SW changes for resume.
+ * @cb_mmap: maps a CB.
+ * @ring_doorbell: increment PI on a given QMAN.
+ * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific
+ * function because the PQs are located in different memory areas
+ * per ASIC (SRAM, DRAM, Host memory) and therefore, the method of
+ * writing the PQE must match the destination memory area
+ * properties.
+ * @asic_dma_alloc_coherent: Allocate coherent DMA memory by calling
+ * dma_alloc_coherent(). This is ASIC function because
+ * its implementation is not trivial when the driver
+ * is loaded in simulation mode (not upstreamed).
+ * @asic_dma_free_coherent: Free coherent DMA memory by calling
+ * dma_free_coherent(). This is ASIC function because
+ * its implementation is not trivial when the driver
+ * is loaded in simulation mode (not upstreamed).
+ * @get_int_queue_base: get the internal queue base address.
+ * @test_queues: run simple test on all queues for sanity check.
+ * @asic_dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool.
+ * size of allocation is HL_DMA_POOL_BLK_SIZE.
+ * @asic_dma_pool_free: free small DMA allocation from pool.
+ * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
+ * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
+ * @hl_dma_unmap_sg: DMA unmap scatter-gather list.
+ * @cs_parser: parse Command Submission.
+ * @asic_dma_map_sg: DMA map scatter-gather list.
+ * @get_dma_desc_list_size: get number of LIN_DMA packets required for CB.
+ * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it.
+ * @update_eq_ci: update event queue CI.
+ * @context_switch: called upon ASID context switch.
+ * @restore_phase_topology: clear all SOBs amd MONs.
+ * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM.
+ * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM.
+ * @add_device_attr: add ASIC specific device attributes.
+ * @handle_eqe: handle event queue entry (IRQ) from ArmCP.
+ * @set_pll_profile: change PLL profile (manual/automatic).
+ * @get_events_stat: retrieve event queue entries histogram.
+ * @read_pte: read MMU page table entry from DRAM.
+ * @write_pte: write MMU page table entry to DRAM.
+ * @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft
+ * (L1 only) or hard (L0 & L1) flush.
+ * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
+ * ASID-VA-size mask.
+ * @send_heartbeat: send is-alive packet to ArmCP and verify response.
+ * @enable_clock_gating: enable clock gating for reducing power consumption.
+ * @disable_clock_gating: disable clock for accessing registers on HBW.
+ * @debug_coresight: perform certain actions on Coresight for debugging.
+ * @is_device_idle: return true if device is idle, false otherwise.
+ * @soft_reset_late_init: perform certain actions needed after soft reset.
+ * @hw_queues_lock: acquire H/W queues lock.
+ * @hw_queues_unlock: release H/W queues lock.
+ * @get_pci_id: retrieve PCI ID.
+ * @get_eeprom_data: retrieve EEPROM data from F/W.
+ * @send_cpu_message: send buffer to ArmCP.
+ * @get_hw_state: retrieve the H/W state
+ * @pci_bars_map: Map PCI BARs.
+ * @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns
+ * old address the bar pointed to or U64_MAX for failure
+ * @init_iatu: Initialize the iATU unit inside the PCI controller.
+ * @rreg: Read a register. Needed for simulator support.
+ * @wreg: Write a register. Needed for simulator support.
+ * @halt_coresight: stop the ETF and ETR traces.
+ * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
+ * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
+ * @read_device_fw_version: read the device's firmware versions that are
+ * contained in registers
+ * @load_firmware_to_device: load the firmware to the device's memory
+ * @load_boot_fit_to_device: load boot fit to device's memory
+ * @get_signal_cb_size: Get signal CB size.
+ * @get_wait_cb_size: Get wait CB size.
+ * @gen_signal_cb: Generate a signal CB.
+ * @gen_wait_cb: Generate a wait CB.
+ * @reset_sob: Reset a SOB.
+ * @set_dma_mask_from_fw: set the DMA mask in the driver according to the
+ * firmware configuration
+ * @get_device_time: Get the device time.
+ */
+struct hl_asic_funcs {
+ int (*early_init)(struct hl_device *hdev);
+ int (*early_fini)(struct hl_device *hdev);
+ int (*late_init)(struct hl_device *hdev);
+ void (*late_fini)(struct hl_device *hdev);
+ int (*sw_init)(struct hl_device *hdev);
+ int (*sw_fini)(struct hl_device *hdev);
+ int (*hw_init)(struct hl_device *hdev);
+ void (*hw_fini)(struct hl_device *hdev, bool hard_reset);
+ void (*halt_engines)(struct hl_device *hdev, bool hard_reset);
+ int (*suspend)(struct hl_device *hdev);
+ int (*resume)(struct hl_device *hdev);
+ int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
+ u64 kaddress, phys_addr_t paddress, u32 size);
+ void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
+ void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
+ struct hl_bd *bd);
+ void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag);
+ void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size,
+ void *cpu_addr, dma_addr_t dma_handle);
+ void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id,
+ dma_addr_t *dma_handle, u16 *queue_len);
+ int (*test_queues)(struct hl_device *hdev);
+ void* (*asic_dma_pool_zalloc)(struct hl_device *hdev, size_t size,
+ gfp_t mem_flags, dma_addr_t *dma_handle);
+ void (*asic_dma_pool_free)(struct hl_device *hdev, void *vaddr,
+ dma_addr_t dma_addr);
+ void* (*cpu_accessible_dma_pool_alloc)(struct hl_device *hdev,
+ size_t size, dma_addr_t *dma_handle);
+ void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
+ size_t size, void *vaddr);
+ void (*hl_dma_unmap_sg)(struct hl_device *hdev,
+ struct scatterlist *sgl, int nents,
+ enum dma_data_direction dir);
+ int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
+ int (*asic_dma_map_sg)(struct hl_device *hdev,
+ struct scatterlist *sgl, int nents,
+ enum dma_data_direction dir);
+ u32 (*get_dma_desc_list_size)(struct hl_device *hdev,
+ struct sg_table *sgt);
+ void (*add_end_of_cb_packets)(struct hl_device *hdev,
+ u64 kernel_address, u32 len,
+ u64 cq_addr, u32 cq_val, u32 msix_num,
+ bool eb);
+ void (*update_eq_ci)(struct hl_device *hdev, u32 val);
+ int (*context_switch)(struct hl_device *hdev, u32 asid);
+ void (*restore_phase_topology)(struct hl_device *hdev);
+ int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val);
+ int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val);
+ int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val);
+ int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val);
+ void (*add_device_attr)(struct hl_device *hdev,
+ struct attribute_group *dev_attr_grp);
+ void (*handle_eqe)(struct hl_device *hdev,
+ struct hl_eq_entry *eq_entry);
+ void (*set_pll_profile)(struct hl_device *hdev,
+ enum hl_pll_frequency freq);
+ void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
+ u32 *size);
+ u64 (*read_pte)(struct hl_device *hdev, u64 addr);
+ void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val);
+ int (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard,
+ u32 flags);
+ int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
+ u32 asid, u64 va, u64 size);
+ int (*send_heartbeat)(struct hl_device *hdev);
+ void (*enable_clock_gating)(struct hl_device *hdev);
+ void (*disable_clock_gating)(struct hl_device *hdev);
+ int (*debug_coresight)(struct hl_device *hdev, void *data);
+ bool (*is_device_idle)(struct hl_device *hdev, u32 *mask,
+ struct seq_file *s);
+ int (*soft_reset_late_init)(struct hl_device *hdev);
+ void (*hw_queues_lock)(struct hl_device *hdev);
+ void (*hw_queues_unlock)(struct hl_device *hdev);
+ u32 (*get_pci_id)(struct hl_device *hdev);
+ int (*get_eeprom_data)(struct hl_device *hdev, void *data,
+ size_t max_size);
+ int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
+ u16 len, u32 timeout, long *result);
+ enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev);
+ int (*pci_bars_map)(struct hl_device *hdev);
+ u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
+ int (*init_iatu)(struct hl_device *hdev);
+ u32 (*rreg)(struct hl_device *hdev, u32 reg);
+ void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
+ void (*halt_coresight)(struct hl_device *hdev);
+ int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
+ u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
+ void (*read_device_fw_version)(struct hl_device *hdev,
+ enum hl_fw_component fwc);
+ int (*load_firmware_to_device)(struct hl_device *hdev);
+ int (*load_boot_fit_to_device)(struct hl_device *hdev);
+ u32 (*get_signal_cb_size)(struct hl_device *hdev);
+ u32 (*get_wait_cb_size)(struct hl_device *hdev);
+ void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id);
+ void (*gen_wait_cb)(struct hl_device *hdev, void *data, u16 sob_id,
+ u16 sob_val, u16 mon_id, u32 q_idx);
+ void (*reset_sob)(struct hl_device *hdev, void *data);
+ void (*set_dma_mask_from_fw)(struct hl_device *hdev);
+ u64 (*get_device_time)(struct hl_device *hdev);
+};
+
+
+/*
+ * CONTEXTS
+ */
+
+#define HL_KERNEL_ASID_ID 0
+
+/**
+ * struct hl_va_range - virtual addresses range.
+ * @lock: protects the virtual addresses list.
+ * @list: list of virtual addresses blocks available for mappings.
+ * @start_addr: range start address.
+ * @end_addr: range end address.
+ */
+struct hl_va_range {
+ struct mutex lock;
+ struct list_head list;
+ u64 start_addr;
+ u64 end_addr;
+};
+
+/**
+ * struct hl_ctx - user/kernel context.
+ * @mem_hash: holds mapping from virtual address to virtual memory area
+ * descriptor (hl_vm_phys_pg_list or hl_userptr).
+ * @mmu_shadow_hash: holds a mapping from shadow address to pgt_info structure.
+ * @hpriv: pointer to the private (Kernel Driver) data of the process (fd).
+ * @hdev: pointer to the device structure.
+ * @refcount: reference counter for the context. Context is released only when
+ * this hits 0l. It is incremented on CS and CS_WAIT.
+ * @cs_pending: array of DMA fence objects representing pending CS.
+ * @host_va_range: holds available virtual addresses for host mappings.
+ * @host_huge_va_range: holds available virtual addresses for host mappings
+ * with huge pages.
+ * @dram_va_range: holds available virtual addresses for DRAM mappings.
+ * @mem_hash_lock: protects the mem_hash.
+ * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
+ * MMU hash or walking the PGT requires talking this lock.
+ * @debugfs_list: node in debugfs list of contexts.
+ * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
+ * to user so user could inquire about CS. It is used as
+ * index to cs_pending array.
+ * @dram_default_hops: array that holds all hops addresses needed for default
+ * DRAM mapping.
+ * @cs_lock: spinlock to protect cs_sequence.
+ * @dram_phys_mem: amount of used physical DRAM memory by this context.
+ * @thread_ctx_switch_token: token to prevent multiple threads of the same
+ * context from running the context switch phase.
+ * Only a single thread should run it.
+ * @thread_ctx_switch_wait_token: token to prevent the threads that didn't run
+ * the context switch phase from moving to their
+ * execution phase before the context switch phase
+ * has finished.
+ * @asid: context's unique address space ID in the device's MMU.
+ * @handle: context's opaque handle for user
+ */
+struct hl_ctx {
+ DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
+ DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS);
+ struct hl_fpriv *hpriv;
+ struct hl_device *hdev;
+ struct kref refcount;
+ struct dma_fence **cs_pending;
+ struct hl_va_range *host_va_range;
+ struct hl_va_range *host_huge_va_range;
+ struct hl_va_range *dram_va_range;
+ struct mutex mem_hash_lock;
+ struct mutex mmu_lock;
+ struct list_head debugfs_list;
+ struct hl_cs_counters cs_counters;
+ u64 cs_sequence;
+ u64 *dram_default_hops;
+ spinlock_t cs_lock;
+ atomic64_t dram_phys_mem;
+ atomic_t thread_ctx_switch_token;
+ u32 thread_ctx_switch_wait_token;
+ u32 asid;
+ u32 handle;
+};
+
+/**
+ * struct hl_ctx_mgr - for handling multiple contexts.
+ * @ctx_lock: protects ctx_handles.
+ * @ctx_handles: idr to hold all ctx handles.
+ */
+struct hl_ctx_mgr {
+ struct mutex ctx_lock;
+ struct idr ctx_handles;
+};
+
+
+
+/*
+ * COMMAND SUBMISSIONS
+ */
+
+/**
+ * struct hl_userptr - memory mapping chunk information
+ * @vm_type: type of the VM.
+ * @job_node: linked-list node for hanging the object on the Job's list.
+ * @vec: pointer to the frame vector.
+ * @sgt: pointer to the scatter-gather table that holds the pages.
+ * @dir: for DMA unmapping, the direction must be supplied, so save it.
+ * @debugfs_list: node in debugfs list of command submissions.
+ * @addr: user-space virtual address of the start of the memory area.
+ * @size: size of the memory area to pin & map.
+ * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
+ */
+struct hl_userptr {
+ enum vm_type_t vm_type; /* must be first */
+ struct list_head job_node;
+ struct frame_vector *vec;
+ struct sg_table *sgt;
+ enum dma_data_direction dir;
+ struct list_head debugfs_list;
+ u64 addr;
+ u32 size;
+ u8 dma_mapped;
+};
+
+/**
+ * struct hl_cs - command submission.
+ * @jobs_in_queue_cnt: per each queue, maintain counter of submitted jobs.
+ * @ctx: the context this CS belongs to.
+ * @job_list: list of the CS's jobs in the various queues.
+ * @job_lock: spinlock for the CS's jobs list. Needed for free_job.
+ * @refcount: reference counter for usage of the CS.
+ * @fence: pointer to the fence object of this CS.
+ * @signal_fence: pointer to the fence object of the signal CS (used by wait
+ * CS only).
+ * @finish_work: workqueue object to run when CS is completed by H/W.
+ * @work_tdr: delayed work node for TDR.
+ * @mirror_node : node in device mirror list of command submissions.
+ * @debugfs_list: node in debugfs list of command submissions.
+ * @sequence: the sequence number of this CS.
+ * @type: CS_TYPE_*.
+ * @submitted: true if CS was submitted to H/W.
+ * @completed: true if CS was completed by device.
+ * @timedout : true if CS was timedout.
+ * @tdr_active: true if TDR was activated for this CS (to prevent
+ * double TDR activation).
+ * @aborted: true if CS was aborted due to some device error.
+ */
+struct hl_cs {
+ u16 *jobs_in_queue_cnt;
+ struct hl_ctx *ctx;
+ struct list_head job_list;
+ spinlock_t job_lock;
+ struct kref refcount;
+ struct dma_fence *fence;
+ struct dma_fence *signal_fence;
+ struct work_struct finish_work;
+ struct delayed_work work_tdr;
+ struct list_head mirror_node;
+ struct list_head debugfs_list;
+ u64 sequence;
+ enum hl_cs_type type;
+ u8 submitted;
+ u8 completed;
+ u8 timedout;
+ u8 tdr_active;
+ u8 aborted;
+};
+
+/**
+ * struct hl_cs_job - command submission job.
+ * @cs_node: the node to hang on the CS jobs list.
+ * @cs: the CS this job belongs to.
+ * @user_cb: the CB we got from the user.
+ * @patched_cb: in case of patching, this is internal CB which is submitted on
+ * the queue instead of the CB we got from the IOCTL.
+ * @finish_work: workqueue object to run when job is completed.
+ * @userptr_list: linked-list of userptr mappings that belong to this job and
+ * wait for completion.
+ * @debugfs_list: node in debugfs list of command submission jobs.
+ * @queue_type: the type of the H/W queue this job is submitted to.
+ * @id: the id of this job inside a CS.
+ * @hw_queue_id: the id of the H/W queue this job is submitted to.
+ * @user_cb_size: the actual size of the CB we got from the user.
+ * @job_cb_size: the actual size of the CB that we put on the queue.
+ * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
+ * handle to a kernel-allocated CB object, false
+ * otherwise (SRAM/DRAM/host address).
+ * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This
+ * info is needed later, when adding the 2xMSG_PROT at the
+ * end of the JOB, to know which barriers to put in the
+ * MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
+ * have streams so the engine can't be busy by another
+ * stream.
+ */
+struct hl_cs_job {
+ struct list_head cs_node;
+ struct hl_cs *cs;
+ struct hl_cb *user_cb;
+ struct hl_cb *patched_cb;
+ struct work_struct finish_work;
+ struct list_head userptr_list;
+ struct list_head debugfs_list;
+ enum hl_queue_type queue_type;
+ u32 id;
+ u32 hw_queue_id;
+ u32 user_cb_size;
+ u32 job_cb_size;
+ u8 is_kernel_allocated_cb;
+ u8 contains_dma_pkt;
+};
+
+/**
+ * struct hl_cs_parser - command submission parser properties.
+ * @user_cb: the CB we got from the user.
+ * @patched_cb: in case of patching, this is internal CB which is submitted on
+ * the queue instead of the CB we got from the IOCTL.
+ * @job_userptr_list: linked-list of userptr mappings that belong to the related
+ * job and wait for completion.
+ * @cs_sequence: the sequence number of the related CS.
+ * @queue_type: the type of the H/W queue this job is submitted to.
+ * @ctx_id: the ID of the context the related CS belongs to.
+ * @hw_queue_id: the id of the H/W queue this job is submitted to.
+ * @user_cb_size: the actual size of the CB we got from the user.
+ * @patched_cb_size: the size of the CB after parsing.
+ * @job_id: the id of the related job inside the related CS.
+ * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
+ * handle to a kernel-allocated CB object, false
+ * otherwise (SRAM/DRAM/host address).
+ * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This
+ * info is needed later, when adding the 2xMSG_PROT at the
+ * end of the JOB, to know which barriers to put in the
+ * MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
+ * have streams so the engine can't be busy by another
+ * stream.
+ */
+struct hl_cs_parser {
+ struct hl_cb *user_cb;
+ struct hl_cb *patched_cb;
+ struct list_head *job_userptr_list;
+ u64 cs_sequence;
+ enum hl_queue_type queue_type;
+ u32 ctx_id;
+ u32 hw_queue_id;
+ u32 user_cb_size;
+ u32 patched_cb_size;
+ u8 job_id;
+ u8 is_kernel_allocated_cb;
+ u8 contains_dma_pkt;
+};
+
+
+/*
+ * MEMORY STRUCTURE
+ */
+
+/**
+ * struct hl_vm_hash_node - hash element from virtual address to virtual
+ * memory area descriptor (hl_vm_phys_pg_list or
+ * hl_userptr).
+ * @node: node to hang on the hash table in context object.
+ * @vaddr: key virtual address.
+ * @ptr: value pointer (hl_vm_phys_pg_list or hl_userptr).
+ */
+struct hl_vm_hash_node {
+ struct hlist_node node;
+ u64 vaddr;
+ void *ptr;
+};
+
+/**
+ * struct hl_vm_phys_pg_pack - physical page pack.
+ * @vm_type: describes the type of the virtual area descriptor.
+ * @pages: the physical page array.
+ * @npages: num physical pages in the pack.
+ * @total_size: total size of all the pages in this list.
+ * @mapping_cnt: number of shared mappings.
+ * @asid: the context related to this list.
+ * @page_size: size of each page in the pack.
+ * @flags: HL_MEM_* flags related to this list.
+ * @handle: the provided handle related to this list.
+ * @offset: offset from the first page.
+ * @contiguous: is contiguous physical memory.
+ * @created_from_userptr: is product of host virtual address.
+ */
+struct hl_vm_phys_pg_pack {
+ enum vm_type_t vm_type; /* must be first */
+ u64 *pages;
+ u64 npages;
+ u64 total_size;
+ atomic_t mapping_cnt;
+ u32 asid;
+ u32 page_size;
+ u32 flags;
+ u32 handle;
+ u32 offset;
+ u8 contiguous;
+ u8 created_from_userptr;
+};
+
+/**
+ * struct hl_vm_va_block - virtual range block information.
+ * @node: node to hang on the virtual range list in context object.
+ * @start: virtual range start address.
+ * @end: virtual range end address.
+ * @size: virtual range size.
+ */
+struct hl_vm_va_block {
+ struct list_head node;
+ u64 start;
+ u64 end;
+ u64 size;
+};
+
+/**
+ * struct hl_vm - virtual memory manager for MMU.
+ * @dram_pg_pool: pool for DRAM physical pages of 2MB.
+ * @dram_pg_pool_refcount: reference counter for the pool usage.
+ * @idr_lock: protects the phys_pg_list_handles.
+ * @phys_pg_pack_handles: idr to hold all device allocations handles.
+ * @init_done: whether initialization was done. We need this because VM
+ * initialization might be skipped during device initialization.
+ */
+struct hl_vm {
+ struct gen_pool *dram_pg_pool;
+ struct kref dram_pg_pool_refcount;
+ spinlock_t idr_lock;
+ struct idr phys_pg_pack_handles;
+ u8 init_done;
+};
+
+
+/*
+ * DEBUG, PROFILING STRUCTURE
+ */
+
+/**
+ * struct hl_debug_params - Coresight debug parameters.
+ * @input: pointer to component specific input parameters.
+ * @output: pointer to component specific output parameters.
+ * @output_size: size of output buffer.
+ * @reg_idx: relevant register ID.
+ * @op: component operation to execute.
+ * @enable: true if to enable component debugging, false otherwise.
+ */
+struct hl_debug_params {
+ void *input;
+ void *output;
+ u32 output_size;
+ u32 reg_idx;
+ u32 op;
+ bool enable;
+};
+
+/*
+ * FILE PRIVATE STRUCTURE
+ */
+
+/**
+ * struct hl_fpriv - process information stored in FD private data.
+ * @hdev: habanalabs device structure.
+ * @filp: pointer to the given file structure.
+ * @taskpid: current process ID.
+ * @ctx: current executing context. TODO: remove for multiple ctx per process
+ * @ctx_mgr: context manager to handle multiple context for this FD.
+ * @cb_mgr: command buffer manager to handle multiple buffers for this FD.
+ * @debugfs_list: list of relevant ASIC debugfs.
+ * @dev_node: node in the device list of file private data
+ * @refcount: number of related contexts.
+ * @restore_phase_mutex: lock for context switch and restore phase.
+ * @is_control: true for control device, false otherwise
+ */
+struct hl_fpriv {
+ struct hl_device *hdev;
+ struct file *filp;
+ struct pid *taskpid;
+ struct hl_ctx *ctx;
+ struct hl_ctx_mgr ctx_mgr;
+ struct hl_cb_mgr cb_mgr;
+ struct list_head debugfs_list;
+ struct list_head dev_node;
+ struct kref refcount;
+ struct mutex restore_phase_mutex;
+ u8 is_control;
+};
+
+
+/*
+ * DebugFS
+ */
+
+/**
+ * struct hl_info_list - debugfs file ops.
+ * @name: file name.
+ * @show: function to output information.
+ * @write: function to write to the file.
+ */
+struct hl_info_list {
+ const char *name;
+ int (*show)(struct seq_file *s, void *data);
+ ssize_t (*write)(struct file *file, const char __user *buf,
+ size_t count, loff_t *f_pos);
+};
+
+/**
+ * struct hl_debugfs_entry - debugfs dentry wrapper.
+ * @dent: base debugfs entry structure.
+ * @info_ent: dentry realted ops.
+ * @dev_entry: ASIC specific debugfs manager.
+ */
+struct hl_debugfs_entry {
+ struct dentry *dent;
+ const struct hl_info_list *info_ent;
+ struct hl_dbg_device_entry *dev_entry;
+};
+
+/**
+ * struct hl_dbg_device_entry - ASIC specific debugfs manager.
+ * @root: root dentry.
+ * @hdev: habanalabs device structure.
+ * @entry_arr: array of available hl_debugfs_entry.
+ * @file_list: list of available debugfs files.
+ * @file_mutex: protects file_list.
+ * @cb_list: list of available CBs.
+ * @cb_spinlock: protects cb_list.
+ * @cs_list: list of available CSs.
+ * @cs_spinlock: protects cs_list.
+ * @cs_job_list: list of available CB jobs.
+ * @cs_job_spinlock: protects cs_job_list.
+ * @userptr_list: list of available userptrs (virtual memory chunk descriptor).
+ * @userptr_spinlock: protects userptr_list.
+ * @ctx_mem_hash_list: list of available contexts with MMU mappings.
+ * @ctx_mem_hash_spinlock: protects cb_list.
+ * @addr: next address to read/write from/to in read/write32.
+ * @mmu_addr: next virtual address to translate to physical address in mmu_show.
+ * @mmu_asid: ASID to use while translating in mmu_show.
+ * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read.
+ * @i2c_bus: generic u8 debugfs file for address value to use in i2c_data_read.
+ * @i2c_bus: generic u8 debugfs file for register value to use in i2c_data_read.
+ */
+struct hl_dbg_device_entry {
+ struct dentry *root;
+ struct hl_device *hdev;
+ struct hl_debugfs_entry *entry_arr;
+ struct list_head file_list;
+ struct mutex file_mutex;
+ struct list_head cb_list;
+ spinlock_t cb_spinlock;
+ struct list_head cs_list;
+ spinlock_t cs_spinlock;
+ struct list_head cs_job_list;
+ spinlock_t cs_job_spinlock;
+ struct list_head userptr_list;
+ spinlock_t userptr_spinlock;
+ struct list_head ctx_mem_hash_list;
+ spinlock_t ctx_mem_hash_spinlock;
+ u64 addr;
+ u64 mmu_addr;
+ u32 mmu_asid;
+ u8 i2c_bus;
+ u8 i2c_addr;
+ u8 i2c_reg;
+};
+
+
+/*
+ * DEVICES
+ */
+
+/* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
+ * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
+ */
+#define HL_MAX_MINORS 256
+
+/*
+ * Registers read & write functions.
+ */
+
+u32 hl_rreg(struct hl_device *hdev, u32 reg);
+void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
+
+#define RREG32(reg) hdev->asic_funcs->rreg(hdev, (reg))
+#define WREG32(reg, v) hdev->asic_funcs->wreg(hdev, (reg), (v))
+#define DREG32(reg) pr_info("REGISTER: " #reg " : 0x%08X\n", \
+ hdev->asic_funcs->rreg(hdev, (reg)))
+
+#define WREG32_P(reg, val, mask) \
+ do { \
+ u32 tmp_ = RREG32(reg); \
+ tmp_ &= (mask); \
+ tmp_ |= ((val) & ~(mask)); \
+ WREG32(reg, tmp_); \
+ } while (0)
+#define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
+#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))
+
+#define RMWREG32(reg, val, mask) \
+ do { \
+ u32 tmp_ = RREG32(reg); \
+ tmp_ &= ~(mask); \
+ tmp_ |= ((val) << __ffs(mask)); \
+ WREG32(reg, tmp_); \
+ } while (0)
+
+#define RREG32_MASK(reg, mask) ((RREG32(reg) & mask) >> __ffs(mask))
+
+#define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT
+#define REG_FIELD_MASK(reg, field) reg##_##field##_MASK
+#define WREG32_FIELD(reg, offset, field, val) \
+ WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & \
+ ~REG_FIELD_MASK(reg, field)) | \
+ (val) << REG_FIELD_SHIFT(reg, field))
+
+/* Timeout should be longer when working with simulator but cap the
+ * increased timeout to some maximum
+ */
+#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
+({ \
+ ktime_t __timeout; \
+ if (hdev->pdev) \
+ __timeout = ktime_add_us(ktime_get(), timeout_us); \
+ else \
+ __timeout = ktime_add_us(ktime_get(),\
+ min((u64)(timeout_us * 10), \
+ (u64) HL_SIM_MAX_TIMEOUT_US)); \
+ might_sleep_if(sleep_us); \
+ for (;;) { \
+ (val) = RREG32(addr); \
+ if (cond) \
+ break; \
+ if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
+ (val) = RREG32(addr); \
+ break; \
+ } \
+ if (sleep_us) \
+ usleep_range((sleep_us >> 2) + 1, sleep_us); \
+ } \
+ (cond) ? 0 : -ETIMEDOUT; \
+})
+
+/*
+ * address in this macro points always to a memory location in the
+ * host's (server's) memory. That location is updated asynchronously
+ * either by the direct access of the device or by another core.
+ *
+ * To work both in LE and BE architectures, we need to distinguish between the
+ * two states (device or another core updates the memory location). Therefore,
+ * if mem_written_by_device is true, the host memory being polled will be
+ * updated directly by the device. If false, the host memory being polled will
+ * be updated by host CPU. Required so host knows whether or not the memory
+ * might need to be byte-swapped before returning value to caller.
+ */
+#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \
+ mem_written_by_device) \
+({ \
+ ktime_t __timeout; \
+ if (hdev->pdev) \
+ __timeout = ktime_add_us(ktime_get(), timeout_us); \
+ else \
+ __timeout = ktime_add_us(ktime_get(),\
+ min((u64)(timeout_us * 10), \
+ (u64) HL_SIM_MAX_TIMEOUT_US)); \
+ might_sleep_if(sleep_us); \
+ for (;;) { \
+ /* Verify we read updates done by other cores or by device */ \
+ mb(); \
+ (val) = *((u32 *) (uintptr_t) (addr)); \
+ if (mem_written_by_device) \
+ (val) = le32_to_cpu(*(__le32 *) &(val)); \
+ if (cond) \
+ break; \
+ if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
+ (val) = *((u32 *) (uintptr_t) (addr)); \
+ if (mem_written_by_device) \
+ (val) = le32_to_cpu(*(__le32 *) &(val)); \
+ break; \
+ } \
+ if (sleep_us) \
+ usleep_range((sleep_us >> 2) + 1, sleep_us); \
+ } \
+ (cond) ? 0 : -ETIMEDOUT; \
+})
+
+#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \
+ timeout_us) \
+({ \
+ ktime_t __timeout; \
+ if (hdev->pdev) \
+ __timeout = ktime_add_us(ktime_get(), timeout_us); \
+ else \
+ __timeout = ktime_add_us(ktime_get(),\
+ min((u64)(timeout_us * 10), \
+ (u64) HL_SIM_MAX_TIMEOUT_US)); \
+ might_sleep_if(sleep_us); \
+ for (;;) { \
+ (val) = readl(addr); \
+ if (cond) \
+ break; \
+ if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
+ (val) = readl(addr); \
+ break; \
+ } \
+ if (sleep_us) \
+ usleep_range((sleep_us >> 2) + 1, sleep_us); \
+ } \
+ (cond) ? 0 : -ETIMEDOUT; \
+})
+
+struct hwmon_chip_info;
+
+/**
+ * struct hl_device_reset_work - reset workqueue task wrapper.
+ * @reset_work: reset work to be done.
+ * @hdev: habanalabs device structure.
+ */
+struct hl_device_reset_work {
+ struct work_struct reset_work;
+ struct hl_device *hdev;
+};
+
+/**
+ * struct hl_device_idle_busy_ts - used for calculating device utilization rate.
+ * @idle_to_busy_ts: timestamp where device changed from idle to busy.
+ * @busy_to_idle_ts: timestamp where device changed from busy to idle.
+ */
+struct hl_device_idle_busy_ts {
+ ktime_t idle_to_busy_ts;
+ ktime_t busy_to_idle_ts;
+};
+
+/**
+ * struct hl_device - habanalabs device structure.
+ * @pdev: pointer to PCI device, can be NULL in case of simulator device.
+ * @pcie_bar_phys: array of available PCIe bars physical addresses.
+ * (required only for PCI address match mode)
+ * @pcie_bar: array of available PCIe bars virtual addresses.
+ * @rmmio: configuration area address on SRAM.
+ * @cdev: related char device.
+ * @cdev_ctrl: char device for control operations only (INFO IOCTL)
+ * @dev: related kernel basic device structure.
+ * @dev_ctrl: related kernel device structure for the control device
+ * @work_freq: delayed work to lower device frequency if possible.
+ * @work_heartbeat: delayed work for ArmCP is-alive check.
+ * @asic_name: ASIC specific nmae.
+ * @asic_type: ASIC specific type.
+ * @completion_queue: array of hl_cq.
+ * @cq_wq: work queues of completion queues for executing work in process
+ * context.
+ * @eq_wq: work queue of event queue for executing work in process context.
+ * @kernel_ctx: Kernel driver context structure.
+ * @kernel_queues: array of hl_hw_queue.
+ * @hw_queues_mirror_list: CS mirror list for TDR.
+ * @hw_queues_mirror_lock: protects hw_queues_mirror_list.
+ * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs.
+ * @event_queue: event queue for IRQ from ArmCP.
+ * @dma_pool: DMA pool for small allocations.
+ * @cpu_accessible_dma_mem: Host <-> ArmCP shared memory CPU address.
+ * @cpu_accessible_dma_address: Host <-> ArmCP shared memory DMA address.
+ * @cpu_accessible_dma_pool: Host <-> ArmCP shared memory pool.
+ * @asid_bitmap: holds used/available ASIDs.
+ * @asid_mutex: protects asid_bitmap.
+ * @send_cpu_message_lock: enforces only one message in Host <-> ArmCP queue.
+ * @debug_lock: protects critical section of setting debug mode for device
+ * @asic_prop: ASIC specific immutable properties.
+ * @asic_funcs: ASIC specific functions.
+ * @asic_specific: ASIC specific information to use only from ASIC files.
+ * @mmu_pgt_pool: pool of available MMU hops.
+ * @vm: virtual memory manager for MMU.
+ * @mmu_cache_lock: protects MMU cache invalidation as it can serve one context.
+ * @mmu_shadow_hop0: shadow mapping of the MMU hop 0 zone.
+ * @hwmon_dev: H/W monitor device.
+ * @pm_mng_profile: current power management profile.
+ * @hl_chip_info: ASIC's sensors information.
+ * @hl_debugfs: device's debugfs manager.
+ * @cb_pool: list of preallocated CBs.
+ * @cb_pool_lock: protects the CB pool.
+ * @fpriv_list: list of file private data structures. Each structure is created
+ * when a user opens the device
+ * @fpriv_list_lock: protects the fpriv_list
+ * @compute_ctx: current compute context executing.
+ * @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy
+ * and vice-versa
+ * @aggregated_cs_counters: aggregated cs counters among all contexts
+ * @dram_used_mem: current DRAM memory consumption.
+ * @timeout_jiffies: device CS timeout value.
+ * @max_power: the max power of the device, as configured by the sysadmin. This
+ * value is saved so in case of hard-reset, the driver will restore
+ * this value and update the F/W after the re-initialization
+ * @in_reset: is device in reset flow.
+ * @curr_pll_profile: current PLL profile.
+ * @cs_active_cnt: number of active command submissions on this device (active
+ * means already in H/W queues)
+ * @major: habanalabs kernel driver major.
+ * @high_pll: high PLL profile frequency.
+ * @soft_reset_cnt: number of soft reset since the driver was loaded.
+ * @hard_reset_cnt: number of hard reset since the driver was loaded.
+ * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr
+ * @id: device minor.
+ * @id_control: minor of the control device
+ * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
+ * addresses.
+ * @disabled: is device disabled.
+ * @late_init_done: is late init stage was done during initialization.
+ * @hwmon_initialized: is H/W monitor sensors was initialized.
+ * @hard_reset_pending: is there a hard reset work pending.
+ * @heartbeat: is heartbeat sanity check towards ArmCP enabled.
+ * @reset_on_lockup: true if a reset should be done in case of stuck CS, false
+ * otherwise.
+ * @dram_supports_virtual_memory: is MMU enabled towards DRAM.
+ * @dram_default_page_mapping: is DRAM default page mapping enabled.
+ * @pmmu_huge_range: is a different virtual addresses range used for PMMU with
+ * huge pages.
+ * @init_done: is the initialization of the device done.
+ * @mmu_enable: is MMU enabled.
+ * @mmu_huge_page_opt: is MMU huge pages optimization enabled.
+ * @clock_gating: is clock gating enabled.
+ * @device_cpu_disabled: is the device CPU disabled (due to timeouts)
+ * @dma_mask: the dma mask that was set for this device
+ * @in_debug: is device under debug. This, together with fpriv_list, enforces
+ * that only a single user is configuring the debug infrastructure.
+ * @power9_64bit_dma_enable: true to enable 64-bit DMA mask support. Relevant
+ * only to POWER9 machines.
+ * @cdev_sysfs_created: were char devices and sysfs nodes created.
+ * @stop_on_err: true if engines should stop on error.
+ * @supports_sync_stream: is sync stream supported.
+ * @sync_stream_queue_idx: helper index for sync stream queues initialization.
+ * @supports_coresight: is CoreSight supported.
+ * @supports_soft_reset: is soft reset supported.
+ */
+struct hl_device {
+ struct pci_dev *pdev;
+ u64 pcie_bar_phys[HL_PCI_NUM_BARS];
+ void __iomem *pcie_bar[HL_PCI_NUM_BARS];
+ void __iomem *rmmio;
+ struct cdev cdev;
+ struct cdev cdev_ctrl;
+ struct device *dev;
+ struct device *dev_ctrl;
+ struct delayed_work work_freq;
+ struct delayed_work work_heartbeat;
+ char asic_name[16];
+ enum hl_asic_type asic_type;
+ struct hl_cq *completion_queue;
+ struct workqueue_struct **cq_wq;
+ struct workqueue_struct *eq_wq;
+ struct hl_ctx *kernel_ctx;
+ struct hl_hw_queue *kernel_queues;
+ struct list_head hw_queues_mirror_list;
+ spinlock_t hw_queues_mirror_lock;
+ struct hl_cb_mgr kernel_cb_mgr;
+ struct hl_eq event_queue;
+ struct dma_pool *dma_pool;
+ void *cpu_accessible_dma_mem;
+ dma_addr_t cpu_accessible_dma_address;
+ struct gen_pool *cpu_accessible_dma_pool;
+ unsigned long *asid_bitmap;
+ struct mutex asid_mutex;
+ struct mutex send_cpu_message_lock;
+ struct mutex debug_lock;
+ struct asic_fixed_properties asic_prop;
+ const struct hl_asic_funcs *asic_funcs;
+ void *asic_specific;
+ struct gen_pool *mmu_pgt_pool;
+ struct hl_vm vm;
+ struct mutex mmu_cache_lock;
+ void *mmu_shadow_hop0;
+ struct device *hwmon_dev;
+ enum hl_pm_mng_profile pm_mng_profile;
+ struct hwmon_chip_info *hl_chip_info;
+
+ struct hl_dbg_device_entry hl_debugfs;
+
+ struct list_head cb_pool;
+ spinlock_t cb_pool_lock;
+
+ struct list_head fpriv_list;
+ struct mutex fpriv_list_lock;
+
+ struct hl_ctx *compute_ctx;
+
+ struct hl_device_idle_busy_ts *idle_busy_ts_arr;
+
+ struct hl_cs_counters aggregated_cs_counters;
+
+ atomic64_t dram_used_mem;
+ u64 timeout_jiffies;
+ u64 max_power;
+ atomic_t in_reset;
+ enum hl_pll_frequency curr_pll_profile;
+ int cs_active_cnt;
+ u32 major;
+ u32 high_pll;
+ u32 soft_reset_cnt;
+ u32 hard_reset_cnt;
+ u32 idle_busy_ts_idx;
+ u16 id;
+ u16 id_control;
+ u16 cpu_pci_msb_addr;
+ u8 disabled;
+ u8 late_init_done;
+ u8 hwmon_initialized;
+ u8 hard_reset_pending;
+ u8 heartbeat;
+ u8 reset_on_lockup;
+ u8 dram_supports_virtual_memory;
+ u8 dram_default_page_mapping;
+ u8 pmmu_huge_range;
+ u8 init_done;
+ u8 clock_gating;
+ u8 device_cpu_disabled;
+ u8 dma_mask;
+ u8 in_debug;
+ u8 power9_64bit_dma_enable;
+ u8 cdev_sysfs_created;
+ u8 stop_on_err;
+ u8 supports_sync_stream;
+ u8 sync_stream_queue_idx;
+ u8 supports_coresight;
+ u8 supports_soft_reset;
+
+ /* Parameters for bring-up */
+ u8 mmu_enable;
+ u8 mmu_huge_page_opt;
+ u8 cpu_enable;
+ u8 reset_pcilink;
+ u8 cpu_queues_enable;
+ u8 fw_loading;
+ u8 pldm;
+ u8 axi_drain;
+ u8 sram_scrambler_enable;
+ u8 dram_scrambler_enable;
+ u8 hard_reset_on_fw_events;
+ u8 bmc_enable;
+ u8 rl_enable;
+};
+
+
+/*
+ * IOCTLs
+ */
+
+/**
+ * typedef hl_ioctl_t - typedef for ioctl function in the driver
+ * @hpriv: pointer to the FD's private data, which contains state of
+ * user process
+ * @data: pointer to the input/output arguments structure of the IOCTL
+ *
+ * Return: 0 for success, negative value for error
+ */
+typedef int hl_ioctl_t(struct hl_fpriv *hpriv, void *data);
+
+/**
+ * struct hl_ioctl_desc - describes an IOCTL entry of the driver.
+ * @cmd: the IOCTL code as created by the kernel macros.
+ * @func: pointer to the driver's function that should be called for this IOCTL.
+ */
+struct hl_ioctl_desc {
+ unsigned int cmd;
+ hl_ioctl_t *func;
+};
+
+
+/*
+ * Kernel module functions that can be accessed by entire module
+ */
+
+/**
+ * hl_mem_area_inside_range() - Checks whether address+size are inside a range.
+ * @address: The start address of the area we want to validate.
+ * @size: The size in bytes of the area we want to validate.
+ * @range_start_address: The start address of the valid range.
+ * @range_end_address: The end address of the valid range.
+ *
+ * Return: true if the area is inside the valid range, false otherwise.
+ */
+static inline bool hl_mem_area_inside_range(u64 address, u32 size,
+ u64 range_start_address, u64 range_end_address)
+{
+ u64 end_address = address + size;
+
+ if ((address >= range_start_address) &&
+ (end_address <= range_end_address) &&
+ (end_address > address))
+ return true;
+
+ return false;
+}
+
+/**
+ * hl_mem_area_crosses_range() - Checks whether address+size crossing a range.
+ * @address: The start address of the area we want to validate.
+ * @size: The size in bytes of the area we want to validate.
+ * @range_start_address: The start address of the valid range.
+ * @range_end_address: The end address of the valid range.
+ *
+ * Return: true if the area overlaps part or all of the valid range,
+ * false otherwise.
+ */
+static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
+ u64 range_start_address, u64 range_end_address)
+{
+ u64 end_address = address + size;
+
+ if ((address >= range_start_address) &&
+ (address < range_end_address))
+ return true;
+
+ if ((end_address >= range_start_address) &&
+ (end_address < range_end_address))
+ return true;
+
+ if ((address < range_start_address) &&
+ (end_address >= range_end_address))
+ return true;
+
+ return false;
+}
+
+int hl_device_open(struct inode *inode, struct file *filp);
+int hl_device_open_ctrl(struct inode *inode, struct file *filp);
+bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
+enum hl_device_status hl_device_status(struct hl_device *hdev);
+int hl_device_set_debug_mode(struct hl_device *hdev, bool enable);
+int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
+ enum hl_asic_type asic_type, int minor);
+void destroy_hdev(struct hl_device *hdev);
+int hl_hw_queues_create(struct hl_device *hdev);
+void hl_hw_queues_destroy(struct hl_device *hdev);
+int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
+ u32 cb_size, u64 cb_ptr);
+int hl_hw_queue_schedule_cs(struct hl_cs *cs);
+u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
+void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
+void hl_int_hw_queue_update_ci(struct hl_cs *cs);
+void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset);
+
+#define hl_queue_inc_ptr(p) hl_hw_queue_add_ptr(p, 1)
+#define hl_pi_2_offset(pi) ((pi) & (HL_QUEUE_LENGTH - 1))
+
+int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id);
+void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q);
+int hl_eq_init(struct hl_device *hdev, struct hl_eq *q);
+void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q);
+void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q);
+void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
+irqreturn_t hl_irq_handler_cq(int irq, void *arg);
+irqreturn_t hl_irq_handler_eq(int irq, void *arg);
+u32 hl_cq_inc_ptr(u32 ptr);
+
+int hl_asid_init(struct hl_device *hdev);
+void hl_asid_fini(struct hl_device *hdev);
+unsigned long hl_asid_alloc(struct hl_device *hdev);
+void hl_asid_free(struct hl_device *hdev, unsigned long asid);
+
+int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv);
+void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx);
+int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
+void hl_ctx_do_release(struct kref *ref);
+void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx);
+int hl_ctx_put(struct hl_ctx *ctx);
+struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
+void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
+void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
+
+int hl_device_init(struct hl_device *hdev, struct class *hclass);
+void hl_device_fini(struct hl_device *hdev);
+int hl_device_suspend(struct hl_device *hdev);
+int hl_device_resume(struct hl_device *hdev);
+int hl_device_reset(struct hl_device *hdev, bool hard_reset,
+ bool from_hard_reset_thread);
+void hl_hpriv_get(struct hl_fpriv *hpriv);
+void hl_hpriv_put(struct hl_fpriv *hpriv);
+int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
+uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms);
+
+int hl_build_hwmon_channel_info(struct hl_device *hdev,
+ struct armcp_sensor *sensors_arr);
+
+int hl_sysfs_init(struct hl_device *hdev);
+void hl_sysfs_fini(struct hl_device *hdev);
+
+int hl_hwmon_init(struct hl_device *hdev);
+void hl_hwmon_fini(struct hl_device *hdev);
+
+int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size,
+ u64 *handle, int ctx_id);
+int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
+int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
+struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
+ u32 handle);
+void hl_cb_put(struct hl_cb *cb);
+void hl_cb_mgr_init(struct hl_cb_mgr *mgr);
+void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr);
+struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size);
+int hl_cb_pool_init(struct hl_device *hdev);
+int hl_cb_pool_fini(struct hl_device *hdev);
+
+void hl_cs_rollback_all(struct hl_device *hdev);
+struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
+ enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
+void hl_sob_reset_error(struct kref *ref);
+
+void goya_set_asic_funcs(struct hl_device *hdev);
+void gaudi_set_asic_funcs(struct hl_device *hdev);
+
+int hl_vm_ctx_init(struct hl_ctx *ctx);
+void hl_vm_ctx_fini(struct hl_ctx *ctx);
+
+int hl_vm_init(struct hl_device *hdev);
+void hl_vm_fini(struct hl_device *hdev);
+
+int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
+ struct hl_userptr *userptr);
+void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
+void hl_userptr_delete_list(struct hl_device *hdev,
+ struct list_head *userptr_list);
+bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
+ struct list_head *userptr_list,
+ struct hl_userptr **userptr);
+
+int hl_mmu_init(struct hl_device *hdev);
+void hl_mmu_fini(struct hl_device *hdev);
+int hl_mmu_ctx_init(struct hl_ctx *ctx);
+void hl_mmu_ctx_fini(struct hl_ctx *ctx);
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+ u32 page_size, bool flush_pte);
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+ bool flush_pte);
+void hl_mmu_swap_out(struct hl_ctx *ctx);
+void hl_mmu_swap_in(struct hl_ctx *ctx);
+
+int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
+ void __iomem *dst);
+int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode);
+int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
+ u16 len, u32 timeout, long *result);
+int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type);
+int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
+ size_t irq_arr_size);
+int hl_fw_test_cpu_queue(struct hl_device *hdev);
+void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
+ dma_addr_t *dma_handle);
+void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
+ void *vaddr);
+int hl_fw_send_heartbeat(struct hl_device *hdev);
+int hl_fw_armcp_info_get(struct hl_device *hdev);
+int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
+int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
+ u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
+ u32 boot_err0_reg, bool skip_bmc,
+ u32 cpu_timeout, u32 boot_fit_timeout);
+
+int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
+ bool is_wc[3]);
+int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
+int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
+ u64 addr);
+int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
+ struct hl_inbound_pci_region *pci_region);
+int hl_pci_set_outbound_region(struct hl_device *hdev,
+ struct hl_outbound_pci_region *pci_region);
+int hl_pci_init(struct hl_device *hdev);
+void hl_pci_fini(struct hl_device *hdev);
+
+long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
+void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
+int hl_get_temperature(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value);
+int hl_set_temperature(struct hl_device *hdev,
+ int sensor_index, u32 attr, long value);
+int hl_get_voltage(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value);
+int hl_get_current(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value);
+int hl_get_fan_speed(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value);
+int hl_get_pwm_info(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value);
+void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
+ long value);
+u64 hl_get_max_power(struct hl_device *hdev);
+void hl_set_max_power(struct hl_device *hdev, u64 value);
+int hl_set_voltage(struct hl_device *hdev,
+ int sensor_index, u32 attr, long value);
+int hl_set_current(struct hl_device *hdev,
+ int sensor_index, u32 attr, long value);
+
+#ifdef CONFIG_DEBUG_FS
+
+void hl_debugfs_init(void);
+void hl_debugfs_fini(void);
+void hl_debugfs_add_device(struct hl_device *hdev);
+void hl_debugfs_remove_device(struct hl_device *hdev);
+void hl_debugfs_add_file(struct hl_fpriv *hpriv);
+void hl_debugfs_remove_file(struct hl_fpriv *hpriv);
+void hl_debugfs_add_cb(struct hl_cb *cb);
+void hl_debugfs_remove_cb(struct hl_cb *cb);
+void hl_debugfs_add_cs(struct hl_cs *cs);
+void hl_debugfs_remove_cs(struct hl_cs *cs);
+void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job);
+void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job);
+void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr);
+void hl_debugfs_remove_userptr(struct hl_device *hdev,
+ struct hl_userptr *userptr);
+void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
+void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
+
+#else
+
+static inline void __init hl_debugfs_init(void)
+{
+}
+
+static inline void hl_debugfs_fini(void)
+{
+}
+
+static inline void hl_debugfs_add_device(struct hl_device *hdev)
+{
+}
+
+static inline void hl_debugfs_remove_device(struct hl_device *hdev)
+{
+}
+
+static inline void hl_debugfs_add_file(struct hl_fpriv *hpriv)
+{
+}
+
+static inline void hl_debugfs_remove_file(struct hl_fpriv *hpriv)
+{
+}
+
+static inline void hl_debugfs_add_cb(struct hl_cb *cb)
+{
+}
+
+static inline void hl_debugfs_remove_cb(struct hl_cb *cb)
+{
+}
+
+static inline void hl_debugfs_add_cs(struct hl_cs *cs)
+{
+}
+
+static inline void hl_debugfs_remove_cs(struct hl_cs *cs)
+{
+}
+
+static inline void hl_debugfs_add_job(struct hl_device *hdev,
+ struct hl_cs_job *job)
+{
+}
+
+static inline void hl_debugfs_remove_job(struct hl_device *hdev,
+ struct hl_cs_job *job)
+{
+}
+
+static inline void hl_debugfs_add_userptr(struct hl_device *hdev,
+ struct hl_userptr *userptr)
+{
+}
+
+static inline void hl_debugfs_remove_userptr(struct hl_device *hdev,
+ struct hl_userptr *userptr)
+{
+}
+
+static inline void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev,
+ struct hl_ctx *ctx)
+{
+}
+
+static inline void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev,
+ struct hl_ctx *ctx)
+{
+}
+
+#endif
+
+/* IOCTLs */
+long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
+long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg);
+int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
+int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data);
+int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data);
+int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data);
+
+#endif /* HABANALABSP_H_ */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#define pr_fmt(fmt) "habanalabs: " fmt
+
+#include "habanalabs.h"
+
+#include <linux/pci.h>
+#include <linux/module.h>
+
+#define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team"
+
+#define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators"
+
+MODULE_AUTHOR(HL_DRIVER_AUTHOR);
+MODULE_DESCRIPTION(HL_DRIVER_DESC);
+MODULE_LICENSE("GPL v2");
+
+static int hl_major;
+static struct class *hl_class;
+static DEFINE_IDR(hl_devs_idr);
+static DEFINE_MUTEX(hl_devs_idr_lock);
+
+static int timeout_locked = 5;
+static int reset_on_lockup = 1;
+
+module_param(timeout_locked, int, 0444);
+MODULE_PARM_DESC(timeout_locked,
+ "Device lockup timeout in seconds (0 = disabled, default 5s)");
+
+module_param(reset_on_lockup, int, 0444);
+MODULE_PARM_DESC(reset_on_lockup,
+ "Do device reset on lockup (0 = no, 1 = yes, default yes)");
+
+#define PCI_VENDOR_ID_HABANALABS 0x1da3
+
+#define PCI_IDS_GOYA 0x0001
+#define PCI_IDS_GAUDI 0x1000
+
+static const struct pci_device_id ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
+ { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, ids);
+
+/*
+ * get_asic_type - translate device id to asic type
+ *
+ * @device: id of the PCI device
+ *
+ * Translate device id to asic type.
+ * In case of unidentified device, return -1
+ */
+static enum hl_asic_type get_asic_type(u16 device)
+{
+ enum hl_asic_type asic_type;
+
+ switch (device) {
+ case PCI_IDS_GOYA:
+ asic_type = ASIC_GOYA;
+ break;
+ case PCI_IDS_GAUDI:
+ asic_type = ASIC_GAUDI;
+ break;
+ default:
+ asic_type = ASIC_INVALID;
+ break;
+ }
+
+ return asic_type;
+}
+
+/*
+ * hl_device_open - open function for habanalabs device
+ *
+ * @inode: pointer to inode structure
+ * @filp: pointer to file structure
+ *
+ * Called when process opens an habanalabs device.
+ */
+int hl_device_open(struct inode *inode, struct file *filp)
+{
+ struct hl_device *hdev;
+ struct hl_fpriv *hpriv;
+ int rc;
+
+ mutex_lock(&hl_devs_idr_lock);
+ hdev = idr_find(&hl_devs_idr, iminor(inode));
+ mutex_unlock(&hl_devs_idr_lock);
+
+ if (!hdev) {
+ pr_err("Couldn't find device %d:%d\n",
+ imajor(inode), iminor(inode));
+ return -ENXIO;
+ }
+
+ hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
+ if (!hpriv)
+ return -ENOMEM;
+
+ hpriv->hdev = hdev;
+ filp->private_data = hpriv;
+ hpriv->filp = filp;
+ mutex_init(&hpriv->restore_phase_mutex);
+ kref_init(&hpriv->refcount);
+ nonseekable_open(inode, filp);
+
+ hl_cb_mgr_init(&hpriv->cb_mgr);
+ hl_ctx_mgr_init(&hpriv->ctx_mgr);
+
+ hpriv->taskpid = find_get_pid(current->pid);
+
+ mutex_lock(&hdev->fpriv_list_lock);
+
+ if (hl_device_disabled_or_in_reset(hdev)) {
+ dev_err_ratelimited(hdev->dev,
+ "Can't open %s because it is disabled or in reset\n",
+ dev_name(hdev->dev));
+ rc = -EPERM;
+ goto out_err;
+ }
+
+ if (hdev->in_debug) {
+ dev_err_ratelimited(hdev->dev,
+ "Can't open %s because it is being debugged by another user\n",
+ dev_name(hdev->dev));
+ rc = -EPERM;
+ goto out_err;
+ }
+
+ if (hdev->compute_ctx) {
+ dev_dbg_ratelimited(hdev->dev,
+ "Can't open %s because another user is working on it\n",
+ dev_name(hdev->dev));
+ rc = -EBUSY;
+ goto out_err;
+ }
+
+ rc = hl_ctx_create(hdev, hpriv);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to create context %d\n", rc);
+ goto out_err;
+ }
+
+ /* Device is IDLE at this point so it is legal to change PLLs.
+ * There is no need to check anything because if the PLL is
+ * already HIGH, the set function will return without doing
+ * anything
+ */
+ hl_device_set_frequency(hdev, PLL_HIGH);
+
+ list_add(&hpriv->dev_node, &hdev->fpriv_list);
+ mutex_unlock(&hdev->fpriv_list_lock);
+
+ hl_debugfs_add_file(hpriv);
+
+ return 0;
+
+out_err:
+ mutex_unlock(&hdev->fpriv_list_lock);
+
+ hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
+ hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
+ filp->private_data = NULL;
+ mutex_destroy(&hpriv->restore_phase_mutex);
+ put_pid(hpriv->taskpid);
+
+ kfree(hpriv);
+
+ return rc;
+}
+
+int hl_device_open_ctrl(struct inode *inode, struct file *filp)
+{
+ struct hl_device *hdev;
+ struct hl_fpriv *hpriv;
+ int rc;
+
+ mutex_lock(&hl_devs_idr_lock);
+ hdev = idr_find(&hl_devs_idr, iminor(inode));
+ mutex_unlock(&hl_devs_idr_lock);
+
+ if (!hdev) {
+ pr_err("Couldn't find device %d:%d\n",
+ imajor(inode), iminor(inode));
+ return -ENXIO;
+ }
+
+ hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
+ if (!hpriv)
+ return -ENOMEM;
+
+ mutex_lock(&hdev->fpriv_list_lock);
+
+ if (hl_device_disabled_or_in_reset(hdev)) {
+ dev_err_ratelimited(hdev->dev_ctrl,
+ "Can't open %s because it is disabled or in reset\n",
+ dev_name(hdev->dev_ctrl));
+ rc = -EPERM;
+ goto out_err;
+ }
+
+ list_add(&hpriv->dev_node, &hdev->fpriv_list);
+ mutex_unlock(&hdev->fpriv_list_lock);
+
+ hpriv->hdev = hdev;
+ filp->private_data = hpriv;
+ hpriv->filp = filp;
+ hpriv->is_control = true;
+ nonseekable_open(inode, filp);
+
+ hpriv->taskpid = find_get_pid(current->pid);
+
+ return 0;
+
+out_err:
+ mutex_unlock(&hdev->fpriv_list_lock);
+ kfree(hpriv);
+ return rc;
+}
+
+static void set_driver_behavior_per_device(struct hl_device *hdev)
+{
+ hdev->mmu_enable = 1;
+ hdev->cpu_enable = 1;
+ hdev->fw_loading = 1;
+ hdev->cpu_queues_enable = 1;
+ hdev->heartbeat = 1;
+ hdev->clock_gating = 1;
+
+ hdev->reset_pcilink = 0;
+ hdev->axi_drain = 0;
+ hdev->sram_scrambler_enable = 1;
+ hdev->dram_scrambler_enable = 1;
+ hdev->bmc_enable = 1;
+ hdev->hard_reset_on_fw_events = 1;
+}
+
+/*
+ * create_hdev - create habanalabs device instance
+ *
+ * @dev: will hold the pointer to the new habanalabs device structure
+ * @pdev: pointer to the pci device
+ * @asic_type: in case of simulator device, which device is it
+ * @minor: in case of simulator device, the minor of the device
+ *
+ * Allocate memory for habanalabs device and initialize basic fields
+ * Identify the ASIC type
+ * Allocate ID (minor) for the device (only for real devices)
+ */
+int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
+ enum hl_asic_type asic_type, int minor)
+{
+ struct hl_device *hdev;
+ int rc, main_id, ctrl_id = 0;
+
+ *dev = NULL;
+
+ hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
+ if (!hdev)
+ return -ENOMEM;
+
+ /* First, we must find out which ASIC are we handling. This is needed
+ * to configure the behavior of the driver (kernel parameters)
+ */
+ if (pdev) {
+ hdev->asic_type = get_asic_type(pdev->device);
+ if (hdev->asic_type == ASIC_INVALID) {
+ dev_err(&pdev->dev, "Unsupported ASIC\n");
+ rc = -ENODEV;
+ goto free_hdev;
+ }
+ } else {
+ hdev->asic_type = asic_type;
+ }
+
+ hdev->major = hl_major;
+ hdev->reset_on_lockup = reset_on_lockup;
+ hdev->pldm = 0;
+
+ set_driver_behavior_per_device(hdev);
+
+ if (timeout_locked)
+ hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
+ else
+ hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
+
+ hdev->disabled = true;
+ hdev->pdev = pdev; /* can be NULL in case of simulator device */
+
+ /* Set default DMA mask to 32 bits */
+ hdev->dma_mask = 32;
+
+ mutex_lock(&hl_devs_idr_lock);
+
+ /* Always save 2 numbers, 1 for main device and 1 for control.
+ * They must be consecutive
+ */
+ main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS,
+ GFP_KERNEL);
+
+ if (main_id >= 0)
+ ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
+ main_id + 2, GFP_KERNEL);
+
+ mutex_unlock(&hl_devs_idr_lock);
+
+ if ((main_id < 0) || (ctrl_id < 0)) {
+ if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
+ pr_err("too many devices in the system\n");
+
+ if (main_id >= 0) {
+ mutex_lock(&hl_devs_idr_lock);
+ idr_remove(&hl_devs_idr, main_id);
+ mutex_unlock(&hl_devs_idr_lock);
+ }
+
+ rc = -EBUSY;
+ goto free_hdev;
+ }
+
+ hdev->id = main_id;
+ hdev->id_control = ctrl_id;
+
+ *dev = hdev;
+
+ return 0;
+
+free_hdev:
+ kfree(hdev);
+ return rc;
+}
+
+/*
+ * destroy_hdev - destroy habanalabs device instance
+ *
+ * @dev: pointer to the habanalabs device structure
+ *
+ */
+void destroy_hdev(struct hl_device *hdev)
+{
+ /* Remove device from the device list */
+ mutex_lock(&hl_devs_idr_lock);
+ idr_remove(&hl_devs_idr, hdev->id);
+ idr_remove(&hl_devs_idr, hdev->id_control);
+ mutex_unlock(&hl_devs_idr_lock);
+
+ kfree(hdev);
+}
+
+static int hl_pmops_suspend(struct device *dev)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+
+ pr_debug("Going to suspend PCI device\n");
+
+ if (!hdev) {
+ pr_err("device pointer is NULL in suspend\n");
+ return 0;
+ }
+
+ return hl_device_suspend(hdev);
+}
+
+static int hl_pmops_resume(struct device *dev)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+
+ pr_debug("Going to resume PCI device\n");
+
+ if (!hdev) {
+ pr_err("device pointer is NULL in resume\n");
+ return 0;
+ }
+
+ return hl_device_resume(hdev);
+}
+
+/*
+ * hl_pci_probe - probe PCI habanalabs devices
+ *
+ * @pdev: pointer to pci device
+ * @id: pointer to pci device id structure
+ *
+ * Standard PCI probe function for habanalabs device.
+ * Create a new habanalabs device and initialize it according to the
+ * device's type
+ */
+static int hl_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct hl_device *hdev;
+ int rc;
+
+ dev_info(&pdev->dev, HL_NAME
+ " device found [%04x:%04x] (rev %x)\n",
+ (int)pdev->vendor, (int)pdev->device, (int)pdev->revision);
+
+ rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1);
+ if (rc)
+ return rc;
+
+ pci_set_drvdata(pdev, hdev);
+
+ rc = hl_device_init(hdev, hl_class);
+ if (rc) {
+ dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
+ rc = -ENODEV;
+ goto disable_device;
+ }
+
+ return 0;
+
+disable_device:
+ pci_set_drvdata(pdev, NULL);
+ destroy_hdev(hdev);
+
+ return rc;
+}
+
+/*
+ * hl_pci_remove - remove PCI habanalabs devices
+ *
+ * @pdev: pointer to pci device
+ *
+ * Standard PCI remove function for habanalabs device
+ */
+static void hl_pci_remove(struct pci_dev *pdev)
+{
+ struct hl_device *hdev;
+
+ hdev = pci_get_drvdata(pdev);
+ if (!hdev)
+ return;
+
+ hl_device_fini(hdev);
+ pci_set_drvdata(pdev, NULL);
+
+ destroy_hdev(hdev);
+}
+
+static const struct dev_pm_ops hl_pm_ops = {
+ .suspend = hl_pmops_suspend,
+ .resume = hl_pmops_resume,
+};
+
+static struct pci_driver hl_pci_driver = {
+ .name = HL_NAME,
+ .id_table = ids,
+ .probe = hl_pci_probe,
+ .remove = hl_pci_remove,
+ .driver.pm = &hl_pm_ops,
+};
+
+/*
+ * hl_init - Initialize the habanalabs kernel driver
+ */
+static int __init hl_init(void)
+{
+ int rc;
+ dev_t dev;
+
+ pr_info("loading driver\n");
+
+ rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME);
+ if (rc < 0) {
+ pr_err("unable to get major\n");
+ return rc;
+ }
+
+ hl_major = MAJOR(dev);
+
+ hl_class = class_create(THIS_MODULE, HL_NAME);
+ if (IS_ERR(hl_class)) {
+ pr_err("failed to allocate class\n");
+ rc = PTR_ERR(hl_class);
+ goto remove_major;
+ }
+
+ hl_debugfs_init();
+
+ rc = pci_register_driver(&hl_pci_driver);
+ if (rc) {
+ pr_err("failed to register pci device\n");
+ goto remove_debugfs;
+ }
+
+ pr_debug("driver loaded\n");
+
+ return 0;
+
+remove_debugfs:
+ hl_debugfs_fini();
+ class_destroy(hl_class);
+remove_major:
+ unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
+ return rc;
+}
+
+/*
+ * hl_exit - Release all resources of the habanalabs kernel driver
+ */
+static void __exit hl_exit(void)
+{
+ pci_unregister_driver(&hl_pci_driver);
+
+ /*
+ * Removing debugfs must be after all devices or simulator devices
+ * have been removed because otherwise we get a bug in the
+ * debugfs module for referencing NULL objects
+ */
+ hl_debugfs_fini();
+
+ class_destroy(hl_class);
+ unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
+
+ idr_destroy(&hl_devs_idr);
+
+ pr_debug("driver removed\n");
+}
+
+module_init(hl_init);
+module_exit(hl_exit);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include <uapi/misc/habanalabs.h>
+#include "habanalabs.h"
+
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+
+static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
+ [HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
+ [HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf),
+ [HL_DEBUG_OP_STM] = sizeof(struct hl_debug_params_stm),
+ [HL_DEBUG_OP_FUNNEL] = 0,
+ [HL_DEBUG_OP_BMON] = sizeof(struct hl_debug_params_bmon),
+ [HL_DEBUG_OP_SPMU] = sizeof(struct hl_debug_params_spmu),
+ [HL_DEBUG_OP_TIMESTAMP] = 0
+
+};
+
+static int device_status_info(struct hl_device *hdev, struct hl_info_args *args)
+{
+ struct hl_info_device_status dev_stat = {0};
+ u32 size = args->return_size;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+ if ((!size) || (!out))
+ return -EINVAL;
+
+ dev_stat.status = hl_device_status(hdev);
+
+ return copy_to_user(out, &dev_stat,
+ min((size_t)size, sizeof(dev_stat))) ? -EFAULT : 0;
+}
+
+static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
+{
+ struct hl_info_hw_ip_info hw_ip = {0};
+ u32 size = args->return_size;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 sram_kmd_size, dram_kmd_size;
+
+ if ((!size) || (!out))
+ return -EINVAL;
+
+ sram_kmd_size = (prop->sram_user_base_address -
+ prop->sram_base_address);
+ dram_kmd_size = (prop->dram_user_base_address -
+ prop->dram_base_address);
+
+ hw_ip.device_id = hdev->asic_funcs->get_pci_id(hdev);
+ hw_ip.sram_base_address = prop->sram_user_base_address;
+ hw_ip.dram_base_address = prop->dram_user_base_address;
+ hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask;
+ hw_ip.sram_size = prop->sram_size - sram_kmd_size;
+ hw_ip.dram_size = prop->dram_size - dram_kmd_size;
+ if (hw_ip.dram_size > PAGE_SIZE)
+ hw_ip.dram_enabled = 1;
+ hw_ip.num_of_events = prop->num_of_events;
+
+ memcpy(hw_ip.armcp_version, prop->armcp_info.armcp_version,
+ min(VERSION_MAX_LEN, HL_INFO_VERSION_MAX_LEN));
+
+ memcpy(hw_ip.card_name, prop->armcp_info.card_name,
+ min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN));
+
+ hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version);
+ hw_ip.module_id = le32_to_cpu(prop->armcp_info.card_location);
+
+ hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr;
+ hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf;
+ hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od;
+ hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor;
+
+ return copy_to_user(out, &hw_ip,
+ min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0;
+}
+
+static int hw_events_info(struct hl_device *hdev, bool aggregate,
+ struct hl_info_args *args)
+{
+ u32 size, max_size = args->return_size;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+ void *arr;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ arr = hdev->asic_funcs->get_events_stat(hdev, aggregate, &size);
+
+ return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0;
+}
+
+static int dram_usage_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_info_dram_usage dram_usage = {0};
+ u32 max_size = args->return_size;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 dram_kmd_size;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ dram_kmd_size = (prop->dram_user_base_address -
+ prop->dram_base_address);
+ dram_usage.dram_free_mem = (prop->dram_size - dram_kmd_size) -
+ atomic64_read(&hdev->dram_used_mem);
+ if (hpriv->ctx)
+ dram_usage.ctx_dram_mem =
+ atomic64_read(&hpriv->ctx->dram_phys_mem);
+
+ return copy_to_user(out, &dram_usage,
+ min((size_t) max_size, sizeof(dram_usage))) ? -EFAULT : 0;
+}
+
+static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
+{
+ struct hl_info_hw_idle hw_idle = {0};
+ u32 max_size = args->return_size;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
+ &hw_idle.busy_engines_mask, NULL);
+
+ return copy_to_user(out, &hw_idle,
+ min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
+}
+
+static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args)
+{
+ struct hl_debug_params *params;
+ void *input = NULL, *output = NULL;
+ int rc;
+
+ params = kzalloc(sizeof(*params), GFP_KERNEL);
+ if (!params)
+ return -ENOMEM;
+
+ params->reg_idx = args->reg_idx;
+ params->enable = args->enable;
+ params->op = args->op;
+
+ if (args->input_ptr && args->input_size) {
+ input = kzalloc(hl_debug_struct_size[args->op], GFP_KERNEL);
+ if (!input) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ if (copy_from_user(input, u64_to_user_ptr(args->input_ptr),
+ args->input_size)) {
+ rc = -EFAULT;
+ dev_err(hdev->dev, "failed to copy input debug data\n");
+ goto out;
+ }
+
+ params->input = input;
+ }
+
+ if (args->output_ptr && args->output_size) {
+ output = kzalloc(args->output_size, GFP_KERNEL);
+ if (!output) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ params->output = output;
+ params->output_size = args->output_size;
+ }
+
+ rc = hdev->asic_funcs->debug_coresight(hdev, params);
+ if (rc) {
+ dev_err(hdev->dev,
+ "debug coresight operation failed %d\n", rc);
+ goto out;
+ }
+
+ if (output && copy_to_user((void __user *) (uintptr_t) args->output_ptr,
+ output, args->output_size)) {
+ dev_err(hdev->dev, "copy to user failed in debug ioctl\n");
+ rc = -EFAULT;
+ goto out;
+ }
+
+
+out:
+ kfree(params);
+ kfree(output);
+ kfree(input);
+
+ return rc;
+}
+
+static int device_utilization(struct hl_device *hdev, struct hl_info_args *args)
+{
+ struct hl_info_device_utilization device_util = {0};
+ u32 max_size = args->return_size;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ if ((args->period_ms < 100) || (args->period_ms > 1000) ||
+ (args->period_ms % 100)) {
+ dev_err(hdev->dev,
+ "period %u must be between 100 - 1000 and must be divisible by 100\n",
+ args->period_ms);
+ return -EINVAL;
+ }
+
+ device_util.utilization = hl_device_utilization(hdev, args->period_ms);
+
+ return copy_to_user(out, &device_util,
+ min((size_t) max_size, sizeof(device_util))) ? -EFAULT : 0;
+}
+
+static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args)
+{
+ struct hl_info_clk_rate clk_rate = {0};
+ u32 max_size = args->return_size;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+ int rc;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ rc = hdev->asic_funcs->get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz,
+ &clk_rate.max_clk_rate_mhz);
+ if (rc)
+ return rc;
+
+ return copy_to_user(out, &clk_rate,
+ min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0;
+}
+
+static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args)
+{
+ struct hl_info_reset_count reset_count = {0};
+ u32 max_size = args->return_size;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ reset_count.hard_reset_cnt = hdev->hard_reset_cnt;
+ reset_count.soft_reset_cnt = hdev->soft_reset_cnt;
+
+ return copy_to_user(out, &reset_count,
+ min((size_t) max_size, sizeof(reset_count))) ? -EFAULT : 0;
+}
+
+static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
+{
+ struct hl_info_time_sync time_sync = {0};
+ u32 max_size = args->return_size;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ time_sync.device_time = hdev->asic_funcs->get_device_time(hdev);
+ time_sync.host_time = ktime_get_raw_ns();
+
+ return copy_to_user(out, &time_sync,
+ min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
+}
+
+static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_info_cs_counters cs_counters = {0};
+ u32 max_size = args->return_size;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ memcpy(&cs_counters.cs_counters, &hdev->aggregated_cs_counters,
+ sizeof(struct hl_cs_counters));
+
+ if (hpriv->ctx)
+ memcpy(&cs_counters.ctx_cs_counters, &hpriv->ctx->cs_counters,
+ sizeof(struct hl_cs_counters));
+
+ return copy_to_user(out, &cs_counters,
+ min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0;
+}
+
+static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
+ struct device *dev)
+{
+ struct hl_info_args *args = data;
+ struct hl_device *hdev = hpriv->hdev;
+ int rc;
+
+ /*
+ * Information is returned for the following opcodes even if the device
+ * is disabled or in reset.
+ */
+ switch (args->op) {
+ case HL_INFO_HW_IP_INFO:
+ return hw_ip_info(hdev, args);
+
+ case HL_INFO_DEVICE_STATUS:
+ return device_status_info(hdev, args);
+
+ case HL_INFO_RESET_COUNT:
+ return get_reset_count(hdev, args);
+
+ default:
+ break;
+ }
+
+ if (hl_device_disabled_or_in_reset(hdev)) {
+ dev_warn_ratelimited(dev,
+ "Device is %s. Can't execute INFO IOCTL\n",
+ atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+ return -EBUSY;
+ }
+
+ switch (args->op) {
+ case HL_INFO_HW_EVENTS:
+ rc = hw_events_info(hdev, false, args);
+ break;
+
+ case HL_INFO_DRAM_USAGE:
+ rc = dram_usage_info(hpriv, args);
+ break;
+
+ case HL_INFO_HW_IDLE:
+ rc = hw_idle(hdev, args);
+ break;
+
+ case HL_INFO_DEVICE_UTILIZATION:
+ rc = device_utilization(hdev, args);
+ break;
+
+ case HL_INFO_HW_EVENTS_AGGREGATE:
+ rc = hw_events_info(hdev, true, args);
+ break;
+
+ case HL_INFO_CLK_RATE:
+ rc = get_clk_rate(hdev, args);
+ break;
+
+ case HL_INFO_TIME_SYNC:
+ return time_sync_info(hdev, args);
+
+ case HL_INFO_CS_COUNTERS:
+ return cs_counters_info(hpriv, args);
+
+ default:
+ dev_err(dev, "Invalid request %d\n", args->op);
+ rc = -ENOTTY;
+ break;
+ }
+
+ return rc;
+}
+
+static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+ return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev);
+}
+
+static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data)
+{
+ return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl);
+}
+
+static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+ struct hl_debug_args *args = data;
+ struct hl_device *hdev = hpriv->hdev;
+ int rc = 0;
+
+ if (hl_device_disabled_or_in_reset(hdev)) {
+ dev_warn_ratelimited(hdev->dev,
+ "Device is %s. Can't execute DEBUG IOCTL\n",
+ atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+ return -EBUSY;
+ }
+
+ switch (args->op) {
+ case HL_DEBUG_OP_ETR:
+ case HL_DEBUG_OP_ETF:
+ case HL_DEBUG_OP_STM:
+ case HL_DEBUG_OP_FUNNEL:
+ case HL_DEBUG_OP_BMON:
+ case HL_DEBUG_OP_SPMU:
+ case HL_DEBUG_OP_TIMESTAMP:
+ if (!hdev->in_debug) {
+ dev_err_ratelimited(hdev->dev,
+ "Rejecting debug configuration request because device not in debug mode\n");
+ return -EFAULT;
+ }
+ args->input_size =
+ min(args->input_size, hl_debug_struct_size[args->op]);
+ rc = debug_coresight(hdev, args);
+ break;
+ case HL_DEBUG_OP_SET_MODE:
+ rc = hl_device_set_debug_mode(hdev, (bool) args->enable);
+ break;
+ default:
+ dev_err(hdev->dev, "Invalid request %d\n", args->op);
+ rc = -ENOTTY;
+ break;
+ }
+
+ return rc;
+}
+
+#define HL_IOCTL_DEF(ioctl, _func) \
+ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func}
+
+static const struct hl_ioctl_desc hl_ioctls[] = {
+ HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl),
+ HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
+ HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
+ HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl),
+ HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl),
+ HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl)
+};
+
+static const struct hl_ioctl_desc hl_ioctls_control[] = {
+ HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control)
+};
+
+static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
+ const struct hl_ioctl_desc *ioctl, struct device *dev)
+{
+ struct hl_fpriv *hpriv = filep->private_data;
+ struct hl_device *hdev = hpriv->hdev;
+ unsigned int nr = _IOC_NR(cmd);
+ char stack_kdata[128] = {0};
+ char *kdata = NULL;
+ unsigned int usize, asize;
+ hl_ioctl_t *func;
+ u32 hl_size;
+ int retcode;
+
+ if (hdev->hard_reset_pending) {
+ dev_crit_ratelimited(hdev->dev_ctrl,
+ "Device HARD reset pending! Please close FD\n");
+ return -ENODEV;
+ }
+
+ /* Do not trust userspace, use our own definition */
+ func = ioctl->func;
+
+ if (unlikely(!func)) {
+ dev_dbg(dev, "no function\n");
+ retcode = -ENOTTY;
+ goto out_err;
+ }
+
+ hl_size = _IOC_SIZE(ioctl->cmd);
+ usize = asize = _IOC_SIZE(cmd);
+ if (hl_size > asize)
+ asize = hl_size;
+
+ cmd = ioctl->cmd;
+
+ if (cmd & (IOC_IN | IOC_OUT)) {
+ if (asize <= sizeof(stack_kdata)) {
+ kdata = stack_kdata;
+ } else {
+ kdata = kzalloc(asize, GFP_KERNEL);
+ if (!kdata) {
+ retcode = -ENOMEM;
+ goto out_err;
+ }
+ }
+ }
+
+ if (cmd & IOC_IN) {
+ if (copy_from_user(kdata, (void __user *)arg, usize)) {
+ retcode = -EFAULT;
+ goto out_err;
+ }
+ } else if (cmd & IOC_OUT) {
+ memset(kdata, 0, usize);
+ }
+
+ retcode = func(hpriv, kdata);
+
+ if ((cmd & IOC_OUT) && copy_to_user((void __user *)arg, kdata, usize))
+ retcode = -EFAULT;
+
+out_err:
+ if (retcode)
+ dev_dbg(dev, "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
+ task_pid_nr(current), cmd, nr);
+
+ if (kdata != stack_kdata)
+ kfree(kdata);
+
+ return retcode;
+}
+
+long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+ struct hl_fpriv *hpriv = filep->private_data;
+ struct hl_device *hdev = hpriv->hdev;
+ const struct hl_ioctl_desc *ioctl = NULL;
+ unsigned int nr = _IOC_NR(cmd);
+
+ if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) {
+ ioctl = &hl_ioctls[nr];
+ } else {
+ dev_err(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n",
+ task_pid_nr(current), nr);
+ return -ENOTTY;
+ }
+
+ return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev);
+}
+
+long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+ struct hl_fpriv *hpriv = filep->private_data;
+ struct hl_device *hdev = hpriv->hdev;
+ const struct hl_ioctl_desc *ioctl = NULL;
+ unsigned int nr = _IOC_NR(cmd);
+
+ if (nr == _IOC_NR(HL_IOCTL_INFO)) {
+ ioctl = &hl_ioctls_control[nr];
+ } else {
+ dev_err(hdev->dev_ctrl, "invalid ioctl: pid=%d, nr=0x%02x\n",
+ task_pid_nr(current), nr);
+ return -ENOTTY;
+ }
+
+ return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev_ctrl);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+#include <linux/slab.h>
+
+/*
+ * hl_queue_add_ptr - add to pi or ci and checks if it wraps around
+ *
+ * @ptr: the current pi/ci value
+ * @val: the amount to add
+ *
+ * Add val to ptr. It can go until twice the queue length.
+ */
+inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
+{
+ ptr += val;
+ ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
+ return ptr;
+}
+static inline int queue_ci_get(atomic_t *ci, u32 queue_len)
+{
+ return atomic_read(ci) & ((queue_len << 1) - 1);
+}
+
+static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
+{
+ int delta = (q->pi - queue_ci_get(&q->ci, queue_len));
+
+ if (delta >= 0)
+ return (queue_len - delta);
+ else
+ return (abs(delta) - queue_len);
+}
+
+void hl_int_hw_queue_update_ci(struct hl_cs *cs)
+{
+ struct hl_device *hdev = cs->ctx->hdev;
+ struct hl_hw_queue *q;
+ int i;
+
+ if (hdev->disabled)
+ return;
+
+ q = &hdev->kernel_queues[0];
+ for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
+ if (q->queue_type == QUEUE_TYPE_INT)
+ atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
+ }
+}
+
+/*
+ * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
+ * H/W queue.
+ * @hdev: pointer to habanalabs device structure
+ * @q: pointer to habanalabs queue structure
+ * @ctl: BD's control word
+ * @len: BD's length
+ * @ptr: BD's pointer
+ *
+ * This function assumes there is enough space on the queue to submit a new
+ * BD to it. It initializes the next BD and calls the device specific
+ * function to set the pi (and doorbell)
+ *
+ * This function must be called when the scheduler mutex is taken
+ *
+ */
+static void ext_and_hw_queue_submit_bd(struct hl_device *hdev,
+ struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr)
+{
+ struct hl_bd *bd;
+
+ bd = (struct hl_bd *) (uintptr_t) q->kernel_address;
+ bd += hl_pi_2_offset(q->pi);
+ bd->ctl = cpu_to_le32(ctl);
+ bd->len = cpu_to_le32(len);
+ bd->ptr = cpu_to_le64(ptr);
+
+ q->pi = hl_queue_inc_ptr(q->pi);
+ hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
+}
+
+/*
+ * ext_queue_sanity_checks - perform some sanity checks on external queue
+ *
+ * @hdev : pointer to hl_device structure
+ * @q : pointer to hl_hw_queue structure
+ * @num_of_entries : how many entries to check for space
+ * @reserve_cq_entry : whether to reserve an entry in the cq
+ *
+ * H/W queues spinlock should be taken before calling this function
+ *
+ * Perform the following:
+ * - Make sure we have enough space in the h/w queue
+ * - Make sure we have enough space in the completion queue
+ * - Reserve space in the completion queue (needs to be reversed if there
+ * is a failure down the road before the actual submission of work). Only
+ * do this action if reserve_cq_entry is true
+ *
+ */
+static int ext_queue_sanity_checks(struct hl_device *hdev,
+ struct hl_hw_queue *q, int num_of_entries,
+ bool reserve_cq_entry)
+{
+ atomic_t *free_slots =
+ &hdev->completion_queue[q->cq_id].free_slots_cnt;
+ int free_slots_cnt;
+
+ /* Check we have enough space in the queue */
+ free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);
+
+ if (free_slots_cnt < num_of_entries) {
+ dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
+ q->hw_queue_id, num_of_entries);
+ return -EAGAIN;
+ }
+
+ if (reserve_cq_entry) {
+ /*
+ * Check we have enough space in the completion queue
+ * Add -1 to counter (decrement) unless counter was already 0
+ * In that case, CQ is full so we can't submit a new CB because
+ * we won't get ack on its completion
+ * atomic_add_unless will return 0 if counter was already 0
+ */
+ if (atomic_add_negative(num_of_entries * -1, free_slots)) {
+ dev_dbg(hdev->dev, "No space for %d on CQ %d\n",
+ num_of_entries, q->hw_queue_id);
+ atomic_add(num_of_entries, free_slots);
+ return -EAGAIN;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * int_queue_sanity_checks - perform some sanity checks on internal queue
+ *
+ * @hdev : pointer to hl_device structure
+ * @q : pointer to hl_hw_queue structure
+ * @num_of_entries : how many entries to check for space
+ *
+ * H/W queues spinlock should be taken before calling this function
+ *
+ * Perform the following:
+ * - Make sure we have enough space in the h/w queue
+ *
+ */
+static int int_queue_sanity_checks(struct hl_device *hdev,
+ struct hl_hw_queue *q,
+ int num_of_entries)
+{
+ int free_slots_cnt;
+
+ if (num_of_entries > q->int_queue_len) {
+ dev_err(hdev->dev,
+ "Cannot populate queue %u with %u jobs\n",
+ q->hw_queue_id, num_of_entries);
+ return -ENOMEM;
+ }
+
+ /* Check we have enough space in the queue */
+ free_slots_cnt = queue_free_slots(q, q->int_queue_len);
+
+ if (free_slots_cnt < num_of_entries) {
+ dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
+ q->hw_queue_id, num_of_entries);
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+/*
+ * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue
+ * @hdev: Pointer to hl_device structure.
+ * @q: Pointer to hl_hw_queue structure.
+ * @num_of_entries: How many entries to check for space.
+ *
+ * Notice: We do not reserve queue entries so this function mustn't be called
+ * more than once per CS for the same queue
+ *
+ */
+static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
+ int num_of_entries)
+{
+ int free_slots_cnt;
+
+ /* Check we have enough space in the queue */
+ free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);
+
+ if (free_slots_cnt < num_of_entries) {
+ dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
+ q->hw_queue_id, num_of_entries);
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+/*
+ * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
+ *
+ * @hdev: pointer to hl_device structure
+ * @hw_queue_id: Queue's type
+ * @cb_size: size of CB
+ * @cb_ptr: pointer to CB location
+ *
+ * This function sends a single CB, that must NOT generate a completion entry
+ *
+ */
+int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
+ u32 cb_size, u64 cb_ptr)
+{
+ struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
+ int rc = 0;
+
+ /*
+ * The CPU queue is a synchronous queue with an effective depth of
+ * a single entry (although it is allocated with room for multiple
+ * entries). Therefore, there is a different lock, called
+ * send_cpu_message_lock, that serializes accesses to the CPU queue.
+ * As a result, we don't need to lock the access to the entire H/W
+ * queues module when submitting a JOB to the CPU queue
+ */
+ if (q->queue_type != QUEUE_TYPE_CPU)
+ hdev->asic_funcs->hw_queues_lock(hdev);
+
+ if (hdev->disabled) {
+ rc = -EPERM;
+ goto out;
+ }
+
+ /*
+ * hl_hw_queue_send_cb_no_cmpl() is called for queues of a H/W queue
+ * type only on init phase, when the queues are empty and being tested,
+ * so there is no need for sanity checks.
+ */
+ if (q->queue_type != QUEUE_TYPE_HW) {
+ rc = ext_queue_sanity_checks(hdev, q, 1, false);
+ if (rc)
+ goto out;
+ }
+
+ ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
+
+out:
+ if (q->queue_type != QUEUE_TYPE_CPU)
+ hdev->asic_funcs->hw_queues_unlock(hdev);
+
+ return rc;
+}
+
+/*
+ * ext_queue_schedule_job - submit a JOB to an external queue
+ *
+ * @job: pointer to the job that needs to be submitted to the queue
+ *
+ * This function must be called when the scheduler mutex is taken
+ *
+ */
+static void ext_queue_schedule_job(struct hl_cs_job *job)
+{
+ struct hl_device *hdev = job->cs->ctx->hdev;
+ struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
+ struct hl_cq_entry cq_pkt;
+ struct hl_cq *cq;
+ u64 cq_addr;
+ struct hl_cb *cb;
+ u32 ctl;
+ u32 len;
+ u64 ptr;
+
+ /*
+ * Update the JOB ID inside the BD CTL so the device would know what
+ * to write in the completion queue
+ */
+ ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK);
+
+ cb = job->patched_cb;
+ len = job->job_cb_size;
+ ptr = cb->bus_address;
+
+ cq_pkt.data = cpu_to_le32(
+ ((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
+ & CQ_ENTRY_SHADOW_INDEX_MASK) |
+ (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
+ (1 << CQ_ENTRY_READY_SHIFT));
+
+ /*
+ * No need to protect pi_offset because scheduling to the
+ * H/W queues is done under the scheduler mutex
+ *
+ * No need to check if CQ is full because it was already
+ * checked in ext_queue_sanity_checks
+ */
+ cq = &hdev->completion_queue[q->cq_id];
+ cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);
+
+ hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
+ cq_addr,
+ le32_to_cpu(cq_pkt.data),
+ q->msi_vec,
+ job->contains_dma_pkt);
+
+ q->shadow_queue[hl_pi_2_offset(q->pi)] = job;
+
+ cq->pi = hl_cq_inc_ptr(cq->pi);
+
+ ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
+}
+
+/*
+ * int_queue_schedule_job - submit a JOB to an internal queue
+ *
+ * @job: pointer to the job that needs to be submitted to the queue
+ *
+ * This function must be called when the scheduler mutex is taken
+ *
+ */
+static void int_queue_schedule_job(struct hl_cs_job *job)
+{
+ struct hl_device *hdev = job->cs->ctx->hdev;
+ struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
+ struct hl_bd bd;
+ __le64 *pi;
+
+ bd.ctl = 0;
+ bd.len = cpu_to_le32(job->job_cb_size);
+ bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);
+
+ pi = (__le64 *) (uintptr_t) (q->kernel_address +
+ ((q->pi & (q->int_queue_len - 1)) * sizeof(bd)));
+
+ q->pi++;
+ q->pi &= ((q->int_queue_len << 1) - 1);
+
+ hdev->asic_funcs->pqe_write(hdev, pi, &bd);
+
+ hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
+}
+
+/*
+ * hw_queue_schedule_job - submit a JOB to a H/W queue
+ *
+ * @job: pointer to the job that needs to be submitted to the queue
+ *
+ * This function must be called when the scheduler mutex is taken
+ *
+ */
+static void hw_queue_schedule_job(struct hl_cs_job *job)
+{
+ struct hl_device *hdev = job->cs->ctx->hdev;
+ struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
+ u64 ptr;
+ u32 offset, ctl, len;
+
+ /*
+ * Upon PQE completion, COMP_DATA is used as the write data to the
+ * completion queue (QMAN HBW message), and COMP_OFFSET is used as the
+ * write address offset in the SM block (QMAN LBW message).
+ * The write address offset is calculated as "COMP_OFFSET << 2".
+ */
+ offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
+ ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
+ ((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);
+
+ len = job->job_cb_size;
+
+ /*
+ * A patched CB is created only if a user CB was allocated by driver and
+ * MMU is disabled. If MMU is enabled, the user CB should be used
+ * instead. If the user CB wasn't allocated by driver, assume that it
+ * holds an address.
+ */
+ if (job->patched_cb)
+ ptr = job->patched_cb->bus_address;
+ else if (job->is_kernel_allocated_cb)
+ ptr = job->user_cb->bus_address;
+ else
+ ptr = (u64) (uintptr_t) job->user_cb;
+
+ ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
+}
+
+/*
+ * init_signal_wait_cs - initialize a signal/wait CS
+ * @cs: pointer to the signal/wait CS
+ *
+ * H/W queues spinlock should be taken before calling this function
+ */
+static void init_signal_wait_cs(struct hl_cs *cs)
+{
+ struct hl_ctx *ctx = cs->ctx;
+ struct hl_device *hdev = ctx->hdev;
+ struct hl_hw_queue *hw_queue;
+ struct hl_cs_compl *cs_cmpl =
+ container_of(cs->fence, struct hl_cs_compl, base_fence);
+
+ struct hl_hw_sob *hw_sob;
+ struct hl_cs_job *job;
+ u32 q_idx;
+
+ /* There is only one job in a signal/wait CS */
+ job = list_first_entry(&cs->job_list, struct hl_cs_job,
+ cs_node);
+ q_idx = job->hw_queue_id;
+ hw_queue = &hdev->kernel_queues[q_idx];
+
+ if (cs->type & CS_TYPE_SIGNAL) {
+ hw_sob = &hw_queue->hw_sob[hw_queue->curr_sob_offset];
+
+ cs_cmpl->hw_sob = hw_sob;
+ cs_cmpl->sob_val = hw_queue->next_sob_val++;
+
+ dev_dbg(hdev->dev,
+ "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
+ cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
+
+ hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
+ cs_cmpl->hw_sob->sob_id);
+
+ kref_get(&hw_sob->kref);
+
+ /* check for wraparound */
+ if (hw_queue->next_sob_val == HL_MAX_SOB_VAL) {
+ /*
+ * Decrement as we reached the max value.
+ * The release function won't be called here as we've
+ * just incremented the refcount.
+ */
+ kref_put(&hw_sob->kref, hl_sob_reset_error);
+ hw_queue->next_sob_val = 1;
+ /* only two SOBs are currently in use */
+ hw_queue->curr_sob_offset =
+ (hw_queue->curr_sob_offset + 1) %
+ HL_RSVD_SOBS_IN_USE;
+
+ dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
+ hw_queue->curr_sob_offset, q_idx);
+ }
+ } else if (cs->type & CS_TYPE_WAIT) {
+ struct hl_cs_compl *signal_cs_cmpl;
+
+ signal_cs_cmpl = container_of(cs->signal_fence,
+ struct hl_cs_compl,
+ base_fence);
+
+ /* copy the the SOB id and value of the signal CS */
+ cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
+ cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+
+ dev_dbg(hdev->dev,
+ "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
+ cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
+ hw_queue->base_mon_id, q_idx);
+
+ hdev->asic_funcs->gen_wait_cb(hdev, job->patched_cb,
+ cs_cmpl->hw_sob->sob_id,
+ cs_cmpl->sob_val,
+ hw_queue->base_mon_id,
+ q_idx);
+
+ kref_get(&cs_cmpl->hw_sob->kref);
+ /*
+ * Must put the signal fence after the SOB refcnt increment so
+ * the SOB refcnt won't turn 0 and reset the SOB before the
+ * wait CS was submitted.
+ */
+ mb();
+ dma_fence_put(cs->signal_fence);
+ cs->signal_fence = NULL;
+ }
+}
+
+/*
+ * hl_hw_queue_schedule_cs - schedule a command submission
+ * @cs: pointer to the CS
+ */
+int hl_hw_queue_schedule_cs(struct hl_cs *cs)
+{
+ struct hl_ctx *ctx = cs->ctx;
+ struct hl_device *hdev = ctx->hdev;
+ struct hl_cs_job *job, *tmp;
+ struct hl_hw_queue *q;
+ u32 max_queues;
+ int rc = 0, i, cq_cnt;
+
+ hdev->asic_funcs->hw_queues_lock(hdev);
+
+ if (hl_device_disabled_or_in_reset(hdev)) {
+ ctx->cs_counters.device_in_reset_drop_cnt++;
+ dev_err(hdev->dev,
+ "device is disabled or in reset, CS rejected!\n");
+ rc = -EPERM;
+ goto out;
+ }
+
+ max_queues = hdev->asic_prop.max_queues;
+
+ q = &hdev->kernel_queues[0];
+ for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) {
+ if (cs->jobs_in_queue_cnt[i]) {
+ switch (q->queue_type) {
+ case QUEUE_TYPE_EXT:
+ rc = ext_queue_sanity_checks(hdev, q,
+ cs->jobs_in_queue_cnt[i], true);
+ break;
+ case QUEUE_TYPE_INT:
+ rc = int_queue_sanity_checks(hdev, q,
+ cs->jobs_in_queue_cnt[i]);
+ break;
+ case QUEUE_TYPE_HW:
+ rc = hw_queue_sanity_checks(hdev, q,
+ cs->jobs_in_queue_cnt[i]);
+ break;
+ default:
+ dev_err(hdev->dev, "Queue type %d is invalid\n",
+ q->queue_type);
+ rc = -EINVAL;
+ break;
+ }
+
+ if (rc) {
+ ctx->cs_counters.queue_full_drop_cnt++;
+ goto unroll_cq_resv;
+ }
+
+ if (q->queue_type == QUEUE_TYPE_EXT)
+ cq_cnt++;
+ }
+ }
+
+ if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT))
+ init_signal_wait_cs(cs);
+
+ spin_lock(&hdev->hw_queues_mirror_lock);
+ list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list);
+
+ /* Queue TDR if the CS is the first entry and if timeout is wanted */
+ if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
+ (list_first_entry(&hdev->hw_queues_mirror_list,
+ struct hl_cs, mirror_node) == cs)) {
+ cs->tdr_active = true;
+ schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
+ spin_unlock(&hdev->hw_queues_mirror_lock);
+ } else {
+ spin_unlock(&hdev->hw_queues_mirror_lock);
+ }
+
+ if (!hdev->cs_active_cnt++) {
+ struct hl_device_idle_busy_ts *ts;
+
+ ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx];
+ ts->busy_to_idle_ts = ktime_set(0, 0);
+ ts->idle_to_busy_ts = ktime_get();
+ }
+
+ list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
+ switch (job->queue_type) {
+ case QUEUE_TYPE_EXT:
+ ext_queue_schedule_job(job);
+ break;
+ case QUEUE_TYPE_INT:
+ int_queue_schedule_job(job);
+ break;
+ case QUEUE_TYPE_HW:
+ hw_queue_schedule_job(job);
+ break;
+ default:
+ break;
+ }
+
+ cs->submitted = true;
+
+ goto out;
+
+unroll_cq_resv:
+ q = &hdev->kernel_queues[0];
+ for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
+ if ((q->queue_type == QUEUE_TYPE_EXT) &&
+ (cs->jobs_in_queue_cnt[i])) {
+ atomic_t *free_slots =
+ &hdev->completion_queue[i].free_slots_cnt;
+ atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
+ cq_cnt--;
+ }
+ }
+
+out:
+ hdev->asic_funcs->hw_queues_unlock(hdev);
+
+ return rc;
+}
+
+/*
+ * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
+ *
+ * @hdev: pointer to hl_device structure
+ * @hw_queue_id: which queue to increment its ci
+ */
+void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
+{
+ struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
+
+ atomic_inc(&q->ci);
+}
+
+static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
+ bool is_cpu_queue)
+{
+ void *p;
+ int rc;
+
+ if (is_cpu_queue)
+ p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
+ HL_QUEUE_SIZE_IN_BYTES,
+ &q->bus_address);
+ else
+ p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
+ HL_QUEUE_SIZE_IN_BYTES,
+ &q->bus_address,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!p)
+ return -ENOMEM;
+
+ q->kernel_address = (u64) (uintptr_t) p;
+
+ q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH,
+ sizeof(*q->shadow_queue),
+ GFP_KERNEL);
+ if (!q->shadow_queue) {
+ dev_err(hdev->dev,
+ "Failed to allocate shadow queue for H/W queue %d\n",
+ q->hw_queue_id);
+ rc = -ENOMEM;
+ goto free_queue;
+ }
+
+ /* Make sure read/write pointers are initialized to start of queue */
+ atomic_set(&q->ci, 0);
+ q->pi = 0;
+
+ return 0;
+
+free_queue:
+ if (is_cpu_queue)
+ hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
+ HL_QUEUE_SIZE_IN_BYTES,
+ (void *) (uintptr_t) q->kernel_address);
+ else
+ hdev->asic_funcs->asic_dma_free_coherent(hdev,
+ HL_QUEUE_SIZE_IN_BYTES,
+ (void *) (uintptr_t) q->kernel_address,
+ q->bus_address);
+
+ return rc;
+}
+
+static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
+{
+ void *p;
+
+ p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id,
+ &q->bus_address, &q->int_queue_len);
+ if (!p) {
+ dev_err(hdev->dev,
+ "Failed to get base address for internal queue %d\n",
+ q->hw_queue_id);
+ return -EFAULT;
+ }
+
+ q->kernel_address = (u64) (uintptr_t) p;
+ q->pi = 0;
+ atomic_set(&q->ci, 0);
+
+ return 0;
+}
+
+static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
+{
+ return ext_and_cpu_queue_init(hdev, q, true);
+}
+
+static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
+{
+ return ext_and_cpu_queue_init(hdev, q, false);
+}
+
+static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
+{
+ void *p;
+
+ p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
+ HL_QUEUE_SIZE_IN_BYTES,
+ &q->bus_address,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!p)
+ return -ENOMEM;
+
+ q->kernel_address = (u64) (uintptr_t) p;
+
+ /* Make sure read/write pointers are initialized to start of queue */
+ atomic_set(&q->ci, 0);
+ q->pi = 0;
+
+ return 0;
+}
+
+static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
+{
+ struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_hw_sob *hw_sob;
+ int sob, queue_idx = hdev->sync_stream_queue_idx++;
+
+ hw_queue->base_sob_id =
+ prop->sync_stream_first_sob + queue_idx * HL_RSVD_SOBS;
+ hw_queue->base_mon_id =
+ prop->sync_stream_first_mon + queue_idx * HL_RSVD_MONS;
+ hw_queue->next_sob_val = 1;
+ hw_queue->curr_sob_offset = 0;
+
+ for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
+ hw_sob = &hw_queue->hw_sob[sob];
+ hw_sob->hdev = hdev;
+ hw_sob->sob_id = hw_queue->base_sob_id + sob;
+ hw_sob->q_idx = q_idx;
+ kref_init(&hw_sob->kref);
+ }
+}
+
+static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
+{
+ struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
+
+ /*
+ * In case we got here due to a stuck CS, the refcnt might be bigger
+ * than 1 and therefore we reset it.
+ */
+ kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
+ hw_queue->curr_sob_offset = 0;
+ hw_queue->next_sob_val = 1;
+}
+
+/*
+ * queue_init - main initialization function for H/W queue object
+ *
+ * @hdev: pointer to hl_device device structure
+ * @q: pointer to hl_hw_queue queue structure
+ * @hw_queue_id: The id of the H/W queue
+ *
+ * Allocate dma-able memory for the queue and initialize fields
+ * Returns 0 on success
+ */
+static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
+ u32 hw_queue_id)
+{
+ int rc;
+
+ q->hw_queue_id = hw_queue_id;
+
+ switch (q->queue_type) {
+ case QUEUE_TYPE_EXT:
+ rc = ext_queue_init(hdev, q);
+ break;
+ case QUEUE_TYPE_INT:
+ rc = int_queue_init(hdev, q);
+ break;
+ case QUEUE_TYPE_CPU:
+ rc = cpu_queue_init(hdev, q);
+ break;
+ case QUEUE_TYPE_HW:
+ rc = hw_queue_init(hdev, q);
+ break;
+ case QUEUE_TYPE_NA:
+ q->valid = 0;
+ return 0;
+ default:
+ dev_crit(hdev->dev, "wrong queue type %d during init\n",
+ q->queue_type);
+ rc = -EINVAL;
+ break;
+ }
+
+ if (q->supports_sync_stream)
+ sync_stream_queue_init(hdev, q->hw_queue_id);
+
+ if (rc)
+ return rc;
+
+ q->valid = 1;
+
+ return 0;
+}
+
+/*
+ * hw_queue_fini - destroy queue
+ *
+ * @hdev: pointer to hl_device device structure
+ * @q: pointer to hl_hw_queue queue structure
+ *
+ * Free the queue memory
+ */
+static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
+{
+ if (!q->valid)
+ return;
+
+ /*
+ * If we arrived here, there are no jobs waiting on this queue
+ * so we can safely remove it.
+ * This is because this function can only called when:
+ * 1. Either a context is deleted, which only can occur if all its
+ * jobs were finished
+ * 2. A context wasn't able to be created due to failure or timeout,
+ * which means there are no jobs on the queue yet
+ *
+ * The only exception are the queues of the kernel context, but
+ * if they are being destroyed, it means that the entire module is
+ * being removed. If the module is removed, it means there is no open
+ * user context. It also means that if a job was submitted by
+ * the kernel driver (e.g. context creation), the job itself was
+ * released by the kernel driver when a timeout occurred on its
+ * Completion. Thus, we don't need to release it again.
+ */
+
+ if (q->queue_type == QUEUE_TYPE_INT)
+ return;
+
+ kfree(q->shadow_queue);
+
+ if (q->queue_type == QUEUE_TYPE_CPU)
+ hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
+ HL_QUEUE_SIZE_IN_BYTES,
+ (void *) (uintptr_t) q->kernel_address);
+ else
+ hdev->asic_funcs->asic_dma_free_coherent(hdev,
+ HL_QUEUE_SIZE_IN_BYTES,
+ (void *) (uintptr_t) q->kernel_address,
+ q->bus_address);
+}
+
+int hl_hw_queues_create(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *asic = &hdev->asic_prop;
+ struct hl_hw_queue *q;
+ int i, rc, q_ready_cnt;
+
+ hdev->kernel_queues = kcalloc(asic->max_queues,
+ sizeof(*hdev->kernel_queues), GFP_KERNEL);
+
+ if (!hdev->kernel_queues) {
+ dev_err(hdev->dev, "Not enough memory for H/W queues\n");
+ return -ENOMEM;
+ }
+
+ /* Initialize the H/W queues */
+ for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues;
+ i < asic->max_queues ; i++, q_ready_cnt++, q++) {
+
+ q->queue_type = asic->hw_queues_props[i].type;
+ q->supports_sync_stream =
+ asic->hw_queues_props[i].supports_sync_stream;
+ rc = queue_init(hdev, q, i);
+ if (rc) {
+ dev_err(hdev->dev,
+ "failed to initialize queue %d\n", i);
+ goto release_queues;
+ }
+ }
+
+ return 0;
+
+release_queues:
+ for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++)
+ queue_fini(hdev, q);
+
+ kfree(hdev->kernel_queues);
+
+ return rc;
+}
+
+void hl_hw_queues_destroy(struct hl_device *hdev)
+{
+ struct hl_hw_queue *q;
+ u32 max_queues = hdev->asic_prop.max_queues;
+ int i;
+
+ for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++)
+ queue_fini(hdev, q);
+
+ kfree(hdev->kernel_queues);
+}
+
+void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
+{
+ struct hl_hw_queue *q;
+ u32 max_queues = hdev->asic_prop.max_queues;
+ int i;
+
+ for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) {
+ if ((!q->valid) ||
+ ((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU)))
+ continue;
+ q->pi = 0;
+ atomic_set(&q->ci, 0);
+
+ if (q->supports_sync_stream)
+ sync_stream_queue_reset(hdev, q->hw_queue_id);
+ }
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+#include <linux/pci.h>
+#include <linux/hwmon.h>
+
+#define SENSORS_PKT_TIMEOUT 1000000 /* 1s */
+#define HWMON_NR_SENSOR_TYPES (hwmon_pwm + 1)
+
+int hl_build_hwmon_channel_info(struct hl_device *hdev,
+ struct armcp_sensor *sensors_arr)
+{
+ u32 counts[HWMON_NR_SENSOR_TYPES] = {0};
+ u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL};
+ u32 sensors_by_type_next_index[HWMON_NR_SENSOR_TYPES] = {0};
+ struct hwmon_channel_info **channels_info;
+ u32 num_sensors_for_type, num_active_sensor_types = 0,
+ arr_size = 0, *curr_arr;
+ enum hwmon_sensor_types type;
+ int rc, i, j;
+
+ for (i = 0 ; i < ARMCP_MAX_SENSORS ; i++) {
+ type = le32_to_cpu(sensors_arr[i].type);
+
+ if ((type == 0) && (sensors_arr[i].flags == 0))
+ break;
+
+ if (type >= HWMON_NR_SENSOR_TYPES) {
+ dev_err(hdev->dev,
+ "Got wrong sensor type %d from device\n", type);
+ return -EINVAL;
+ }
+
+ counts[type]++;
+ arr_size++;
+ }
+
+ for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) {
+ if (counts[i] == 0)
+ continue;
+
+ num_sensors_for_type = counts[i] + 1;
+ curr_arr = kcalloc(num_sensors_for_type, sizeof(*curr_arr),
+ GFP_KERNEL);
+ if (!curr_arr) {
+ rc = -ENOMEM;
+ goto sensors_type_err;
+ }
+
+ num_active_sensor_types++;
+ sensors_by_type[i] = curr_arr;
+ }
+
+ for (i = 0 ; i < arr_size ; i++) {
+ type = le32_to_cpu(sensors_arr[i].type);
+ curr_arr = sensors_by_type[type];
+ curr_arr[sensors_by_type_next_index[type]++] =
+ le32_to_cpu(sensors_arr[i].flags);
+ }
+
+ channels_info = kcalloc(num_active_sensor_types + 1,
+ sizeof(*channels_info), GFP_KERNEL);
+ if (!channels_info) {
+ rc = -ENOMEM;
+ goto channels_info_array_err;
+ }
+
+ for (i = 0 ; i < num_active_sensor_types ; i++) {
+ channels_info[i] = kzalloc(sizeof(*channels_info[i]),
+ GFP_KERNEL);
+ if (!channels_info[i]) {
+ rc = -ENOMEM;
+ goto channel_info_err;
+ }
+ }
+
+ for (i = 0, j = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) {
+ if (!sensors_by_type[i])
+ continue;
+
+ channels_info[j]->type = i;
+ channels_info[j]->config = sensors_by_type[i];
+ j++;
+ }
+
+ hdev->hl_chip_info->info =
+ (const struct hwmon_channel_info **)channels_info;
+
+ return 0;
+
+channel_info_err:
+ for (i = 0 ; i < num_active_sensor_types ; i++)
+ if (channels_info[i]) {
+ kfree(channels_info[i]->config);
+ kfree(channels_info[i]);
+ }
+ kfree(channels_info);
+channels_info_array_err:
+sensors_type_err:
+ for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++)
+ kfree(sensors_by_type[i]);
+
+ return rc;
+}
+
+static int hl_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+ int rc;
+
+ if (hl_device_disabled_or_in_reset(hdev))
+ return -ENODEV;
+
+ switch (type) {
+ case hwmon_temp:
+ switch (attr) {
+ case hwmon_temp_input:
+ case hwmon_temp_max:
+ case hwmon_temp_crit:
+ case hwmon_temp_max_hyst:
+ case hwmon_temp_crit_hyst:
+ case hwmon_temp_offset:
+ case hwmon_temp_highest:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ rc = hl_get_temperature(hdev, channel, attr, val);
+ break;
+ case hwmon_in:
+ switch (attr) {
+ case hwmon_in_input:
+ case hwmon_in_min:
+ case hwmon_in_max:
+ case hwmon_in_highest:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ rc = hl_get_voltage(hdev, channel, attr, val);
+ break;
+ case hwmon_curr:
+ switch (attr) {
+ case hwmon_curr_input:
+ case hwmon_curr_min:
+ case hwmon_curr_max:
+ case hwmon_curr_highest:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ rc = hl_get_current(hdev, channel, attr, val);
+ break;
+ case hwmon_fan:
+ switch (attr) {
+ case hwmon_fan_input:
+ case hwmon_fan_min:
+ case hwmon_fan_max:
+ break;
+ default:
+ return -EINVAL;
+ }
+ rc = hl_get_fan_speed(hdev, channel, attr, val);
+ break;
+ case hwmon_pwm:
+ switch (attr) {
+ case hwmon_pwm_input:
+ case hwmon_pwm_enable:
+ break;
+ default:
+ return -EINVAL;
+ }
+ rc = hl_get_pwm_info(hdev, channel, attr, val);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return rc;
+}
+
+static int hl_write(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long val)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+
+ if (hl_device_disabled_or_in_reset(hdev))
+ return -ENODEV;
+
+ switch (type) {
+ case hwmon_temp:
+ switch (attr) {
+ case hwmon_temp_offset:
+ case hwmon_temp_reset_history:
+ break;
+ default:
+ return -EINVAL;
+ }
+ hl_set_temperature(hdev, channel, attr, val);
+ break;
+ case hwmon_pwm:
+ switch (attr) {
+ case hwmon_pwm_input:
+ case hwmon_pwm_enable:
+ break;
+ default:
+ return -EINVAL;
+ }
+ hl_set_pwm_info(hdev, channel, attr, val);
+ break;
+ case hwmon_in:
+ switch (attr) {
+ case hwmon_in_reset_history:
+ break;
+ default:
+ return -EINVAL;
+ }
+ hl_set_voltage(hdev, channel, attr, val);
+ break;
+ case hwmon_curr:
+ switch (attr) {
+ case hwmon_curr_reset_history:
+ break;
+ default:
+ return -EINVAL;
+ }
+ hl_set_current(hdev, channel, attr, val);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ switch (type) {
+ case hwmon_temp:
+ switch (attr) {
+ case hwmon_temp_input:
+ case hwmon_temp_max:
+ case hwmon_temp_max_hyst:
+ case hwmon_temp_crit:
+ case hwmon_temp_crit_hyst:
+ case hwmon_temp_highest:
+ return 0444;
+ case hwmon_temp_offset:
+ return 0644;
+ case hwmon_temp_reset_history:
+ return 0200;
+ }
+ break;
+ case hwmon_in:
+ switch (attr) {
+ case hwmon_in_input:
+ case hwmon_in_min:
+ case hwmon_in_max:
+ case hwmon_in_highest:
+ return 0444;
+ case hwmon_in_reset_history:
+ return 0200;
+ }
+ break;
+ case hwmon_curr:
+ switch (attr) {
+ case hwmon_curr_input:
+ case hwmon_curr_min:
+ case hwmon_curr_max:
+ case hwmon_curr_highest:
+ return 0444;
+ case hwmon_curr_reset_history:
+ return 0200;
+ }
+ break;
+ case hwmon_fan:
+ switch (attr) {
+ case hwmon_fan_input:
+ case hwmon_fan_min:
+ case hwmon_fan_max:
+ return 0444;
+ }
+ break;
+ case hwmon_pwm:
+ switch (attr) {
+ case hwmon_pwm_input:
+ case hwmon_pwm_enable:
+ return 0644;
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static const struct hwmon_ops hl_hwmon_ops = {
+ .is_visible = hl_is_visible,
+ .read = hl_read,
+ .write = hl_write
+};
+
+int hl_get_temperature(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value)
+{
+ struct armcp_packet pkt;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.sensor_index = __cpu_to_le16(sensor_index);
+ pkt.type = __cpu_to_le16(attr);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ SENSORS_PKT_TIMEOUT, value);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to get temperature from sensor %d, error %d\n",
+ sensor_index, rc);
+ *value = 0;
+ }
+
+ return rc;
+}
+
+int hl_set_temperature(struct hl_device *hdev,
+ int sensor_index, u32 attr, long value)
+{
+ struct armcp_packet pkt;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.sensor_index = __cpu_to_le16(sensor_index);
+ pkt.type = __cpu_to_le16(attr);
+ pkt.value = __cpu_to_le64(value);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ SENSORS_PKT_TIMEOUT, NULL);
+
+ if (rc)
+ dev_err(hdev->dev,
+ "Failed to set temperature of sensor %d, error %d\n",
+ sensor_index, rc);
+
+ return rc;
+}
+
+int hl_get_voltage(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value)
+{
+ struct armcp_packet pkt;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_GET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.sensor_index = __cpu_to_le16(sensor_index);
+ pkt.type = __cpu_to_le16(attr);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ SENSORS_PKT_TIMEOUT, value);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to get voltage from sensor %d, error %d\n",
+ sensor_index, rc);
+ *value = 0;
+ }
+
+ return rc;
+}
+
+int hl_get_current(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value)
+{
+ struct armcp_packet pkt;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_GET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.sensor_index = __cpu_to_le16(sensor_index);
+ pkt.type = __cpu_to_le16(attr);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ SENSORS_PKT_TIMEOUT, value);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to get current from sensor %d, error %d\n",
+ sensor_index, rc);
+ *value = 0;
+ }
+
+ return rc;
+}
+
+int hl_get_fan_speed(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value)
+{
+ struct armcp_packet pkt;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_FAN_SPEED_GET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.sensor_index = __cpu_to_le16(sensor_index);
+ pkt.type = __cpu_to_le16(attr);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ SENSORS_PKT_TIMEOUT, value);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to get fan speed from sensor %d, error %d\n",
+ sensor_index, rc);
+ *value = 0;
+ }
+
+ return rc;
+}
+
+int hl_get_pwm_info(struct hl_device *hdev,
+ int sensor_index, u32 attr, long *value)
+{
+ struct armcp_packet pkt;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_GET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.sensor_index = __cpu_to_le16(sensor_index);
+ pkt.type = __cpu_to_le16(attr);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ SENSORS_PKT_TIMEOUT, value);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to get pwm info from sensor %d, error %d\n",
+ sensor_index, rc);
+ *value = 0;
+ }
+
+ return rc;
+}
+
+void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
+ long value)
+{
+ struct armcp_packet pkt;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_SET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.sensor_index = __cpu_to_le16(sensor_index);
+ pkt.type = __cpu_to_le16(attr);
+ pkt.value = cpu_to_le64(value);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ SENSORS_PKT_TIMEOUT, NULL);
+
+ if (rc)
+ dev_err(hdev->dev,
+ "Failed to set pwm info to sensor %d, error %d\n",
+ sensor_index, rc);
+}
+
+int hl_set_voltage(struct hl_device *hdev,
+ int sensor_index, u32 attr, long value)
+{
+ struct armcp_packet pkt;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_SET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.sensor_index = __cpu_to_le16(sensor_index);
+ pkt.type = __cpu_to_le16(attr);
+ pkt.value = __cpu_to_le64(value);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ SENSORS_PKT_TIMEOUT, NULL);
+
+ if (rc)
+ dev_err(hdev->dev,
+ "Failed to set voltage of sensor %d, error %d\n",
+ sensor_index, rc);
+
+ return rc;
+}
+
+int hl_set_current(struct hl_device *hdev,
+ int sensor_index, u32 attr, long value)
+{
+ struct armcp_packet pkt;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_SET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.sensor_index = __cpu_to_le16(sensor_index);
+ pkt.type = __cpu_to_le16(attr);
+ pkt.value = __cpu_to_le64(value);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ SENSORS_PKT_TIMEOUT, NULL);
+
+ if (rc)
+ dev_err(hdev->dev,
+ "Failed to set current of sensor %d, error %d\n",
+ sensor_index, rc);
+
+ return rc;
+}
+
+int hl_hwmon_init(struct hl_device *hdev)
+{
+ struct device *dev = hdev->pdev ? &hdev->pdev->dev : hdev->dev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ int rc;
+
+ if ((hdev->hwmon_initialized) || !(hdev->fw_loading))
+ return 0;
+
+ if (hdev->hl_chip_info->info) {
+ hdev->hl_chip_info->ops = &hl_hwmon_ops;
+
+ hdev->hwmon_dev = hwmon_device_register_with_info(dev,
+ prop->armcp_info.card_name, hdev,
+ hdev->hl_chip_info, NULL);
+ if (IS_ERR(hdev->hwmon_dev)) {
+ rc = PTR_ERR(hdev->hwmon_dev);
+ dev_err(hdev->dev,
+ "Unable to register hwmon device: %d\n", rc);
+ return rc;
+ }
+
+ dev_info(hdev->dev, "%s: add sensors information\n",
+ dev_name(hdev->hwmon_dev));
+
+ hdev->hwmon_initialized = true;
+ } else {
+ dev_info(hdev->dev, "no available sensors\n");
+ }
+
+ return 0;
+}
+
+void hl_hwmon_fini(struct hl_device *hdev)
+{
+ if (!hdev->hwmon_initialized)
+ return;
+
+ hwmon_device_unregister(hdev->hwmon_dev);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+#include <linux/slab.h>
+
+/**
+ * struct hl_eqe_work - This structure is used to schedule work of EQ
+ * entry and armcp_reset event
+ *
+ * @eq_work: workqueue object to run when EQ entry is received
+ * @hdev: pointer to device structure
+ * @eq_entry: copy of the EQ entry
+ */
+struct hl_eqe_work {
+ struct work_struct eq_work;
+ struct hl_device *hdev;
+ struct hl_eq_entry eq_entry;
+};
+
+/**
+ * hl_cq_inc_ptr - increment ci or pi of cq
+ *
+ * @ptr: the current ci or pi value of the completion queue
+ *
+ * Increment ptr by 1. If it reaches the number of completion queue
+ * entries, set it to 0
+ */
+inline u32 hl_cq_inc_ptr(u32 ptr)
+{
+ ptr++;
+ if (unlikely(ptr == HL_CQ_LENGTH))
+ ptr = 0;
+ return ptr;
+}
+
+/**
+ * hl_eq_inc_ptr - increment ci of eq
+ *
+ * @ptr: the current ci value of the event queue
+ *
+ * Increment ptr by 1. If it reaches the number of event queue
+ * entries, set it to 0
+ */
+inline u32 hl_eq_inc_ptr(u32 ptr)
+{
+ ptr++;
+ if (unlikely(ptr == HL_EQ_LENGTH))
+ ptr = 0;
+ return ptr;
+}
+
+static void irq_handle_eqe(struct work_struct *work)
+{
+ struct hl_eqe_work *eqe_work = container_of(work, struct hl_eqe_work,
+ eq_work);
+ struct hl_device *hdev = eqe_work->hdev;
+
+ hdev->asic_funcs->handle_eqe(hdev, &eqe_work->eq_entry);
+
+ kfree(eqe_work);
+}
+
+/**
+ * hl_irq_handler_cq - irq handler for completion queue
+ *
+ * @irq: irq number
+ * @arg: pointer to completion queue structure
+ *
+ */
+irqreturn_t hl_irq_handler_cq(int irq, void *arg)
+{
+ struct hl_cq *cq = arg;
+ struct hl_device *hdev = cq->hdev;
+ struct hl_hw_queue *queue;
+ struct hl_cs_job *job;
+ bool shadow_index_valid;
+ u16 shadow_index;
+ struct hl_cq_entry *cq_entry, *cq_base;
+
+ if (hdev->disabled) {
+ dev_dbg(hdev->dev,
+ "Device disabled but received IRQ %d for CQ %d\n",
+ irq, cq->hw_queue_id);
+ return IRQ_HANDLED;
+ }
+
+ cq_base = (struct hl_cq_entry *) (uintptr_t) cq->kernel_address;
+
+ while (1) {
+ bool entry_ready = ((le32_to_cpu(cq_base[cq->ci].data) &
+ CQ_ENTRY_READY_MASK)
+ >> CQ_ENTRY_READY_SHIFT);
+
+ if (!entry_ready)
+ break;
+
+ cq_entry = (struct hl_cq_entry *) &cq_base[cq->ci];
+
+ /* Make sure we read CQ entry contents after we've
+ * checked the ownership bit.
+ */
+ dma_rmb();
+
+ shadow_index_valid = ((le32_to_cpu(cq_entry->data) &
+ CQ_ENTRY_SHADOW_INDEX_VALID_MASK)
+ >> CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT);
+
+ shadow_index = (u16) ((le32_to_cpu(cq_entry->data) &
+ CQ_ENTRY_SHADOW_INDEX_MASK)
+ >> CQ_ENTRY_SHADOW_INDEX_SHIFT);
+
+ queue = &hdev->kernel_queues[cq->hw_queue_id];
+
+ if ((shadow_index_valid) && (!hdev->disabled)) {
+ job = queue->shadow_queue[hl_pi_2_offset(shadow_index)];
+ queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work);
+ }
+
+ atomic_inc(&queue->ci);
+
+ /* Clear CQ entry ready bit */
+ cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) &
+ ~CQ_ENTRY_READY_MASK);
+
+ cq->ci = hl_cq_inc_ptr(cq->ci);
+
+ /* Increment free slots */
+ atomic_inc(&cq->free_slots_cnt);
+ }
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * hl_irq_handler_eq - irq handler for event queue
+ *
+ * @irq: irq number
+ * @arg: pointer to event queue structure
+ *
+ */
+irqreturn_t hl_irq_handler_eq(int irq, void *arg)
+{
+ struct hl_eq *eq = arg;
+ struct hl_device *hdev = eq->hdev;
+ struct hl_eq_entry *eq_entry;
+ struct hl_eq_entry *eq_base;
+ struct hl_eqe_work *handle_eqe_work;
+
+ eq_base = (struct hl_eq_entry *) (uintptr_t) eq->kernel_address;
+
+ while (1) {
+ bool entry_ready =
+ ((le32_to_cpu(eq_base[eq->ci].hdr.ctl) &
+ EQ_CTL_READY_MASK) >> EQ_CTL_READY_SHIFT);
+
+ if (!entry_ready)
+ break;
+
+ eq_entry = &eq_base[eq->ci];
+
+ /*
+ * Make sure we read EQ entry contents after we've
+ * checked the ownership bit.
+ */
+ dma_rmb();
+
+ if (hdev->disabled) {
+ dev_warn(hdev->dev,
+ "Device disabled but received IRQ %d for EQ\n",
+ irq);
+ goto skip_irq;
+ }
+
+ handle_eqe_work = kmalloc(sizeof(*handle_eqe_work), GFP_ATOMIC);
+ if (handle_eqe_work) {
+ INIT_WORK(&handle_eqe_work->eq_work, irq_handle_eqe);
+ handle_eqe_work->hdev = hdev;
+
+ memcpy(&handle_eqe_work->eq_entry, eq_entry,
+ sizeof(*eq_entry));
+
+ queue_work(hdev->eq_wq, &handle_eqe_work->eq_work);
+ }
+skip_irq:
+ /* Clear EQ entry ready bit */
+ eq_entry->hdr.ctl =
+ cpu_to_le32(le32_to_cpu(eq_entry->hdr.ctl) &
+ ~EQ_CTL_READY_MASK);
+
+ eq->ci = hl_eq_inc_ptr(eq->ci);
+
+ hdev->asic_funcs->update_eq_ci(hdev, eq->ci);
+ }
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * hl_cq_init - main initialization function for an cq object
+ *
+ * @hdev: pointer to device structure
+ * @q: pointer to cq structure
+ * @hw_queue_id: The H/W queue ID this completion queue belongs to
+ *
+ * Allocate dma-able memory for the completion queue and initialize fields
+ * Returns 0 on success
+ */
+int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
+{
+ void *p;
+
+ p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
+ &q->bus_address, GFP_KERNEL | __GFP_ZERO);
+ if (!p)
+ return -ENOMEM;
+
+ q->hdev = hdev;
+ q->kernel_address = (u64) (uintptr_t) p;
+ q->hw_queue_id = hw_queue_id;
+ q->ci = 0;
+ q->pi = 0;
+
+ atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);
+
+ return 0;
+}
+
+/**
+ * hl_cq_fini - destroy completion queue
+ *
+ * @hdev: pointer to device structure
+ * @q: pointer to cq structure
+ *
+ * Free the completion queue memory
+ */
+void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q)
+{
+ hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
+ (void *) (uintptr_t) q->kernel_address, q->bus_address);
+}
+
+void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q)
+{
+ q->ci = 0;
+ q->pi = 0;
+
+ atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);
+
+ /*
+ * It's not enough to just reset the PI/CI because the H/W may have
+ * written valid completion entries before it was halted and therefore
+ * we need to clean the actual queues so we won't process old entries
+ * when the device is operational again
+ */
+
+ memset((void *) (uintptr_t) q->kernel_address, 0, HL_CQ_SIZE_IN_BYTES);
+}
+
+/**
+ * hl_eq_init - main initialization function for an event queue object
+ *
+ * @hdev: pointer to device structure
+ * @q: pointer to eq structure
+ *
+ * Allocate dma-able memory for the event queue and initialize fields
+ * Returns 0 on success
+ */
+int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
+{
+ void *p;
+
+ p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
+ HL_EQ_SIZE_IN_BYTES,
+ &q->bus_address);
+ if (!p)
+ return -ENOMEM;
+
+ q->hdev = hdev;
+ q->kernel_address = (u64) (uintptr_t) p;
+ q->ci = 0;
+
+ return 0;
+}
+
+/**
+ * hl_eq_fini - destroy event queue
+ *
+ * @hdev: pointer to device structure
+ * @q: pointer to eq structure
+ *
+ * Free the event queue memory
+ */
+void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q)
+{
+ flush_workqueue(hdev->eq_wq);
+
+ hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
+ HL_EQ_SIZE_IN_BYTES,
+ (void *) (uintptr_t) q->kernel_address);
+}
+
+void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q)
+{
+ q->ci = 0;
+
+ /*
+ * It's not enough to just reset the PI/CI because the H/W may have
+ * written valid completion entries before it was halted and therefore
+ * we need to clean the actual queues so we won't process old entries
+ * when the device is operational again
+ */
+
+ memset((void *) (uintptr_t) q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include <uapi/misc/habanalabs.h>
+#include "habanalabs.h"
+#include "include/hw_ip/mmu/mmu_general.h"
+
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/genalloc.h>
+
+#define HL_MMU_DEBUG 0
+
+/*
+ * The va ranges in context object contain a list with the available chunks of
+ * device virtual memory.
+ * There is one range for host allocations and one for DRAM allocations.
+ *
+ * On initialization each range contains one chunk of all of its available
+ * virtual range which is a half of the total device virtual range.
+ *
+ * On each mapping of physical pages, a suitable virtual range chunk (with a
+ * minimum size) is selected from the list. If the chunk size equals the
+ * requested size, the chunk is returned. Otherwise, the chunk is split into
+ * two chunks - one to return as result and a remainder to stay in the list.
+ *
+ * On each Unmapping of a virtual address, the relevant virtual chunk is
+ * returned to the list. The chunk is added to the list and if its edges match
+ * the edges of the adjacent chunks (means a contiguous chunk can be created),
+ * the chunks are merged.
+ *
+ * On finish, the list is checked to have only one chunk of all the relevant
+ * virtual range (which is a half of the device total virtual range).
+ * If not (means not all mappings were unmapped), a warning is printed.
+ */
+
+/*
+ * alloc_device_memory - allocate device memory
+ *
+ * @ctx : current context
+ * @args : host parameters containing the requested size
+ * @ret_handle : result handle
+ *
+ * This function does the following:
+ * - Allocate the requested size rounded up to 2MB pages
+ * - Return unique handle
+ */
+static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
+ u32 *ret_handle)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct hl_vm *vm = &hdev->vm;
+ struct hl_vm_phys_pg_pack *phys_pg_pack;
+ u64 paddr = 0, total_size, num_pgs, i;
+ u32 num_curr_pgs, page_size, page_shift;
+ int handle, rc;
+ bool contiguous;
+
+ num_curr_pgs = 0;
+ page_size = hdev->asic_prop.dram_page_size;
+ page_shift = __ffs(page_size);
+ num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift;
+ total_size = num_pgs << page_shift;
+
+ contiguous = args->flags & HL_MEM_CONTIGUOUS;
+
+ if (contiguous) {
+ paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
+ if (!paddr) {
+ dev_err(hdev->dev,
+ "failed to allocate %llu huge contiguous pages\n",
+ num_pgs);
+ return -ENOMEM;
+ }
+ }
+
+ phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
+ if (!phys_pg_pack) {
+ rc = -ENOMEM;
+ goto pages_pack_err;
+ }
+
+ phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK;
+ phys_pg_pack->asid = ctx->asid;
+ phys_pg_pack->npages = num_pgs;
+ phys_pg_pack->page_size = page_size;
+ phys_pg_pack->total_size = total_size;
+ phys_pg_pack->flags = args->flags;
+ phys_pg_pack->contiguous = contiguous;
+
+ phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL);
+ if (!phys_pg_pack->pages) {
+ rc = -ENOMEM;
+ goto pages_arr_err;
+ }
+
+ if (phys_pg_pack->contiguous) {
+ for (i = 0 ; i < num_pgs ; i++)
+ phys_pg_pack->pages[i] = paddr + i * page_size;
+ } else {
+ for (i = 0 ; i < num_pgs ; i++) {
+ phys_pg_pack->pages[i] = (u64) gen_pool_alloc(
+ vm->dram_pg_pool,
+ page_size);
+ if (!phys_pg_pack->pages[i]) {
+ dev_err(hdev->dev,
+ "Failed to allocate device memory (out of memory)\n");
+ rc = -ENOMEM;
+ goto page_err;
+ }
+
+ num_curr_pgs++;
+ }
+ }
+
+ spin_lock(&vm->idr_lock);
+ handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
+ GFP_ATOMIC);
+ spin_unlock(&vm->idr_lock);
+
+ if (handle < 0) {
+ dev_err(hdev->dev, "Failed to get handle for page\n");
+ rc = -EFAULT;
+ goto idr_err;
+ }
+
+ for (i = 0 ; i < num_pgs ; i++)
+ kref_get(&vm->dram_pg_pool_refcount);
+
+ phys_pg_pack->handle = handle;
+
+ atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem);
+ atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem);
+
+ *ret_handle = handle;
+
+ return 0;
+
+idr_err:
+page_err:
+ if (!phys_pg_pack->contiguous)
+ for (i = 0 ; i < num_curr_pgs ; i++)
+ gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
+ page_size);
+
+ kvfree(phys_pg_pack->pages);
+pages_arr_err:
+ kfree(phys_pg_pack);
+pages_pack_err:
+ if (contiguous)
+ gen_pool_free(vm->dram_pg_pool, paddr, total_size);
+
+ return rc;
+}
+
+/*
+ * dma_map_host_va - DMA mapping of the given host virtual address.
+ * @hdev: habanalabs device structure
+ * @addr: the host virtual address of the memory area
+ * @size: the size of the memory area
+ * @p_userptr: pointer to result userptr structure
+ *
+ * This function does the following:
+ * - Allocate userptr structure
+ * - Pin the given host memory using the userptr structure
+ * - Perform DMA mapping to have the DMA addresses of the pages
+ */
+static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
+ struct hl_userptr **p_userptr)
+{
+ struct hl_userptr *userptr;
+ int rc;
+
+ userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
+ if (!userptr) {
+ rc = -ENOMEM;
+ goto userptr_err;
+ }
+
+ rc = hl_pin_host_memory(hdev, addr, size, userptr);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to pin host memory\n");
+ goto pin_err;
+ }
+
+ rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
+ userptr->sgt->nents, DMA_BIDIRECTIONAL);
+ if (rc) {
+ dev_err(hdev->dev, "failed to map sgt with DMA region\n");
+ goto dma_map_err;
+ }
+
+ userptr->dma_mapped = true;
+ userptr->dir = DMA_BIDIRECTIONAL;
+ userptr->vm_type = VM_TYPE_USERPTR;
+
+ *p_userptr = userptr;
+
+ return 0;
+
+dma_map_err:
+ hl_unpin_host_memory(hdev, userptr);
+pin_err:
+ kfree(userptr);
+userptr_err:
+
+ return rc;
+}
+
+/*
+ * dma_unmap_host_va - DMA unmapping of the given host virtual address.
+ * @hdev: habanalabs device structure
+ * @userptr: userptr to free
+ *
+ * This function does the following:
+ * - Unpins the physical pages
+ * - Frees the userptr structure
+ */
+static void dma_unmap_host_va(struct hl_device *hdev,
+ struct hl_userptr *userptr)
+{
+ hl_unpin_host_memory(hdev, userptr);
+ kfree(userptr);
+}
+
+/*
+ * dram_pg_pool_do_release - free DRAM pages pool
+ *
+ * @ref : pointer to reference object
+ *
+ * This function does the following:
+ * - Frees the idr structure of physical pages handles
+ * - Frees the generic pool of DRAM physical pages
+ */
+static void dram_pg_pool_do_release(struct kref *ref)
+{
+ struct hl_vm *vm = container_of(ref, struct hl_vm,
+ dram_pg_pool_refcount);
+
+ /*
+ * free the idr here as only here we know for sure that there are no
+ * allocated physical pages and hence there are no handles in use
+ */
+ idr_destroy(&vm->phys_pg_pack_handles);
+ gen_pool_destroy(vm->dram_pg_pool);
+}
+
+/*
+ * free_phys_pg_pack - free physical page pack
+ * @hdev: habanalabs device structure
+ * @phys_pg_pack: physical page pack to free
+ *
+ * This function does the following:
+ * - For DRAM memory only, iterate over the pack and free each physical block
+ * structure by returning it to the general pool
+ * - Free the hl_vm_phys_pg_pack structure
+ */
+static void free_phys_pg_pack(struct hl_device *hdev,
+ struct hl_vm_phys_pg_pack *phys_pg_pack)
+{
+ struct hl_vm *vm = &hdev->vm;
+ u64 i;
+
+ if (!phys_pg_pack->created_from_userptr) {
+ if (phys_pg_pack->contiguous) {
+ gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
+ phys_pg_pack->total_size);
+
+ for (i = 0; i < phys_pg_pack->npages ; i++)
+ kref_put(&vm->dram_pg_pool_refcount,
+ dram_pg_pool_do_release);
+ } else {
+ for (i = 0 ; i < phys_pg_pack->npages ; i++) {
+ gen_pool_free(vm->dram_pg_pool,
+ phys_pg_pack->pages[i],
+ phys_pg_pack->page_size);
+ kref_put(&vm->dram_pg_pool_refcount,
+ dram_pg_pool_do_release);
+ }
+ }
+ }
+
+ kvfree(phys_pg_pack->pages);
+ kfree(phys_pg_pack);
+}
+
+/*
+ * free_device_memory - free device memory
+ *
+ * @ctx : current context
+ * @handle : handle of the memory chunk to free
+ *
+ * This function does the following:
+ * - Free the device memory related to the given handle
+ */
+static int free_device_memory(struct hl_ctx *ctx, u32 handle)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct hl_vm *vm = &hdev->vm;
+ struct hl_vm_phys_pg_pack *phys_pg_pack;
+
+ spin_lock(&vm->idr_lock);
+ phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
+ if (phys_pg_pack) {
+ if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) {
+ dev_err(hdev->dev, "handle %u is mapped, cannot free\n",
+ handle);
+ spin_unlock(&vm->idr_lock);
+ return -EINVAL;
+ }
+
+ /*
+ * must remove from idr before the freeing of the physical
+ * pages as the refcount of the pool is also the trigger of the
+ * idr destroy
+ */
+ idr_remove(&vm->phys_pg_pack_handles, handle);
+ spin_unlock(&vm->idr_lock);
+
+ atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
+ atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);
+
+ free_phys_pg_pack(hdev, phys_pg_pack);
+ } else {
+ spin_unlock(&vm->idr_lock);
+ dev_err(hdev->dev,
+ "free device memory failed, no match for handle %u\n",
+ handle);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * clear_va_list_locked - free virtual addresses list
+ *
+ * @hdev : habanalabs device structure
+ * @va_list : list of virtual addresses to free
+ *
+ * This function does the following:
+ * - Iterate over the list and free each virtual addresses block
+ *
+ * This function should be called only when va_list lock is taken
+ */
+static void clear_va_list_locked(struct hl_device *hdev,
+ struct list_head *va_list)
+{
+ struct hl_vm_va_block *va_block, *tmp;
+
+ list_for_each_entry_safe(va_block, tmp, va_list, node) {
+ list_del(&va_block->node);
+ kfree(va_block);
+ }
+}
+
+/*
+ * print_va_list_locked - print virtual addresses list
+ *
+ * @hdev : habanalabs device structure
+ * @va_list : list of virtual addresses to print
+ *
+ * This function does the following:
+ * - Iterate over the list and print each virtual addresses block
+ *
+ * This function should be called only when va_list lock is taken
+ */
+static void print_va_list_locked(struct hl_device *hdev,
+ struct list_head *va_list)
+{
+#if HL_MMU_DEBUG
+ struct hl_vm_va_block *va_block;
+
+ dev_dbg(hdev->dev, "print va list:\n");
+
+ list_for_each_entry(va_block, va_list, node)
+ dev_dbg(hdev->dev,
+ "va block, start: 0x%llx, end: 0x%llx, size: %llu\n",
+ va_block->start, va_block->end, va_block->size);
+#endif
+}
+
+/*
+ * merge_va_blocks_locked - merge a virtual block if possible
+ *
+ * @hdev : pointer to the habanalabs device structure
+ * @va_list : pointer to the virtual addresses block list
+ * @va_block : virtual block to merge with adjacent blocks
+ *
+ * This function does the following:
+ * - Merge the given blocks with the adjacent blocks if their virtual ranges
+ * create a contiguous virtual range
+ *
+ * This Function should be called only when va_list lock is taken
+ */
+static void merge_va_blocks_locked(struct hl_device *hdev,
+ struct list_head *va_list, struct hl_vm_va_block *va_block)
+{
+ struct hl_vm_va_block *prev, *next;
+
+ prev = list_prev_entry(va_block, node);
+ if (&prev->node != va_list && prev->end + 1 == va_block->start) {
+ prev->end = va_block->end;
+ prev->size = prev->end - prev->start;
+ list_del(&va_block->node);
+ kfree(va_block);
+ va_block = prev;
+ }
+
+ next = list_next_entry(va_block, node);
+ if (&next->node != va_list && va_block->end + 1 == next->start) {
+ next->start = va_block->start;
+ next->size = next->end - next->start;
+ list_del(&va_block->node);
+ kfree(va_block);
+ }
+}
+
+/*
+ * add_va_block_locked - add a virtual block to the virtual addresses list
+ *
+ * @hdev : pointer to the habanalabs device structure
+ * @va_list : pointer to the virtual addresses block list
+ * @start : start virtual address
+ * @end : end virtual address
+ *
+ * This function does the following:
+ * - Add the given block to the virtual blocks list and merge with other
+ * blocks if a contiguous virtual block can be created
+ *
+ * This Function should be called only when va_list lock is taken
+ */
+static int add_va_block_locked(struct hl_device *hdev,
+ struct list_head *va_list, u64 start, u64 end)
+{
+ struct hl_vm_va_block *va_block, *res = NULL;
+ u64 size = end - start;
+
+ print_va_list_locked(hdev, va_list);
+
+ list_for_each_entry(va_block, va_list, node) {
+ /* TODO: remove upon matureness */
+ if (hl_mem_area_crosses_range(start, size, va_block->start,
+ va_block->end)) {
+ dev_err(hdev->dev,
+ "block crossing ranges at start 0x%llx, end 0x%llx\n",
+ va_block->start, va_block->end);
+ return -EINVAL;
+ }
+
+ if (va_block->end < start)
+ res = va_block;
+ }
+
+ va_block = kmalloc(sizeof(*va_block), GFP_KERNEL);
+ if (!va_block)
+ return -ENOMEM;
+
+ va_block->start = start;
+ va_block->end = end;
+ va_block->size = size;
+
+ if (!res)
+ list_add(&va_block->node, va_list);
+ else
+ list_add(&va_block->node, &res->node);
+
+ merge_va_blocks_locked(hdev, va_list, va_block);
+
+ print_va_list_locked(hdev, va_list);
+
+ return 0;
+}
+
+/*
+ * add_va_block - wrapper for add_va_block_locked
+ *
+ * @hdev : pointer to the habanalabs device structure
+ * @va_list : pointer to the virtual addresses block list
+ * @start : start virtual address
+ * @end : end virtual address
+ *
+ * This function does the following:
+ * - Takes the list lock and calls add_va_block_locked
+ */
+static inline int add_va_block(struct hl_device *hdev,
+ struct hl_va_range *va_range, u64 start, u64 end)
+{
+ int rc;
+
+ mutex_lock(&va_range->lock);
+ rc = add_va_block_locked(hdev, &va_range->list, start, end);
+ mutex_unlock(&va_range->lock);
+
+ return rc;
+}
+
+/*
+ * get_va_block - get a virtual block with the requested size
+ *
+ * @hdev : pointer to the habanalabs device structure
+ * @va_range : pointer to the virtual addresses range
+ * @size : requested block size
+ * @hint_addr : hint for request address by the user
+ * @is_userptr : is host or DRAM memory
+ *
+ * This function does the following:
+ * - Iterate on the virtual block list to find a suitable virtual block for the
+ * requested size
+ * - Reserve the requested block and update the list
+ * - Return the start address of the virtual block
+ */
+static u64 get_va_block(struct hl_device *hdev,
+ struct hl_va_range *va_range, u64 size, u64 hint_addr,
+ bool is_userptr)
+{
+ struct hl_vm_va_block *va_block, *new_va_block = NULL;
+ u64 valid_start, valid_size, prev_start, prev_end, page_mask,
+ res_valid_start = 0, res_valid_size = 0;
+ u32 page_size;
+ bool add_prev = false;
+
+ if (is_userptr)
+ /*
+ * We cannot know if the user allocated memory with huge pages
+ * or not, hence we continue with the biggest possible
+ * granularity.
+ */
+ page_size = hdev->asic_prop.pmmu_huge.page_size;
+ else
+ page_size = hdev->asic_prop.dmmu.page_size;
+
+ page_mask = ~((u64)page_size - 1);
+
+ mutex_lock(&va_range->lock);
+
+ print_va_list_locked(hdev, &va_range->list);
+
+ list_for_each_entry(va_block, &va_range->list, node) {
+ /* calc the first possible aligned addr */
+ valid_start = va_block->start;
+
+ if (valid_start & (page_size - 1)) {
+ valid_start &= page_mask;
+ valid_start += page_size;
+ if (valid_start > va_block->end)
+ continue;
+ }
+
+ valid_size = va_block->end - valid_start;
+
+ if (valid_size >= size &&
+ (!new_va_block || valid_size < res_valid_size)) {
+ new_va_block = va_block;
+ res_valid_start = valid_start;
+ res_valid_size = valid_size;
+ }
+
+ if (hint_addr && hint_addr >= valid_start &&
+ ((hint_addr + size) <= va_block->end)) {
+ new_va_block = va_block;
+ res_valid_start = hint_addr;
+ res_valid_size = valid_size;
+ break;
+ }
+ }
+
+ if (!new_va_block) {
+ dev_err(hdev->dev, "no available va block for size %llu\n",
+ size);
+ goto out;
+ }
+
+ if (res_valid_start > new_va_block->start) {
+ prev_start = new_va_block->start;
+ prev_end = res_valid_start - 1;
+
+ new_va_block->start = res_valid_start;
+ new_va_block->size = res_valid_size;
+
+ add_prev = true;
+ }
+
+ if (new_va_block->size > size) {
+ new_va_block->start += size;
+ new_va_block->size = new_va_block->end - new_va_block->start;
+ } else {
+ list_del(&new_va_block->node);
+ kfree(new_va_block);
+ }
+
+ if (add_prev)
+ add_va_block_locked(hdev, &va_range->list, prev_start,
+ prev_end);
+
+ print_va_list_locked(hdev, &va_range->list);
+out:
+ mutex_unlock(&va_range->lock);
+
+ return res_valid_start;
+}
+
+/*
+ * get_sg_info - get number of pages and the DMA address from SG list
+ *
+ * @sg : the SG list
+ * @dma_addr : pointer to DMA address to return
+ *
+ * Calculate the number of consecutive pages described by the SG list. Take the
+ * offset of the address in the first page, add to it the length and round it up
+ * to the number of needed pages.
+ */
+static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
+{
+ *dma_addr = sg_dma_address(sg);
+
+ return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
+ (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+}
+
+/*
+ * init_phys_pg_pack_from_userptr - initialize physical page pack from host
+ * memory
+ * @ctx: current context
+ * @userptr: userptr to initialize from
+ * @pphys_pg_pack: result pointer
+ *
+ * This function does the following:
+ * - Pin the physical pages related to the given virtual block
+ * - Create a physical page pack from the physical pages related to the given
+ * virtual block
+ */
+static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
+ struct hl_userptr *userptr,
+ struct hl_vm_phys_pg_pack **pphys_pg_pack)
+{
+ struct hl_vm_phys_pg_pack *phys_pg_pack;
+ struct scatterlist *sg;
+ dma_addr_t dma_addr;
+ u64 page_mask, total_npages;
+ u32 npages, page_size = PAGE_SIZE,
+ huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
+ bool first = true, is_huge_page_opt = true;
+ int rc, i, j;
+ u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
+
+ phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
+ if (!phys_pg_pack)
+ return -ENOMEM;
+
+ phys_pg_pack->vm_type = userptr->vm_type;
+ phys_pg_pack->created_from_userptr = true;
+ phys_pg_pack->asid = ctx->asid;
+ atomic_set(&phys_pg_pack->mapping_cnt, 1);
+
+ /* Only if all dma_addrs are aligned to 2MB and their
+ * sizes is at least 2MB, we can use huge page mapping.
+ * We limit the 2MB optimization to this condition,
+ * since later on we acquire the related VA range as one
+ * consecutive block.
+ */
+ total_npages = 0;
+ for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
+ npages = get_sg_info(sg, &dma_addr);
+
+ total_npages += npages;
+
+ if ((npages % pgs_in_huge_page) ||
+ (dma_addr & (huge_page_size - 1)))
+ is_huge_page_opt = false;
+ }
+
+ if (is_huge_page_opt) {
+ page_size = huge_page_size;
+ do_div(total_npages, pgs_in_huge_page);
+ }
+
+ page_mask = ~(((u64) page_size) - 1);
+
+ phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64),
+ GFP_KERNEL);
+ if (!phys_pg_pack->pages) {
+ rc = -ENOMEM;
+ goto page_pack_arr_mem_err;
+ }
+
+ phys_pg_pack->npages = total_npages;
+ phys_pg_pack->page_size = page_size;
+ phys_pg_pack->total_size = total_npages * page_size;
+
+ j = 0;
+ for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
+ npages = get_sg_info(sg, &dma_addr);
+
+ /* align down to physical page size and save the offset */
+ if (first) {
+ first = false;
+ phys_pg_pack->offset = dma_addr & (page_size - 1);
+ dma_addr &= page_mask;
+ }
+
+ while (npages) {
+ phys_pg_pack->pages[j++] = dma_addr;
+ dma_addr += page_size;
+
+ if (is_huge_page_opt)
+ npages -= pgs_in_huge_page;
+ else
+ npages--;
+ }
+ }
+
+ *pphys_pg_pack = phys_pg_pack;
+
+ return 0;
+
+page_pack_arr_mem_err:
+ kfree(phys_pg_pack);
+
+ return rc;
+}
+
+/*
+ * map_phys_pg_pack - maps the physical page pack.
+ * @ctx: current context
+ * @vaddr: start address of the virtual area to map from
+ * @phys_pg_pack: the pack of physical pages to map to
+ *
+ * This function does the following:
+ * - Maps each chunk of virtual memory to matching physical chunk
+ * - Stores number of successful mappings in the given argument
+ * - Returns 0 on success, error code otherwise
+ */
+static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
+ struct hl_vm_phys_pg_pack *phys_pg_pack)
+{
+ struct hl_device *hdev = ctx->hdev;
+ u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
+ u32 page_size = phys_pg_pack->page_size;
+ int rc = 0;
+
+ for (i = 0 ; i < phys_pg_pack->npages ; i++) {
+ paddr = phys_pg_pack->pages[i];
+
+ rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
+ (i + 1) == phys_pg_pack->npages);
+ if (rc) {
+ dev_err(hdev->dev,
+ "map failed for handle %u, npages: %llu, mapped: %llu",
+ phys_pg_pack->handle, phys_pg_pack->npages,
+ mapped_pg_cnt);
+ goto err;
+ }
+
+ mapped_pg_cnt++;
+ next_vaddr += page_size;
+ }
+
+ return 0;
+
+err:
+ next_vaddr = vaddr;
+ for (i = 0 ; i < mapped_pg_cnt ; i++) {
+ if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+ (i + 1) == mapped_pg_cnt))
+ dev_warn_ratelimited(hdev->dev,
+ "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
+ phys_pg_pack->handle, next_vaddr,
+ phys_pg_pack->pages[i], page_size);
+
+ next_vaddr += page_size;
+ }
+
+ return rc;
+}
+
+/*
+ * unmap_phys_pg_pack - unmaps the physical page pack
+ * @ctx: current context
+ * @vaddr: start address of the virtual area to unmap
+ * @phys_pg_pack: the pack of physical pages to unmap
+ */
+static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
+ struct hl_vm_phys_pg_pack *phys_pg_pack)
+{
+ struct hl_device *hdev = ctx->hdev;
+ u64 next_vaddr, i;
+ u32 page_size;
+
+ page_size = phys_pg_pack->page_size;
+ next_vaddr = vaddr;
+
+ for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
+ if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+ (i + 1) == phys_pg_pack->npages))
+ dev_warn_ratelimited(hdev->dev,
+ "unmap failed for vaddr: 0x%llx\n", next_vaddr);
+
+ /*
+ * unmapping on Palladium can be really long, so avoid a CPU
+ * soft lockup bug by sleeping a little between unmapping pages
+ */
+ if (hdev->pldm)
+ usleep_range(500, 1000);
+ }
+}
+
+static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
+ u64 *paddr)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct hl_vm *vm = &hdev->vm;
+ struct hl_vm_phys_pg_pack *phys_pg_pack;
+ u32 handle;
+
+ handle = lower_32_bits(args->map_device.handle);
+ spin_lock(&vm->idr_lock);
+ phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
+ if (!phys_pg_pack) {
+ spin_unlock(&vm->idr_lock);
+ dev_err(hdev->dev, "no match for handle %u\n", handle);
+ return -EINVAL;
+ }
+
+ *paddr = phys_pg_pack->pages[0];
+
+ spin_unlock(&vm->idr_lock);
+
+ return 0;
+}
+
+/*
+ * map_device_va - map the given memory
+ *
+ * @ctx : current context
+ * @args : host parameters with handle/host virtual address
+ * @device_addr : pointer to result device virtual address
+ *
+ * This function does the following:
+ * - If given a physical device memory handle, map to a device virtual block
+ * and return the start address of this block
+ * - If given a host virtual address and size, find the related physical pages,
+ * map a device virtual block to this pages and return the start address of
+ * this block
+ */
+static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
+ u64 *device_addr)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct hl_vm *vm = &hdev->vm;
+ struct hl_vm_phys_pg_pack *phys_pg_pack;
+ struct hl_userptr *userptr = NULL;
+ struct hl_vm_hash_node *hnode;
+ struct hl_va_range *va_range;
+ enum vm_type_t *vm_type;
+ u64 ret_vaddr, hint_addr;
+ u32 handle = 0;
+ int rc;
+ bool is_userptr = args->flags & HL_MEM_USERPTR;
+
+ /* Assume failure */
+ *device_addr = 0;
+
+ if (is_userptr) {
+ u64 addr = args->map_host.host_virt_addr,
+ size = args->map_host.mem_size;
+
+ rc = dma_map_host_va(hdev, addr, size, &userptr);
+ if (rc) {
+ dev_err(hdev->dev, "failed to get userptr from va\n");
+ return rc;
+ }
+
+ rc = init_phys_pg_pack_from_userptr(ctx, userptr,
+ &phys_pg_pack);
+ if (rc) {
+ dev_err(hdev->dev,
+ "unable to init page pack for vaddr 0x%llx\n",
+ addr);
+ goto init_page_pack_err;
+ }
+
+ vm_type = (enum vm_type_t *) userptr;
+ hint_addr = args->map_host.hint_addr;
+ handle = phys_pg_pack->handle;
+ } else {
+ handle = lower_32_bits(args->map_device.handle);
+
+ spin_lock(&vm->idr_lock);
+ phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
+ if (!phys_pg_pack) {
+ spin_unlock(&vm->idr_lock);
+ dev_err(hdev->dev,
+ "no match for handle %u\n", handle);
+ return -EINVAL;
+ }
+
+ /* increment now to avoid freeing device memory while mapping */
+ atomic_inc(&phys_pg_pack->mapping_cnt);
+
+ spin_unlock(&vm->idr_lock);
+
+ vm_type = (enum vm_type_t *) phys_pg_pack;
+
+ hint_addr = args->map_device.hint_addr;
+ }
+
+ /*
+ * relevant for mapping device physical memory only, as host memory is
+ * implicitly shared
+ */
+ if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) &&
+ phys_pg_pack->asid != ctx->asid) {
+ dev_err(hdev->dev,
+ "Failed to map memory, handle %u is not shared\n",
+ handle);
+ rc = -EPERM;
+ goto shared_err;
+ }
+
+ hnode = kzalloc(sizeof(*hnode), GFP_KERNEL);
+ if (!hnode) {
+ rc = -ENOMEM;
+ goto hnode_err;
+ }
+
+ if (is_userptr)
+ if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size)
+ va_range = ctx->host_va_range;
+ else
+ va_range = ctx->host_huge_va_range;
+ else
+ va_range = ctx->dram_va_range;
+
+ ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
+ hint_addr, is_userptr);
+ if (!ret_vaddr) {
+ dev_err(hdev->dev, "no available va block for handle %u\n",
+ handle);
+ rc = -ENOMEM;
+ goto va_block_err;
+ }
+
+ mutex_lock(&ctx->mmu_lock);
+
+ rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
+ if (rc) {
+ mutex_unlock(&ctx->mmu_lock);
+ dev_err(hdev->dev, "mapping page pack failed for handle %u\n",
+ handle);
+ goto map_err;
+ }
+
+ rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type);
+
+ mutex_unlock(&ctx->mmu_lock);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "mapping handle %u failed due to MMU cache invalidation\n",
+ handle);
+ goto map_err;
+ }
+
+ ret_vaddr += phys_pg_pack->offset;
+
+ hnode->ptr = vm_type;
+ hnode->vaddr = ret_vaddr;
+
+ mutex_lock(&ctx->mem_hash_lock);
+ hash_add(ctx->mem_hash, &hnode->node, ret_vaddr);
+ mutex_unlock(&ctx->mem_hash_lock);
+
+ *device_addr = ret_vaddr;
+
+ if (is_userptr)
+ free_phys_pg_pack(hdev, phys_pg_pack);
+
+ return 0;
+
+map_err:
+ if (add_va_block(hdev, va_range, ret_vaddr,
+ ret_vaddr + phys_pg_pack->total_size - 1))
+ dev_warn(hdev->dev,
+ "release va block failed for handle 0x%x, vaddr: 0x%llx\n",
+ handle, ret_vaddr);
+
+va_block_err:
+ kfree(hnode);
+hnode_err:
+shared_err:
+ atomic_dec(&phys_pg_pack->mapping_cnt);
+ if (is_userptr)
+ free_phys_pg_pack(hdev, phys_pg_pack);
+init_page_pack_err:
+ if (is_userptr)
+ dma_unmap_host_va(hdev, userptr);
+
+ return rc;
+}
+
+/*
+ * unmap_device_va - unmap the given device virtual address
+ *
+ * @ctx : current context
+ * @vaddr : device virtual address to unmap
+ * @ctx_free : true if in context free flow, false otherwise.
+ *
+ * This function does the following:
+ * - Unmap the physical pages related to the given virtual address
+ * - return the device virtual block to the virtual block list
+ */
+static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
+ struct hl_vm_hash_node *hnode = NULL;
+ struct hl_userptr *userptr = NULL;
+ struct hl_va_range *va_range;
+ enum vm_type_t *vm_type;
+ bool is_userptr;
+ int rc = 0;
+
+ /* protect from double entrance */
+ mutex_lock(&ctx->mem_hash_lock);
+ hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
+ if (vaddr == hnode->vaddr)
+ break;
+
+ if (!hnode) {
+ mutex_unlock(&ctx->mem_hash_lock);
+ dev_err(hdev->dev,
+ "unmap failed, no mem hnode for vaddr 0x%llx\n",
+ vaddr);
+ return -EINVAL;
+ }
+
+ hash_del(&hnode->node);
+ mutex_unlock(&ctx->mem_hash_lock);
+
+ vm_type = hnode->ptr;
+
+ if (*vm_type == VM_TYPE_USERPTR) {
+ is_userptr = true;
+ userptr = hnode->ptr;
+ rc = init_phys_pg_pack_from_userptr(ctx, userptr,
+ &phys_pg_pack);
+ if (rc) {
+ dev_err(hdev->dev,
+ "unable to init page pack for vaddr 0x%llx\n",
+ vaddr);
+ goto vm_type_err;
+ }
+
+ if (phys_pg_pack->page_size ==
+ hdev->asic_prop.pmmu.page_size)
+ va_range = ctx->host_va_range;
+ else
+ va_range = ctx->host_huge_va_range;
+ } else if (*vm_type == VM_TYPE_PHYS_PACK) {
+ is_userptr = false;
+ va_range = ctx->dram_va_range;
+ phys_pg_pack = hnode->ptr;
+ } else {
+ dev_warn(hdev->dev,
+ "unmap failed, unknown vm desc for vaddr 0x%llx\n",
+ vaddr);
+ rc = -EFAULT;
+ goto vm_type_err;
+ }
+
+ if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) {
+ dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr);
+ rc = -EINVAL;
+ goto mapping_cnt_err;
+ }
+
+ vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
+
+ mutex_lock(&ctx->mmu_lock);
+
+ unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack);
+
+ /*
+ * During context free this function is called in a loop to clean all
+ * the context mappings. Hence the cache invalidation can be called once
+ * at the loop end rather than for each iteration
+ */
+ if (!ctx_free)
+ rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
+ *vm_type);
+
+ mutex_unlock(&ctx->mmu_lock);
+
+ /*
+ * If the context is closing we don't need to check for the MMU cache
+ * invalidation return code and update the VA free list as in this flow
+ * we invalidate the MMU cache outside of this unmap function and the VA
+ * free list will be freed anyway.
+ */
+ if (!ctx_free) {
+ int tmp_rc;
+
+ if (rc)
+ dev_err(hdev->dev,
+ "unmapping vaddr 0x%llx failed due to MMU cache invalidation\n",
+ vaddr);
+
+ tmp_rc = add_va_block(hdev, va_range, vaddr,
+ vaddr + phys_pg_pack->total_size - 1);
+ if (tmp_rc) {
+ dev_warn(hdev->dev,
+ "add va block failed for vaddr: 0x%llx\n",
+ vaddr);
+ if (!rc)
+ rc = tmp_rc;
+ }
+ }
+
+ atomic_dec(&phys_pg_pack->mapping_cnt);
+ kfree(hnode);
+
+ if (is_userptr) {
+ free_phys_pg_pack(hdev, phys_pg_pack);
+ dma_unmap_host_va(hdev, userptr);
+ }
+
+ return rc;
+
+mapping_cnt_err:
+ if (is_userptr)
+ free_phys_pg_pack(hdev, phys_pg_pack);
+vm_type_err:
+ mutex_lock(&ctx->mem_hash_lock);
+ hash_add(ctx->mem_hash, &hnode->node, vaddr);
+ mutex_unlock(&ctx->mem_hash_lock);
+
+ return rc;
+}
+
+static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_ctx *ctx = hpriv->ctx;
+ u64 device_addr = 0;
+ u32 handle = 0;
+ int rc;
+
+ switch (args->in.op) {
+ case HL_MEM_OP_ALLOC:
+ if (args->in.alloc.mem_size == 0) {
+ dev_err(hdev->dev,
+ "alloc size must be larger than 0\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /* Force contiguous as there are no real MMU
+ * translations to overcome physical memory gaps
+ */
+ args->in.flags |= HL_MEM_CONTIGUOUS;
+ rc = alloc_device_memory(ctx, &args->in, &handle);
+
+ memset(args, 0, sizeof(*args));
+ args->out.handle = (__u64) handle;
+ break;
+
+ case HL_MEM_OP_FREE:
+ rc = free_device_memory(ctx, args->in.free.handle);
+ break;
+
+ case HL_MEM_OP_MAP:
+ if (args->in.flags & HL_MEM_USERPTR) {
+ device_addr = args->in.map_host.host_virt_addr;
+ rc = 0;
+ } else {
+ rc = get_paddr_from_handle(ctx, &args->in,
+ &device_addr);
+ }
+
+ memset(args, 0, sizeof(*args));
+ args->out.device_virt_addr = device_addr;
+ break;
+
+ case HL_MEM_OP_UNMAP:
+ rc = 0;
+ break;
+
+ default:
+ dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
+ rc = -ENOTTY;
+ break;
+ }
+
+out:
+ return rc;
+}
+
+int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+ union hl_mem_args *args = data;
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_ctx *ctx = hpriv->ctx;
+ u64 device_addr = 0;
+ u32 handle = 0;
+ int rc;
+
+ if (hl_device_disabled_or_in_reset(hdev)) {
+ dev_warn_ratelimited(hdev->dev,
+ "Device is %s. Can't execute MEMORY IOCTL\n",
+ atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+ return -EBUSY;
+ }
+
+ if (!hdev->mmu_enable)
+ return mem_ioctl_no_mmu(hpriv, args);
+
+ switch (args->in.op) {
+ case HL_MEM_OP_ALLOC:
+ if (!hdev->dram_supports_virtual_memory) {
+ dev_err(hdev->dev, "DRAM alloc is not supported\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (args->in.alloc.mem_size == 0) {
+ dev_err(hdev->dev,
+ "alloc size must be larger than 0\n");
+ rc = -EINVAL;
+ goto out;
+ }
+ rc = alloc_device_memory(ctx, &args->in, &handle);
+
+ memset(args, 0, sizeof(*args));
+ args->out.handle = (__u64) handle;
+ break;
+
+ case HL_MEM_OP_FREE:
+ rc = free_device_memory(ctx, args->in.free.handle);
+ break;
+
+ case HL_MEM_OP_MAP:
+ rc = map_device_va(ctx, &args->in, &device_addr);
+
+ memset(args, 0, sizeof(*args));
+ args->out.device_virt_addr = device_addr;
+ break;
+
+ case HL_MEM_OP_UNMAP:
+ rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr,
+ false);
+ break;
+
+ default:
+ dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
+ rc = -ENOTTY;
+ break;
+ }
+
+out:
+ return rc;
+}
+
+static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
+ u32 npages, u64 start, u32 offset,
+ struct hl_userptr *userptr)
+{
+ int rc;
+
+ if (!access_ok((void __user *) (uintptr_t) addr, size)) {
+ dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr);
+ return -EFAULT;
+ }
+
+ userptr->vec = frame_vector_create(npages);
+ if (!userptr->vec) {
+ dev_err(hdev->dev, "Failed to create frame vector\n");
+ return -ENOMEM;
+ }
+
+ rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE,
+ userptr->vec);
+
+ if (rc != npages) {
+ dev_err(hdev->dev,
+ "Failed to map host memory, user ptr probably wrong\n");
+ if (rc < 0)
+ goto destroy_framevec;
+ rc = -EFAULT;
+ goto put_framevec;
+ }
+
+ if (frame_vector_to_pages(userptr->vec) < 0) {
+ dev_err(hdev->dev,
+ "Failed to translate frame vector to pages\n");
+ rc = -EFAULT;
+ goto put_framevec;
+ }
+
+ rc = sg_alloc_table_from_pages(userptr->sgt,
+ frame_vector_pages(userptr->vec),
+ npages, offset, size, GFP_ATOMIC);
+ if (rc < 0) {
+ dev_err(hdev->dev, "failed to create SG table from pages\n");
+ goto put_framevec;
+ }
+
+ return 0;
+
+put_framevec:
+ put_vaddr_frames(userptr->vec);
+destroy_framevec:
+ frame_vector_destroy(userptr->vec);
+ return rc;
+}
+
+/*
+ * hl_pin_host_memory - pins a chunk of host memory.
+ * @hdev: pointer to the habanalabs device structure
+ * @addr: the host virtual address of the memory area
+ * @size: the size of the memory area
+ * @userptr: pointer to hl_userptr structure
+ *
+ * This function does the following:
+ * - Pins the physical pages
+ * - Create an SG list from those pages
+ */
+int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
+ struct hl_userptr *userptr)
+{
+ u64 start, end;
+ u32 npages, offset;
+ int rc;
+
+ if (!size) {
+ dev_err(hdev->dev, "size to pin is invalid - %llu\n", size);
+ return -EINVAL;
+ }
+
+ /*
+ * If the combination of the address and size requested for this memory
+ * region causes an integer overflow, return error.
+ */
+ if (((addr + size) < addr) ||
+ PAGE_ALIGN(addr + size) < (addr + size)) {
+ dev_err(hdev->dev,
+ "user pointer 0x%llx + %llu causes integer overflow\n",
+ addr, size);
+ return -EINVAL;
+ }
+
+ /*
+ * This function can be called also from data path, hence use atomic
+ * always as it is not a big allocation.
+ */
+ userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC);
+ if (!userptr->sgt)
+ return -ENOMEM;
+
+ start = addr & PAGE_MASK;
+ offset = addr & ~PAGE_MASK;
+ end = PAGE_ALIGN(addr + size);
+ npages = (end - start) >> PAGE_SHIFT;
+
+ userptr->size = size;
+ userptr->addr = addr;
+ userptr->dma_mapped = false;
+ INIT_LIST_HEAD(&userptr->job_node);
+
+ rc = get_user_memory(hdev, addr, size, npages, start, offset,
+ userptr);
+ if (rc) {
+ dev_err(hdev->dev,
+ "failed to get user memory for address 0x%llx\n",
+ addr);
+ goto free_sgt;
+ }
+
+ hl_debugfs_add_userptr(hdev, userptr);
+
+ return 0;
+
+free_sgt:
+ kfree(userptr->sgt);
+ return rc;
+}
+
+/*
+ * hl_unpin_host_memory - unpins a chunk of host memory.
+ * @hdev: pointer to the habanalabs device structure
+ * @userptr: pointer to hl_userptr structure
+ *
+ * This function does the following:
+ * - Unpins the physical pages related to the host memory
+ * - Free the SG list
+ */
+void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
+{
+ struct page **pages;
+
+ hl_debugfs_remove_userptr(hdev, userptr);
+
+ if (userptr->dma_mapped)
+ hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl,
+ userptr->sgt->nents,
+ userptr->dir);
+
+ pages = frame_vector_pages(userptr->vec);
+ if (!IS_ERR(pages)) {
+ int i;
+
+ for (i = 0; i < frame_vector_count(userptr->vec); i++)
+ set_page_dirty_lock(pages[i]);
+ }
+ put_vaddr_frames(userptr->vec);
+ frame_vector_destroy(userptr->vec);
+
+ list_del(&userptr->job_node);
+
+ sg_free_table(userptr->sgt);
+ kfree(userptr->sgt);
+}
+
+/*
+ * hl_userptr_delete_list - clear userptr list
+ *
+ * @hdev : pointer to the habanalabs device structure
+ * @userptr_list : pointer to the list to clear
+ *
+ * This function does the following:
+ * - Iterates over the list and unpins the host memory and frees the userptr
+ * structure.
+ */
+void hl_userptr_delete_list(struct hl_device *hdev,
+ struct list_head *userptr_list)
+{
+ struct hl_userptr *userptr, *tmp;
+
+ list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
+ hl_unpin_host_memory(hdev, userptr);
+ kfree(userptr);
+ }
+
+ INIT_LIST_HEAD(userptr_list);
+}
+
+/*
+ * hl_userptr_is_pinned - returns whether the given userptr is pinned
+ *
+ * @hdev : pointer to the habanalabs device structure
+ * @userptr_list : pointer to the list to clear
+ * @userptr : pointer to userptr to check
+ *
+ * This function does the following:
+ * - Iterates over the list and checks if the given userptr is in it, means is
+ * pinned. If so, returns true, otherwise returns false.
+ */
+bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
+ u32 size, struct list_head *userptr_list,
+ struct hl_userptr **userptr)
+{
+ list_for_each_entry((*userptr), userptr_list, job_node) {
+ if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * va_range_init - initialize virtual addresses range
+ * @hdev: pointer to the habanalabs device structure
+ * @va_range: pointer to the range to initialize
+ * @start: range start address
+ * @end: range end address
+ *
+ * This function does the following:
+ * - Initializes the virtual addresses list of the given range with the given
+ * addresses.
+ */
+static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
+ u64 start, u64 end)
+{
+ int rc;
+
+ INIT_LIST_HEAD(&va_range->list);
+
+ /* PAGE_SIZE alignment */
+
+ if (start & (PAGE_SIZE - 1)) {
+ start &= PAGE_MASK;
+ start += PAGE_SIZE;
+ }
+
+ if (end & (PAGE_SIZE - 1))
+ end &= PAGE_MASK;
+
+ if (start >= end) {
+ dev_err(hdev->dev, "too small vm range for va list\n");
+ return -EFAULT;
+ }
+
+ rc = add_va_block(hdev, va_range, start, end);
+
+ if (rc) {
+ dev_err(hdev->dev, "Failed to init host va list\n");
+ return rc;
+ }
+
+ va_range->start_addr = start;
+ va_range->end_addr = end;
+
+ return 0;
+}
+
+/*
+ * va_range_fini() - clear a virtual addresses range
+ * @hdev: pointer to the habanalabs structure
+ * va_range: pointer to virtual addresses range
+ *
+ * This function does the following:
+ * - Frees the virtual addresses block list and its lock
+ */
+static void va_range_fini(struct hl_device *hdev,
+ struct hl_va_range *va_range)
+{
+ mutex_lock(&va_range->lock);
+ clear_va_list_locked(hdev, &va_range->list);
+ mutex_unlock(&va_range->lock);
+
+ mutex_destroy(&va_range->lock);
+ kfree(va_range);
+}
+
+/*
+ * vm_ctx_init_with_ranges() - initialize virtual memory for context
+ * @ctx: pointer to the habanalabs context structure
+ * @host_range_start: host virtual addresses range start.
+ * @host_range_end: host virtual addresses range end.
+ * @host_huge_range_start: host virtual addresses range start for memory
+ * allocated with huge pages.
+ * @host_huge_range_end: host virtual addresses range end for memory allocated
+ * with huge pages.
+ * @dram_range_start: dram virtual addresses range start.
+ * @dram_range_end: dram virtual addresses range end.
+ *
+ * This function initializes the following:
+ * - MMU for context
+ * - Virtual address to area descriptor hashtable
+ * - Virtual block list of available virtual memory
+ */
+static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
+ u64 host_range_start,
+ u64 host_range_end,
+ u64 host_huge_range_start,
+ u64 host_huge_range_end,
+ u64 dram_range_start,
+ u64 dram_range_end)
+{
+ struct hl_device *hdev = ctx->hdev;
+ int rc;
+
+ ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL);
+ if (!ctx->host_va_range)
+ return -ENOMEM;
+
+ ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range),
+ GFP_KERNEL);
+ if (!ctx->host_huge_va_range) {
+ rc = -ENOMEM;
+ goto host_huge_va_range_err;
+ }
+
+ ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL);
+ if (!ctx->dram_va_range) {
+ rc = -ENOMEM;
+ goto dram_va_range_err;
+ }
+
+ rc = hl_mmu_ctx_init(ctx);
+ if (rc) {
+ dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
+ goto mmu_ctx_err;
+ }
+
+ mutex_init(&ctx->mem_hash_lock);
+ hash_init(ctx->mem_hash);
+
+ mutex_init(&ctx->host_va_range->lock);
+
+ rc = va_range_init(hdev, ctx->host_va_range, host_range_start,
+ host_range_end);
+ if (rc) {
+ dev_err(hdev->dev, "failed to init host vm range\n");
+ goto host_page_range_err;
+ }
+
+ if (hdev->pmmu_huge_range) {
+ mutex_init(&ctx->host_huge_va_range->lock);
+
+ rc = va_range_init(hdev, ctx->host_huge_va_range,
+ host_huge_range_start,
+ host_huge_range_end);
+ if (rc) {
+ dev_err(hdev->dev,
+ "failed to init host huge vm range\n");
+ goto host_hpage_range_err;
+ }
+ } else {
+ ctx->host_huge_va_range = ctx->host_va_range;
+ }
+
+ mutex_init(&ctx->dram_va_range->lock);
+
+ rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start,
+ dram_range_end);
+ if (rc) {
+ dev_err(hdev->dev, "failed to init dram vm range\n");
+ goto dram_vm_err;
+ }
+
+ hl_debugfs_add_ctx_mem_hash(hdev, ctx);
+
+ return 0;
+
+dram_vm_err:
+ mutex_destroy(&ctx->dram_va_range->lock);
+
+ if (hdev->pmmu_huge_range) {
+ mutex_lock(&ctx->host_huge_va_range->lock);
+ clear_va_list_locked(hdev, &ctx->host_huge_va_range->list);
+ mutex_unlock(&ctx->host_huge_va_range->lock);
+ }
+host_hpage_range_err:
+ if (hdev->pmmu_huge_range)
+ mutex_destroy(&ctx->host_huge_va_range->lock);
+ mutex_lock(&ctx->host_va_range->lock);
+ clear_va_list_locked(hdev, &ctx->host_va_range->list);
+ mutex_unlock(&ctx->host_va_range->lock);
+host_page_range_err:
+ mutex_destroy(&ctx->host_va_range->lock);
+ mutex_destroy(&ctx->mem_hash_lock);
+ hl_mmu_ctx_fini(ctx);
+mmu_ctx_err:
+ kfree(ctx->dram_va_range);
+dram_va_range_err:
+ kfree(ctx->host_huge_va_range);
+host_huge_va_range_err:
+ kfree(ctx->host_va_range);
+
+ return rc;
+}
+
+int hl_vm_ctx_init(struct hl_ctx *ctx)
+{
+ struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
+ u64 host_range_start, host_range_end, host_huge_range_start,
+ host_huge_range_end, dram_range_start, dram_range_end;
+
+ atomic64_set(&ctx->dram_phys_mem, 0);
+
+ /*
+ * - If MMU is enabled, init the ranges as usual.
+ * - If MMU is disabled, in case of host mapping, the returned address
+ * is the given one.
+ * In case of DRAM mapping, the returned address is the physical
+ * address of the memory related to the given handle.
+ */
+ if (ctx->hdev->mmu_enable) {
+ dram_range_start = prop->dmmu.start_addr;
+ dram_range_end = prop->dmmu.end_addr;
+ host_range_start = prop->pmmu.start_addr;
+ host_range_end = prop->pmmu.end_addr;
+ host_huge_range_start = prop->pmmu_huge.start_addr;
+ host_huge_range_end = prop->pmmu_huge.end_addr;
+ } else {
+ dram_range_start = prop->dram_user_base_address;
+ dram_range_end = prop->dram_end_address;
+ host_range_start = prop->dram_user_base_address;
+ host_range_end = prop->dram_end_address;
+ host_huge_range_start = prop->dram_user_base_address;
+ host_huge_range_end = prop->dram_end_address;
+ }
+
+ return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
+ host_huge_range_start,
+ host_huge_range_end,
+ dram_range_start,
+ dram_range_end);
+}
+
+/*
+ * hl_vm_ctx_fini - virtual memory teardown of context
+ *
+ * @ctx : pointer to the habanalabs context structure
+ *
+ * This function perform teardown the following:
+ * - Virtual block list of available virtual memory
+ * - Virtual address to area descriptor hashtable
+ * - MMU for context
+ *
+ * In addition this function does the following:
+ * - Unmaps the existing hashtable nodes if the hashtable is not empty. The
+ * hashtable should be empty as no valid mappings should exist at this
+ * point.
+ * - Frees any existing physical page list from the idr which relates to the
+ * current context asid.
+ * - This function checks the virtual block list for correctness. At this point
+ * the list should contain one element which describes the whole virtual
+ * memory range of the context. Otherwise, a warning is printed.
+ */
+void hl_vm_ctx_fini(struct hl_ctx *ctx)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct hl_vm *vm = &hdev->vm;
+ struct hl_vm_phys_pg_pack *phys_pg_list;
+ struct hl_vm_hash_node *hnode;
+ struct hlist_node *tmp_node;
+ int i;
+
+ hl_debugfs_remove_ctx_mem_hash(hdev, ctx);
+
+ /*
+ * Clearly something went wrong on hard reset so no point in printing
+ * another side effect error
+ */
+ if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
+ dev_notice(hdev->dev,
+ "user released device without removing its memory mappings\n");
+
+ hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
+ dev_dbg(hdev->dev,
+ "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
+ hnode->vaddr, ctx->asid);
+ unmap_device_va(ctx, hnode->vaddr, true);
+ }
+
+ /* invalidate the cache once after the unmapping loop */
+ hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
+ hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK);
+
+ spin_lock(&vm->idr_lock);
+ idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
+ if (phys_pg_list->asid == ctx->asid) {
+ dev_dbg(hdev->dev,
+ "page list 0x%px of asid %d is still alive\n",
+ phys_pg_list, ctx->asid);
+ atomic64_sub(phys_pg_list->total_size,
+ &hdev->dram_used_mem);
+ free_phys_pg_pack(hdev, phys_pg_list);
+ idr_remove(&vm->phys_pg_pack_handles, i);
+ }
+ spin_unlock(&vm->idr_lock);
+
+ va_range_fini(hdev, ctx->dram_va_range);
+ if (hdev->pmmu_huge_range)
+ va_range_fini(hdev, ctx->host_huge_va_range);
+ va_range_fini(hdev, ctx->host_va_range);
+
+ mutex_destroy(&ctx->mem_hash_lock);
+ hl_mmu_ctx_fini(ctx);
+}
+
+/*
+ * hl_vm_init - initialize virtual memory module
+ *
+ * @hdev : pointer to the habanalabs device structure
+ *
+ * This function initializes the following:
+ * - MMU module
+ * - DRAM physical pages pool of 2MB
+ * - Idr for device memory allocation handles
+ */
+int hl_vm_init(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_vm *vm = &hdev->vm;
+ int rc;
+
+ vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
+ if (!vm->dram_pg_pool) {
+ dev_err(hdev->dev, "Failed to create dram page pool\n");
+ return -ENOMEM;
+ }
+
+ kref_init(&vm->dram_pg_pool_refcount);
+
+ rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address,
+ prop->dram_end_address - prop->dram_user_base_address,
+ -1);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to add memory to dram page pool %d\n", rc);
+ goto pool_add_err;
+ }
+
+ spin_lock_init(&vm->idr_lock);
+ idr_init(&vm->phys_pg_pack_handles);
+
+ atomic64_set(&hdev->dram_used_mem, 0);
+
+ vm->init_done = true;
+
+ return 0;
+
+pool_add_err:
+ gen_pool_destroy(vm->dram_pg_pool);
+
+ return rc;
+}
+
+/*
+ * hl_vm_fini - virtual memory module teardown
+ *
+ * @hdev : pointer to the habanalabs device structure
+ *
+ * This function perform teardown to the following:
+ * - Idr for device memory allocation handles
+ * - DRAM physical pages pool of 2MB
+ * - MMU module
+ */
+void hl_vm_fini(struct hl_device *hdev)
+{
+ struct hl_vm *vm = &hdev->vm;
+
+ if (!vm->init_done)
+ return;
+
+ /*
+ * At this point all the contexts should be freed and hence no DRAM
+ * memory should be in use. Hence the DRAM pool should be freed here.
+ */
+ if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1)
+ dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
+ __func__);
+
+ vm->init_done = false;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+#include "include/hw_ip/mmu/mmu_general.h"
+
+#include <linux/genalloc.h>
+#include <linux/slab.h>
+
+static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
+
+static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
+{
+ struct pgt_info *pgt_info = NULL;
+
+ hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
+ (unsigned long) hop_addr)
+ if (hop_addr == pgt_info->shadow_addr)
+ break;
+
+ return pgt_info;
+}
+
+static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
+{
+ struct hl_device *hdev = ctx->hdev;
+
+ gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr,
+ hdev->asic_prop.mmu_hop_table_size);
+ hash_del(&pgt_info->node);
+ kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
+ kfree(pgt_info);
+}
+
+static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
+{
+ struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
+
+ _free_hop(ctx, pgt_info);
+}
+
+static u64 alloc_hop(struct hl_ctx *ctx)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct pgt_info *pgt_info;
+ u64 phys_addr, shadow_addr;
+
+ pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
+ if (!pgt_info)
+ return ULLONG_MAX;
+
+ phys_addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool,
+ prop->mmu_hop_table_size);
+ if (!phys_addr) {
+ dev_err(hdev->dev, "failed to allocate page\n");
+ goto pool_add_err;
+ }
+
+ shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
+ GFP_KERNEL);
+ if (!shadow_addr)
+ goto shadow_err;
+
+ pgt_info->phys_addr = phys_addr;
+ pgt_info->shadow_addr = shadow_addr;
+ pgt_info->ctx = ctx;
+ pgt_info->num_of_ptes = 0;
+ hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);
+
+ return shadow_addr;
+
+shadow_err:
+ gen_pool_free(hdev->mmu_pgt_pool, phys_addr, prop->mmu_hop_table_size);
+pool_add_err:
+ kfree(pgt_info);
+
+ return ULLONG_MAX;
+}
+
+static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
+{
+ return ctx->hdev->asic_prop.mmu_pgt_addr +
+ (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
+}
+
+static inline u64 get_hop0_addr(struct hl_ctx *ctx)
+{
+ return (u64) (uintptr_t) ctx->hdev->mmu_shadow_hop0 +
+ (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
+}
+
+static inline void flush(struct hl_ctx *ctx)
+{
+ /* flush all writes from all cores to reach PCI */
+ mb();
+ ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
+}
+
+/* transform the value to physical address when writing to H/W */
+static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
+{
+ /*
+ * The value to write is actually the address of the next shadow hop +
+ * flags at the 12 LSBs.
+ * Hence in order to get the value to write to the physical PTE, we
+ * clear the 12 LSBs and translate the shadow hop to its associated
+ * physical hop, and add back the original 12 LSBs.
+ */
+ u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
+ (val & FLAGS_MASK);
+
+ ctx->hdev->asic_funcs->write_pte(ctx->hdev,
+ get_phys_addr(ctx, shadow_pte_addr),
+ phys_val);
+
+ *(u64 *) (uintptr_t) shadow_pte_addr = val;
+}
+
+/* do not transform the value to physical address when writing to H/W */
+static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
+ u64 val)
+{
+ ctx->hdev->asic_funcs->write_pte(ctx->hdev,
+ get_phys_addr(ctx, shadow_pte_addr),
+ val);
+ *(u64 *) (uintptr_t) shadow_pte_addr = val;
+}
+
+/* clear the last and present bits */
+static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
+{
+ /* no need to transform the value to physical address */
+ write_final_pte(ctx, pte_addr, 0);
+}
+
+static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
+{
+ get_pgt_info(ctx, hop_addr)->num_of_ptes++;
+}
+
+/*
+ * put_pte - decrement the num of ptes and free the hop if possible
+ *
+ * @ctx: pointer to the context structure
+ * @hop_addr: addr of the hop
+ *
+ * This function returns the number of ptes left on this hop. If the number is
+ * 0, it means the pte was freed.
+ */
+static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
+{
+ struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
+ int num_of_ptes_left;
+
+ pgt_info->num_of_ptes--;
+
+ /*
+ * Need to save the number of ptes left because free_hop might free
+ * the pgt_info
+ */
+ num_of_ptes_left = pgt_info->num_of_ptes;
+ if (!num_of_ptes_left)
+ _free_hop(ctx, pgt_info);
+
+ return num_of_ptes_left;
+}
+
+static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
+ u64 virt_addr, u64 mask, u64 shift)
+{
+ return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
+ ((virt_addr & mask) >> shift);
+}
+
+static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_prop,
+ u64 hop_addr, u64 vaddr)
+{
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
+ mmu_prop->hop0_shift);
+}
+
+static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_prop,
+ u64 hop_addr, u64 vaddr)
+{
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
+ mmu_prop->hop1_shift);
+}
+
+static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_prop,
+ u64 hop_addr, u64 vaddr)
+{
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
+ mmu_prop->hop2_shift);
+}
+
+static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_prop,
+ u64 hop_addr, u64 vaddr)
+{
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
+ mmu_prop->hop3_shift);
+}
+
+static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
+ struct hl_mmu_properties *mmu_prop,
+ u64 hop_addr, u64 vaddr)
+{
+ return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
+ mmu_prop->hop4_shift);
+}
+
+static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
+{
+ if (curr_pte & PAGE_PRESENT_MASK)
+ return curr_pte & HOP_PHYS_ADDR_MASK;
+ else
+ return ULLONG_MAX;
+}
+
+static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
+ bool *is_new_hop)
+{
+ u64 hop_addr = get_next_hop_addr(ctx, curr_pte);
+
+ if (hop_addr == ULLONG_MAX) {
+ hop_addr = alloc_hop(ctx);
+ *is_new_hop = (hop_addr != ULLONG_MAX);
+ }
+
+ return hop_addr;
+}
+
+/* translates shadow address inside hop to a physical address */
+static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
+{
+ u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
+ u64 shadow_hop_addr = shadow_addr & ~page_mask;
+ u64 pte_offset = shadow_addr & page_mask;
+ u64 phys_hop_addr;
+
+ if (shadow_hop_addr != get_hop0_addr(ctx))
+ phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
+ else
+ phys_hop_addr = get_phys_hop0_addr(ctx);
+
+ return phys_hop_addr + pte_offset;
+}
+
+static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+ return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+ prop->dmmu.start_addr,
+ prop->dmmu.end_addr);
+}
+
+static int dram_default_mapping_init(struct hl_ctx *ctx)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
+ hop2_pte_addr, hop3_pte_addr, pte_val;
+ int rc, i, j, hop3_allocated = 0;
+
+ if ((!hdev->dram_supports_virtual_memory) ||
+ (!hdev->dram_default_page_mapping) ||
+ (ctx->asid == HL_KERNEL_ASID_ID))
+ return 0;
+
+ num_of_hop3 = prop->dram_size_for_default_page_mapping;
+ do_div(num_of_hop3, prop->dram_page_size);
+ do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
+
+ /* add hop1 and hop2 */
+ total_hops = num_of_hop3 + 2;
+
+ ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
+ if (!ctx->dram_default_hops)
+ return -ENOMEM;
+
+ hop0_addr = get_hop0_addr(ctx);
+
+ hop1_addr = alloc_hop(ctx);
+ if (hop1_addr == ULLONG_MAX) {
+ dev_err(hdev->dev, "failed to alloc hop 1\n");
+ rc = -ENOMEM;
+ goto hop1_err;
+ }
+
+ ctx->dram_default_hops[total_hops - 1] = hop1_addr;
+
+ hop2_addr = alloc_hop(ctx);
+ if (hop2_addr == ULLONG_MAX) {
+ dev_err(hdev->dev, "failed to alloc hop 2\n");
+ rc = -ENOMEM;
+ goto hop2_err;
+ }
+
+ ctx->dram_default_hops[total_hops - 2] = hop2_addr;
+
+ for (i = 0 ; i < num_of_hop3 ; i++) {
+ ctx->dram_default_hops[i] = alloc_hop(ctx);
+ if (ctx->dram_default_hops[i] == ULLONG_MAX) {
+ dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
+ rc = -ENOMEM;
+ goto hop3_err;
+ }
+ hop3_allocated++;
+ }
+
+ /* need only pte 0 in hops 0 and 1 */
+ pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+ write_pte(ctx, hop0_addr, pte_val);
+
+ pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+ write_pte(ctx, hop1_addr, pte_val);
+ get_pte(ctx, hop1_addr);
+
+ hop2_pte_addr = hop2_addr;
+ for (i = 0 ; i < num_of_hop3 ; i++) {
+ pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
+ PAGE_PRESENT_MASK;
+ write_pte(ctx, hop2_pte_addr, pte_val);
+ get_pte(ctx, hop2_addr);
+ hop2_pte_addr += HL_PTE_SIZE;
+ }
+
+ pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
+ LAST_MASK | PAGE_PRESENT_MASK;
+
+ for (i = 0 ; i < num_of_hop3 ; i++) {
+ hop3_pte_addr = ctx->dram_default_hops[i];
+ for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
+ write_final_pte(ctx, hop3_pte_addr, pte_val);
+ get_pte(ctx, ctx->dram_default_hops[i]);
+ hop3_pte_addr += HL_PTE_SIZE;
+ }
+ }
+
+ flush(ctx);
+
+ return 0;
+
+hop3_err:
+ for (i = 0 ; i < hop3_allocated ; i++)
+ free_hop(ctx, ctx->dram_default_hops[i]);
+
+ free_hop(ctx, hop2_addr);
+hop2_err:
+ free_hop(ctx, hop1_addr);
+hop1_err:
+ kfree(ctx->dram_default_hops);
+
+ return rc;
+}
+
+static void dram_default_mapping_fini(struct hl_ctx *ctx)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
+ hop2_pte_addr, hop3_pte_addr;
+ int i, j;
+
+ if ((!hdev->dram_supports_virtual_memory) ||
+ (!hdev->dram_default_page_mapping) ||
+ (ctx->asid == HL_KERNEL_ASID_ID))
+ return;
+
+ num_of_hop3 = prop->dram_size_for_default_page_mapping;
+ do_div(num_of_hop3, prop->dram_page_size);
+ do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
+
+ hop0_addr = get_hop0_addr(ctx);
+ /* add hop1 and hop2 */
+ total_hops = num_of_hop3 + 2;
+ hop1_addr = ctx->dram_default_hops[total_hops - 1];
+ hop2_addr = ctx->dram_default_hops[total_hops - 2];
+
+ for (i = 0 ; i < num_of_hop3 ; i++) {
+ hop3_pte_addr = ctx->dram_default_hops[i];
+ for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
+ clear_pte(ctx, hop3_pte_addr);
+ put_pte(ctx, ctx->dram_default_hops[i]);
+ hop3_pte_addr += HL_PTE_SIZE;
+ }
+ }
+
+ hop2_pte_addr = hop2_addr;
+ hop2_pte_addr = hop2_addr;
+ for (i = 0 ; i < num_of_hop3 ; i++) {
+ clear_pte(ctx, hop2_pte_addr);
+ put_pte(ctx, hop2_addr);
+ hop2_pte_addr += HL_PTE_SIZE;
+ }
+
+ clear_pte(ctx, hop1_addr);
+ put_pte(ctx, hop1_addr);
+ clear_pte(ctx, hop0_addr);
+
+ kfree(ctx->dram_default_hops);
+
+ flush(ctx);
+}
+
+/**
+ * hl_mmu_init() - initialize the MMU module.
+ * @hdev: habanalabs device structure.
+ *
+ * This function does the following:
+ * - Create a pool of pages for pgt_infos.
+ * - Create a shadow table for pgt
+ *
+ * Return: 0 for success, non-zero for failure.
+ */
+int hl_mmu_init(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ int rc;
+
+ if (!hdev->mmu_enable)
+ return 0;
+
+ hdev->mmu_pgt_pool =
+ gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
+
+ if (!hdev->mmu_pgt_pool) {
+ dev_err(hdev->dev, "Failed to create page gen pool\n");
+ return -ENOMEM;
+ }
+
+ rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr +
+ prop->mmu_hop0_tables_total_size,
+ prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
+ -1);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
+ goto err_pool_add;
+ }
+
+ hdev->mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
+ prop->mmu_hop_table_size,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!hdev->mmu_shadow_hop0) {
+ rc = -ENOMEM;
+ goto err_pool_add;
+ }
+
+ /* MMU H/W init will be done in device hw_init() */
+
+ return 0;
+
+err_pool_add:
+ gen_pool_destroy(hdev->mmu_pgt_pool);
+
+ return rc;
+}
+
+/**
+ * hl_mmu_fini() - release the MMU module.
+ * @hdev: habanalabs device structure.
+ *
+ * This function does the following:
+ * - Disable MMU in H/W.
+ * - Free the pgt_infos pool.
+ *
+ * All contexts should be freed before calling this function.
+ */
+void hl_mmu_fini(struct hl_device *hdev)
+{
+ if (!hdev->mmu_enable)
+ return;
+
+ /* MMU H/W fini was already done in device hw_fini() */
+
+ kvfree(hdev->mmu_shadow_hop0);
+ gen_pool_destroy(hdev->mmu_pgt_pool);
+}
+
+/**
+ * hl_mmu_ctx_init() - initialize a context for using the MMU module.
+ * @ctx: pointer to the context structure to initialize.
+ *
+ * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all
+ * page tables hops related to this context.
+ * Return: 0 on success, non-zero otherwise.
+ */
+int hl_mmu_ctx_init(struct hl_ctx *ctx)
+{
+ struct hl_device *hdev = ctx->hdev;
+
+ if (!hdev->mmu_enable)
+ return 0;
+
+ mutex_init(&ctx->mmu_lock);
+ hash_init(ctx->mmu_shadow_hash);
+
+ return dram_default_mapping_init(ctx);
+}
+
+/*
+ * hl_mmu_ctx_fini - disable a ctx from using the mmu module
+ *
+ * @ctx: pointer to the context structure
+ *
+ * This function does the following:
+ * - Free any pgts which were not freed yet
+ * - Free the mutex
+ * - Free DRAM default page mapping hops
+ */
+void hl_mmu_ctx_fini(struct hl_ctx *ctx)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct pgt_info *pgt_info;
+ struct hlist_node *tmp;
+ int i;
+
+ if (!hdev->mmu_enable)
+ return;
+
+ dram_default_mapping_fini(ctx);
+
+ if (!hash_empty(ctx->mmu_shadow_hash))
+ dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
+ ctx->asid);
+
+ hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
+ dev_err_ratelimited(hdev->dev,
+ "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
+ pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
+ _free_hop(ctx, pgt_info);
+ }
+
+ mutex_destroy(&ctx->mmu_lock);
+}
+
+static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_mmu_properties *mmu_prop;
+ u64 hop0_addr = 0, hop0_pte_addr = 0,
+ hop1_addr = 0, hop1_pte_addr = 0,
+ hop2_addr = 0, hop2_pte_addr = 0,
+ hop3_addr = 0, hop3_pte_addr = 0,
+ hop4_addr = 0, hop4_pte_addr = 0,
+ curr_pte;
+ bool is_huge, clear_hop3 = true;
+
+ /* shifts and masks are the same in PMMU and HPMMU, use one of them */
+ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
+ hop0_addr = get_hop0_addr(ctx);
+ hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
+
+ curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
+
+ hop1_addr = get_next_hop_addr(ctx, curr_pte);
+
+ if (hop1_addr == ULLONG_MAX)
+ goto not_mapped;
+
+ hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
+
+ curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
+
+ hop2_addr = get_next_hop_addr(ctx, curr_pte);
+
+ if (hop2_addr == ULLONG_MAX)
+ goto not_mapped;
+
+ hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
+
+ curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
+
+ hop3_addr = get_next_hop_addr(ctx, curr_pte);
+
+ if (hop3_addr == ULLONG_MAX)
+ goto not_mapped;
+
+ hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
+
+ curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
+
+ is_huge = curr_pte & LAST_MASK;
+
+ if (is_dram_addr && !is_huge) {
+ dev_err(hdev->dev,
+ "DRAM unmapping should use huge pages only\n");
+ return -EFAULT;
+ }
+
+ if (!is_huge) {
+ hop4_addr = get_next_hop_addr(ctx, curr_pte);
+
+ if (hop4_addr == ULLONG_MAX)
+ goto not_mapped;
+
+ hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
+ virt_addr);
+
+ curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
+
+ clear_hop3 = false;
+ }
+
+ if (hdev->dram_default_page_mapping && is_dram_addr) {
+ u64 default_pte = (prop->mmu_dram_default_page_addr &
+ HOP_PHYS_ADDR_MASK) | LAST_MASK |
+ PAGE_PRESENT_MASK;
+ if (curr_pte == default_pte) {
+ dev_err(hdev->dev,
+ "DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
+ virt_addr);
+ goto not_mapped;
+ }
+
+ if (!(curr_pte & PAGE_PRESENT_MASK)) {
+ dev_err(hdev->dev,
+ "DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
+ virt_addr);
+ goto not_mapped;
+ }
+
+ write_final_pte(ctx, hop3_pte_addr, default_pte);
+ put_pte(ctx, hop3_addr);
+ } else {
+ if (!(curr_pte & PAGE_PRESENT_MASK))
+ goto not_mapped;
+
+ if (hop4_addr)
+ clear_pte(ctx, hop4_pte_addr);
+ else
+ clear_pte(ctx, hop3_pte_addr);
+
+ if (hop4_addr && !put_pte(ctx, hop4_addr))
+ clear_hop3 = true;
+
+ if (!clear_hop3)
+ goto mapped;
+
+ clear_pte(ctx, hop3_pte_addr);
+
+ if (put_pte(ctx, hop3_addr))
+ goto mapped;
+
+ clear_pte(ctx, hop2_pte_addr);
+
+ if (put_pte(ctx, hop2_addr))
+ goto mapped;
+
+ clear_pte(ctx, hop1_pte_addr);
+
+ if (put_pte(ctx, hop1_addr))
+ goto mapped;
+
+ clear_pte(ctx, hop0_pte_addr);
+ }
+
+mapped:
+ return 0;
+
+not_mapped:
+ dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
+ virt_addr);
+
+ return -EINVAL;
+}
+
+/*
+ * hl_mmu_unmap - unmaps a virtual addr
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to map from
+ * @page_size: size of the page to unmap
+ * @flush_pte: whether to do a PCI flush
+ *
+ * This function does the following:
+ * - Check that the virt addr is mapped
+ * - Unmap the virt addr and frees pgts if possible
+ * - Returns 0 on success, -EINVAL if the given addr is not mapped
+ *
+ * Because this function changes the page tables in the device and because it
+ * changes the MMU hash, it must be protected by a lock.
+ * However, because it maps only a single page, the lock should be implemented
+ * in a higher level in order to protect the entire mapping of the memory area
+ *
+ * For optimization reasons PCI flush may be requested once after unmapping of
+ * large area.
+ */
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+ bool flush_pte)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_mmu_properties *mmu_prop;
+ u64 real_virt_addr;
+ u32 real_page_size, npages;
+ int i, rc = 0;
+ bool is_dram_addr;
+
+ if (!hdev->mmu_enable)
+ return 0;
+
+ is_dram_addr = is_dram_va(hdev, virt_addr);
+
+ if (is_dram_addr)
+ mmu_prop = &prop->dmmu;
+ else if ((page_size % prop->pmmu_huge.page_size) == 0)
+ mmu_prop = &prop->pmmu_huge;
+ else
+ mmu_prop = &prop->pmmu;
+
+ /*
+ * The H/W handles mapping of specific page sizes. Hence if the page
+ * size is bigger, we break it to sub-pages and unmap them separately.
+ */
+ if ((page_size % mmu_prop->page_size) == 0) {
+ real_page_size = mmu_prop->page_size;
+ } else {
+ dev_err(hdev->dev,
+ "page size of %u is not %uKB aligned, can't unmap\n",
+ page_size, mmu_prop->page_size >> 10);
+
+ return -EFAULT;
+ }
+
+ npages = page_size / real_page_size;
+ real_virt_addr = virt_addr;
+
+ for (i = 0 ; i < npages ; i++) {
+ rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
+ if (rc)
+ break;
+
+ real_virt_addr += real_page_size;
+ }
+
+ if (flush_pte)
+ flush(ctx);
+
+ return rc;
+}
+
+static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+ u32 page_size, bool is_dram_addr)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_mmu_properties *mmu_prop;
+ u64 hop0_addr = 0, hop0_pte_addr = 0,
+ hop1_addr = 0, hop1_pte_addr = 0,
+ hop2_addr = 0, hop2_pte_addr = 0,
+ hop3_addr = 0, hop3_pte_addr = 0,
+ hop4_addr = 0, hop4_pte_addr = 0,
+ curr_pte = 0;
+ bool hop1_new = false, hop2_new = false, hop3_new = false,
+ hop4_new = false, is_huge;
+ int rc = -ENOMEM;
+
+ /*
+ * This mapping function can map a page or a huge page. For huge page
+ * there are only 3 hops rather than 4. Currently the DRAM allocation
+ * uses huge pages only but user memory could have been allocated with
+ * one of the two page sizes. Since this is a common code for all the
+ * three cases, we need this hugs page check.
+ */
+ if (is_dram_addr) {
+ mmu_prop = &prop->dmmu;
+ is_huge = true;
+ } else if (page_size == prop->pmmu_huge.page_size) {
+ mmu_prop = &prop->pmmu_huge;
+ is_huge = true;
+ } else {
+ mmu_prop = &prop->pmmu;
+ is_huge = false;
+ }
+
+ hop0_addr = get_hop0_addr(ctx);
+ hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
+ curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
+
+ hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
+ if (hop1_addr == ULLONG_MAX)
+ goto err;
+
+ hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
+ curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
+
+ hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
+ if (hop2_addr == ULLONG_MAX)
+ goto err;
+
+ hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
+ curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
+
+ hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
+ if (hop3_addr == ULLONG_MAX)
+ goto err;
+
+ hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
+ curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
+
+ if (!is_huge) {
+ hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
+ if (hop4_addr == ULLONG_MAX)
+ goto err;
+
+ hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
+ virt_addr);
+ curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
+ }
+
+ if (hdev->dram_default_page_mapping && is_dram_addr) {
+ u64 default_pte = (prop->mmu_dram_default_page_addr &
+ HOP_PHYS_ADDR_MASK) | LAST_MASK |
+ PAGE_PRESENT_MASK;
+
+ if (curr_pte != default_pte) {
+ dev_err(hdev->dev,
+ "DRAM: mapping already exists for virt_addr 0x%llx\n",
+ virt_addr);
+ rc = -EINVAL;
+ goto err;
+ }
+
+ if (hop1_new || hop2_new || hop3_new || hop4_new) {
+ dev_err(hdev->dev,
+ "DRAM mapping should not allocate more hops\n");
+ rc = -EFAULT;
+ goto err;
+ }
+ } else if (curr_pte & PAGE_PRESENT_MASK) {
+ dev_err(hdev->dev,
+ "mapping already exists for virt_addr 0x%llx\n",
+ virt_addr);
+
+ dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
+ *(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
+ dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
+ *(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
+ dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
+ *(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
+ dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
+ *(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);
+
+ if (!is_huge)
+ dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
+ *(u64 *) (uintptr_t) hop4_pte_addr,
+ hop4_pte_addr);
+
+ rc = -EINVAL;
+ goto err;
+ }
+
+ curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK
+ | PAGE_PRESENT_MASK;
+
+ if (is_huge)
+ write_final_pte(ctx, hop3_pte_addr, curr_pte);
+ else
+ write_final_pte(ctx, hop4_pte_addr, curr_pte);
+
+ if (hop1_new) {
+ curr_pte =
+ (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+ write_pte(ctx, hop0_pte_addr, curr_pte);
+ }
+ if (hop2_new) {
+ curr_pte =
+ (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+ write_pte(ctx, hop1_pte_addr, curr_pte);
+ get_pte(ctx, hop1_addr);
+ }
+ if (hop3_new) {
+ curr_pte =
+ (hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+ write_pte(ctx, hop2_pte_addr, curr_pte);
+ get_pte(ctx, hop2_addr);
+ }
+
+ if (!is_huge) {
+ if (hop4_new) {
+ curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
+ PAGE_PRESENT_MASK;
+ write_pte(ctx, hop3_pte_addr, curr_pte);
+ get_pte(ctx, hop3_addr);
+ }
+
+ get_pte(ctx, hop4_addr);
+ } else {
+ get_pte(ctx, hop3_addr);
+ }
+
+ return 0;
+
+err:
+ if (hop4_new)
+ free_hop(ctx, hop4_addr);
+ if (hop3_new)
+ free_hop(ctx, hop3_addr);
+ if (hop2_new)
+ free_hop(ctx, hop2_addr);
+ if (hop1_new)
+ free_hop(ctx, hop1_addr);
+
+ return rc;
+}
+
+/*
+ * hl_mmu_map - maps a virtual addr to physical addr
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to map from
+ * @phys_addr: phys addr to map to
+ * @page_size: physical page size
+ * @flush_pte: whether to do a PCI flush
+ *
+ * This function does the following:
+ * - Check that the virt addr is not mapped
+ * - Allocate pgts as necessary in order to map the virt addr to the phys
+ * - Returns 0 on success, -EINVAL if addr is already mapped, or -ENOMEM.
+ *
+ * Because this function changes the page tables in the device and because it
+ * changes the MMU hash, it must be protected by a lock.
+ * However, because it maps only a single page, the lock should be implemented
+ * in a higher level in order to protect the entire mapping of the memory area
+ *
+ * For optimization reasons PCI flush may be requested once after mapping of
+ * large area.
+ */
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
+ bool flush_pte)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_mmu_properties *mmu_prop;
+ u64 real_virt_addr, real_phys_addr;
+ u32 real_page_size, npages;
+ int i, rc, mapped_cnt = 0;
+ bool is_dram_addr;
+
+ if (!hdev->mmu_enable)
+ return 0;
+
+ is_dram_addr = is_dram_va(hdev, virt_addr);
+
+ if (is_dram_addr)
+ mmu_prop = &prop->dmmu;
+ else if ((page_size % prop->pmmu_huge.page_size) == 0)
+ mmu_prop = &prop->pmmu_huge;
+ else
+ mmu_prop = &prop->pmmu;
+
+ /*
+ * The H/W handles mapping of specific page sizes. Hence if the page
+ * size is bigger, we break it to sub-pages and map them separately.
+ */
+ if ((page_size % mmu_prop->page_size) == 0) {
+ real_page_size = mmu_prop->page_size;
+ } else {
+ dev_err(hdev->dev,
+ "page size of %u is not %uKB aligned, can't unmap\n",
+ page_size, mmu_prop->page_size >> 10);
+
+ return -EFAULT;
+ }
+
+ WARN_ONCE((phys_addr & (real_page_size - 1)),
+ "Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size",
+ phys_addr, real_page_size);
+
+ npages = page_size / real_page_size;
+ real_virt_addr = virt_addr;
+ real_phys_addr = phys_addr;
+
+ for (i = 0 ; i < npages ; i++) {
+ rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr,
+ real_page_size, is_dram_addr);
+ if (rc)
+ goto err;
+
+ real_virt_addr += real_page_size;
+ real_phys_addr += real_page_size;
+ mapped_cnt++;
+ }
+
+ if (flush_pte)
+ flush(ctx);
+
+ return 0;
+
+err:
+ real_virt_addr = virt_addr;
+ for (i = 0 ; i < mapped_cnt ; i++) {
+ if (_hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr))
+ dev_warn_ratelimited(hdev->dev,
+ "failed to unmap va: 0x%llx\n", real_virt_addr);
+
+ real_virt_addr += real_page_size;
+ }
+
+ flush(ctx);
+
+ return rc;
+}
+
+/*
+ * hl_mmu_swap_out - marks all mapping of the given ctx as swapped out
+ *
+ * @ctx: pointer to the context structure
+ *
+ */
+void hl_mmu_swap_out(struct hl_ctx *ctx)
+{
+
+}
+
+/*
+ * hl_mmu_swap_in - marks all mapping of the given ctx as swapped in
+ *
+ * @ctx: pointer to the context structure
+ *
+ */
+void hl_mmu_swap_in(struct hl_ctx *ctx)
+{
+
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+#include "include/hw_ip/pci/pci_general.h"
+
+#include <linux/pci.h>
+#include <linux/bitfield.h>
+
+#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC (HL_PCI_ELBI_TIMEOUT_MSEC * 10)
+
+#define IATU_REGION_CTRL_REGION_EN_MASK BIT(31)
+#define IATU_REGION_CTRL_MATCH_MODE_MASK BIT(30)
+#define IATU_REGION_CTRL_NUM_MATCH_EN_MASK BIT(19)
+#define IATU_REGION_CTRL_BAR_NUM_MASK GENMASK(10, 8)
+
+/**
+ * hl_pci_bars_map() - Map PCI BARs.
+ * @hdev: Pointer to hl_device structure.
+ * @name: Array of BAR names.
+ * @is_wc: Array with flag per BAR whether a write-combined mapping is needed.
+ *
+ * Request PCI regions and map them to kernel virtual addresses.
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
+ bool is_wc[3])
+{
+ struct pci_dev *pdev = hdev->pdev;
+ int rc, i, bar;
+
+ rc = pci_request_regions(pdev, HL_NAME);
+ if (rc) {
+ dev_err(hdev->dev, "Cannot obtain PCI resources\n");
+ return rc;
+ }
+
+ for (i = 0 ; i < 3 ; i++) {
+ bar = i * 2; /* 64-bit BARs */
+ hdev->pcie_bar[bar] = is_wc[i] ?
+ pci_ioremap_wc_bar(pdev, bar) :
+ pci_ioremap_bar(pdev, bar);
+ if (!hdev->pcie_bar[bar]) {
+ dev_err(hdev->dev, "pci_ioremap%s_bar failed for %s\n",
+ is_wc[i] ? "_wc" : "", name[i]);
+ rc = -ENODEV;
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ for (i = 2 ; i >= 0 ; i--) {
+ bar = i * 2; /* 64-bit BARs */
+ if (hdev->pcie_bar[bar])
+ iounmap(hdev->pcie_bar[bar]);
+ }
+
+ pci_release_regions(pdev);
+
+ return rc;
+}
+
+/**
+ * hl_pci_bars_unmap() - Unmap PCI BARS.
+ * @hdev: Pointer to hl_device structure.
+ *
+ * Release all PCI BARs and unmap their virtual addresses.
+ */
+static void hl_pci_bars_unmap(struct hl_device *hdev)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ int i, bar;
+
+ for (i = 2 ; i >= 0 ; i--) {
+ bar = i * 2; /* 64-bit BARs */
+ iounmap(hdev->pcie_bar[bar]);
+ }
+
+ pci_release_regions(pdev);
+}
+
+/**
+ * hl_pci_elbi_write() - Write through the ELBI interface.
+ * @hdev: Pointer to hl_device structure.
+ * @addr: Address to write to
+ * @data: Data to write
+ *
+ * Return: 0 on success, negative value for failure.
+ */
+static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ ktime_t timeout;
+ u64 msec;
+ u32 val;
+
+ if (hdev->pldm)
+ msec = HL_PLDM_PCI_ELBI_TIMEOUT_MSEC;
+ else
+ msec = HL_PCI_ELBI_TIMEOUT_MSEC;
+
+ /* Clear previous status */
+ pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0);
+
+ pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr);
+ pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data);
+ pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL,
+ PCI_CONFIG_ELBI_CTRL_WRITE);
+
+ timeout = ktime_add_ms(ktime_get(), msec);
+ for (;;) {
+ pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val);
+ if (val & PCI_CONFIG_ELBI_STS_MASK)
+ break;
+ if (ktime_compare(ktime_get(), timeout) > 0) {
+ pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS,
+ &val);
+ break;
+ }
+
+ usleep_range(300, 500);
+ }
+
+ if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE)
+ return 0;
+
+ if (val & PCI_CONFIG_ELBI_STS_ERR) {
+ dev_err(hdev->dev, "Error writing to ELBI\n");
+ return -EIO;
+ }
+
+ if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
+ dev_err(hdev->dev, "ELBI write didn't finish in time\n");
+ return -EIO;
+ }
+
+ dev_err(hdev->dev, "ELBI write has undefined bits in status\n");
+ return -EIO;
+}
+
+/**
+ * hl_pci_iatu_write() - iatu write routine.
+ * @hdev: Pointer to hl_device structure.
+ * @addr: Address to write to
+ * @data: Data to write
+ *
+ * Return: 0 on success, negative value for failure.
+ */
+int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u32 dbi_offset;
+ int rc;
+
+ dbi_offset = addr & 0xFFF;
+
+ rc = hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000);
+ rc |= hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset,
+ data);
+
+ if (rc)
+ return -EIO;
+
+ return 0;
+}
+
+/**
+ * hl_pci_reset_link_through_bridge() - Reset PCI link.
+ * @hdev: Pointer to hl_device structure.
+ */
+static void hl_pci_reset_link_through_bridge(struct hl_device *hdev)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ struct pci_dev *parent_port;
+ u16 val;
+
+ parent_port = pdev->bus->self;
+ pci_read_config_word(parent_port, PCI_BRIDGE_CONTROL, &val);
+ val |= PCI_BRIDGE_CTL_BUS_RESET;
+ pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
+ ssleep(1);
+
+ val &= ~(PCI_BRIDGE_CTL_BUS_RESET);
+ pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
+ ssleep(3);
+}
+
+/**
+ * hl_pci_set_inbound_region() - Configure inbound region
+ * @hdev: Pointer to hl_device structure.
+ * @region: Inbound region number.
+ * @pci_region: Inbound region parameters.
+ *
+ * Configure the iATU inbound region.
+ *
+ * Return: 0 on success, negative value for failure.
+ */
+int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
+ struct hl_inbound_pci_region *pci_region)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 bar_phys_base, region_base, region_end_address;
+ u32 offset, ctrl_reg_val;
+ int rc = 0;
+
+ /* region offset */
+ offset = (0x200 * region) + 0x100;
+
+ if (pci_region->mode == PCI_ADDRESS_MATCH_MODE) {
+ bar_phys_base = hdev->pcie_bar_phys[pci_region->bar];
+ region_base = bar_phys_base + pci_region->offset_in_bar;
+ region_end_address = region_base + pci_region->size - 1;
+
+ rc |= hl_pci_iatu_write(hdev, offset + 0x8,
+ lower_32_bits(region_base));
+ rc |= hl_pci_iatu_write(hdev, offset + 0xC,
+ upper_32_bits(region_base));
+ rc |= hl_pci_iatu_write(hdev, offset + 0x10,
+ lower_32_bits(region_end_address));
+ }
+
+ /* Point to the specified address */
+ rc = hl_pci_iatu_write(hdev, offset + 0x14,
+ lower_32_bits(pci_region->addr));
+ rc |= hl_pci_iatu_write(hdev, offset + 0x18,
+ upper_32_bits(pci_region->addr));
+ rc |= hl_pci_iatu_write(hdev, offset + 0x0, 0);
+
+ /* Enable + bar/address match + match enable + bar number */
+ ctrl_reg_val = FIELD_PREP(IATU_REGION_CTRL_REGION_EN_MASK, 1);
+ ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_MATCH_MODE_MASK,
+ pci_region->mode);
+ ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_NUM_MATCH_EN_MASK, 1);
+
+ if (pci_region->mode == PCI_BAR_MATCH_MODE)
+ ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_BAR_NUM_MASK,
+ pci_region->bar);
+
+ rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val);
+
+ /* Return the DBI window to the default location */
+ rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
+ rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
+
+ if (rc)
+ dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n",
+ pci_region->bar, pci_region->addr);
+
+ return rc;
+}
+
+/**
+ * hl_pci_set_outbound_region() - Configure outbound region 0
+ * @hdev: Pointer to hl_device structure.
+ * @pci_region: Outbound region parameters.
+ *
+ * Configure the iATU outbound region 0.
+ *
+ * Return: 0 on success, negative value for failure.
+ */
+int hl_pci_set_outbound_region(struct hl_device *hdev,
+ struct hl_outbound_pci_region *pci_region)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 outbound_region_end_address;
+ int rc = 0;
+
+ /* Outbound Region 0 */
+ outbound_region_end_address =
+ pci_region->addr + pci_region->size - 1;
+ rc |= hl_pci_iatu_write(hdev, 0x008,
+ lower_32_bits(pci_region->addr));
+ rc |= hl_pci_iatu_write(hdev, 0x00C,
+ upper_32_bits(pci_region->addr));
+ rc |= hl_pci_iatu_write(hdev, 0x010,
+ lower_32_bits(outbound_region_end_address));
+ rc |= hl_pci_iatu_write(hdev, 0x014, 0);
+
+ if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64))
+ rc |= hl_pci_iatu_write(hdev, 0x018, 0x08000000);
+ else
+ rc |= hl_pci_iatu_write(hdev, 0x018, 0);
+
+ rc |= hl_pci_iatu_write(hdev, 0x020,
+ upper_32_bits(outbound_region_end_address));
+ /* Increase region size */
+ rc |= hl_pci_iatu_write(hdev, 0x000, 0x00002000);
+ /* Enable */
+ rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000);
+
+ /* Return the DBI window to the default location */
+ rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
+ rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
+
+ return rc;
+}
+
+/**
+ * hl_pci_set_dma_mask() - Set DMA masks for the device.
+ * @hdev: Pointer to hl_device structure.
+ *
+ * This function sets the DMA masks (regular and consistent) for a specified
+ * value. If it doesn't succeed, it tries to set it to a fall-back value
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+static int hl_pci_set_dma_mask(struct hl_device *hdev)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ int rc;
+
+ /* set DMA mask */
+ rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to set pci dma mask to %d bits, error %d\n",
+ hdev->dma_mask, rc);
+ return rc;
+ }
+
+ rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to set pci consistent dma mask to %d bits, error %d\n",
+ hdev->dma_mask, rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * hl_pci_init() - PCI initialization code.
+ * @hdev: Pointer to hl_device structure.
+ *
+ * Set DMA masks, initialize the PCI controller and map the PCI BARs.
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+int hl_pci_init(struct hl_device *hdev)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ int rc;
+
+ if (hdev->reset_pcilink)
+ hl_pci_reset_link_through_bridge(hdev);
+
+ rc = pci_enable_device_mem(pdev);
+ if (rc) {
+ dev_err(hdev->dev, "can't enable PCI device\n");
+ return rc;
+ }
+
+ pci_set_master(pdev);
+
+ rc = hdev->asic_funcs->pci_bars_map(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to initialize PCI BARs\n");
+ goto disable_device;
+ }
+
+ rc = hdev->asic_funcs->init_iatu(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to initialize iATU\n");
+ goto disable_device;
+ }
+
+ rc = hl_pci_set_dma_mask(hdev);
+ if (rc)
+ goto disable_device;
+
+ return 0;
+
+disable_device:
+ pci_clear_master(pdev);
+ pci_disable_device(pdev);
+
+ return rc;
+}
+
+/**
+ * hl_fw_fini() - PCI finalization code.
+ * @hdev: Pointer to hl_device structure
+ *
+ * Unmap PCI bars and disable PCI device.
+ */
+void hl_pci_fini(struct hl_device *hdev)
+{
+ hl_pci_bars_unmap(hdev);
+
+ pci_clear_master(hdev->pdev);
+ pci_disable_device(hdev->pdev);
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+#include <linux/pci.h>
+
+#define SET_CLK_PKT_TIMEOUT 1000000 /* 1s */
+#define SET_PWR_PKT_TIMEOUT 1000000 /* 1s */
+
+long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
+{
+ struct armcp_packet pkt;
+ long result;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ if (curr)
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_CURR_GET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ else
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_GET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.pll_index = cpu_to_le32(pll_index);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ SET_CLK_PKT_TIMEOUT, &result);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to get frequency of PLL %d, error %d\n",
+ pll_index, rc);
+ result = rc;
+ }
+
+ return result;
+}
+
+void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
+{
+ struct armcp_packet pkt;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_SET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.pll_index = cpu_to_le32(pll_index);
+ pkt.value = cpu_to_le64(freq);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ SET_CLK_PKT_TIMEOUT, NULL);
+
+ if (rc)
+ dev_err(hdev->dev,
+ "Failed to set frequency to PLL %d, error %d\n",
+ pll_index, rc);
+}
+
+u64 hl_get_max_power(struct hl_device *hdev)
+{
+ struct armcp_packet pkt;
+ long result;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_GET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ SET_PWR_PKT_TIMEOUT, &result);
+
+ if (rc) {
+ dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
+ result = rc;
+ }
+
+ return result;
+}
+
+void hl_set_max_power(struct hl_device *hdev, u64 value)
+{
+ struct armcp_packet pkt;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.value = cpu_to_le64(value);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ SET_PWR_PKT_TIMEOUT, NULL);
+
+ if (rc)
+ dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
+}
+
+static ssize_t uboot_ver_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%s\n", hdev->asic_prop.uboot_ver);
+}
+
+static ssize_t armcp_kernel_ver_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%s", hdev->asic_prop.armcp_info.kernel_version);
+}
+
+static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.armcp_version);
+}
+
+static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+
+ return sprintf(buf, "0x%08x\n",
+ hdev->asic_prop.armcp_info.cpld_version);
+}
+
+static ssize_t infineon_ver_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+
+ return sprintf(buf, "0x%04x\n",
+ hdev->asic_prop.armcp_info.infineon_version);
+}
+
+static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.fuse_version);
+}
+
+static ssize_t thermal_ver_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%s", hdev->asic_prop.armcp_info.thermal_version);
+}
+
+static ssize_t preboot_btl_ver_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%s\n", hdev->asic_prop.preboot_ver);
+}
+
+static ssize_t soft_reset_store(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+ long value;
+ int rc;
+
+ rc = kstrtoul(buf, 0, &value);
+
+ if (rc) {
+ count = -EINVAL;
+ goto out;
+ }
+
+ if (!hdev->supports_soft_reset) {
+ dev_err(hdev->dev, "Device does not support soft-reset\n");
+ goto out;
+ }
+
+ dev_warn(hdev->dev, "Soft-Reset requested through sysfs\n");
+
+ hl_device_reset(hdev, false, false);
+
+out:
+ return count;
+}
+
+static ssize_t hard_reset_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+ long value;
+ int rc;
+
+ rc = kstrtoul(buf, 0, &value);
+
+ if (rc) {
+ count = -EINVAL;
+ goto out;
+ }
+
+ dev_warn(hdev->dev, "Hard-Reset requested through sysfs\n");
+
+ hl_device_reset(hdev, true, false);
+
+out:
+ return count;
+}
+
+static ssize_t device_type_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+ char *str;
+
+ switch (hdev->asic_type) {
+ case ASIC_GOYA:
+ str = "GOYA";
+ break;
+ case ASIC_GAUDI:
+ str = "GAUDI";
+ break;
+ default:
+ dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
+ hdev->asic_type);
+ return -EINVAL;
+ }
+
+ return sprintf(buf, "%s\n", str);
+}
+
+static ssize_t pci_addr_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%04x:%02x:%02x.%x\n",
+ pci_domain_nr(hdev->pdev->bus),
+ hdev->pdev->bus->number,
+ PCI_SLOT(hdev->pdev->devfn),
+ PCI_FUNC(hdev->pdev->devfn));
+}
+
+static ssize_t status_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+ char *str;
+
+ if (atomic_read(&hdev->in_reset))
+ str = "In reset";
+ else if (hdev->disabled)
+ str = "Malfunction";
+ else
+ str = "Operational";
+
+ return sprintf(buf, "%s\n", str);
+}
+
+static ssize_t soft_reset_cnt_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d\n", hdev->soft_reset_cnt);
+}
+
+static ssize_t hard_reset_cnt_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d\n", hdev->hard_reset_cnt);
+}
+
+static ssize_t max_power_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+ long val;
+
+ if (hl_device_disabled_or_in_reset(hdev))
+ return -ENODEV;
+
+ val = hl_get_max_power(hdev);
+
+ return sprintf(buf, "%lu\n", val);
+}
+
+static ssize_t max_power_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+ unsigned long value;
+ int rc;
+
+ if (hl_device_disabled_or_in_reset(hdev)) {
+ count = -ENODEV;
+ goto out;
+ }
+
+ rc = kstrtoul(buf, 0, &value);
+
+ if (rc) {
+ count = -EINVAL;
+ goto out;
+ }
+
+ hdev->max_power = value;
+ hl_set_max_power(hdev, value);
+
+out:
+ return count;
+}
+
+static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf, loff_t offset,
+ size_t max_size)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct hl_device *hdev = dev_get_drvdata(dev);
+ char *data;
+ int rc;
+
+ if (!max_size)
+ return -EINVAL;
+
+ data = kzalloc(max_size, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ rc = hdev->asic_funcs->get_eeprom_data(hdev, data, max_size);
+ if (rc)
+ goto out;
+
+ memcpy(buf, data, max_size);
+
+out:
+ kfree(data);
+
+ return max_size;
+}
+
+static DEVICE_ATTR_RO(armcp_kernel_ver);
+static DEVICE_ATTR_RO(armcp_ver);
+static DEVICE_ATTR_RO(cpld_ver);
+static DEVICE_ATTR_RO(device_type);
+static DEVICE_ATTR_RO(fuse_ver);
+static DEVICE_ATTR_WO(hard_reset);
+static DEVICE_ATTR_RO(hard_reset_cnt);
+static DEVICE_ATTR_RO(infineon_ver);
+static DEVICE_ATTR_RW(max_power);
+static DEVICE_ATTR_RO(pci_addr);
+static DEVICE_ATTR_RO(preboot_btl_ver);
+static DEVICE_ATTR_WO(soft_reset);
+static DEVICE_ATTR_RO(soft_reset_cnt);
+static DEVICE_ATTR_RO(status);
+static DEVICE_ATTR_RO(thermal_ver);
+static DEVICE_ATTR_RO(uboot_ver);
+
+static struct bin_attribute bin_attr_eeprom = {
+ .attr = {.name = "eeprom", .mode = (0444)},
+ .size = PAGE_SIZE,
+ .read = eeprom_read_handler
+};
+
+static struct attribute *hl_dev_attrs[] = {
+ &dev_attr_armcp_kernel_ver.attr,
+ &dev_attr_armcp_ver.attr,
+ &dev_attr_cpld_ver.attr,
+ &dev_attr_device_type.attr,
+ &dev_attr_fuse_ver.attr,
+ &dev_attr_hard_reset.attr,
+ &dev_attr_hard_reset_cnt.attr,
+ &dev_attr_infineon_ver.attr,
+ &dev_attr_max_power.attr,
+ &dev_attr_pci_addr.attr,
+ &dev_attr_preboot_btl_ver.attr,
+ &dev_attr_soft_reset.attr,
+ &dev_attr_soft_reset_cnt.attr,
+ &dev_attr_status.attr,
+ &dev_attr_thermal_ver.attr,
+ &dev_attr_uboot_ver.attr,
+ NULL,
+};
+
+static struct bin_attribute *hl_dev_bin_attrs[] = {
+ &bin_attr_eeprom,
+ NULL
+};
+
+static struct attribute_group hl_dev_attr_group = {
+ .attrs = hl_dev_attrs,
+ .bin_attrs = hl_dev_bin_attrs,
+};
+
+static struct attribute_group hl_dev_clks_attr_group;
+
+static const struct attribute_group *hl_dev_attr_groups[] = {
+ &hl_dev_attr_group,
+ &hl_dev_clks_attr_group,
+ NULL,
+};
+
+int hl_sysfs_init(struct hl_device *hdev)
+{
+ int rc;
+
+ if (hdev->asic_type == ASIC_GOYA)
+ hdev->pm_mng_profile = PM_AUTO;
+ else
+ hdev->pm_mng_profile = PM_MANUAL;
+ hdev->max_power = hdev->asic_prop.max_power_default;
+
+ hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group);
+
+ rc = device_add_groups(hdev->dev, hl_dev_attr_groups);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to add groups to device, error %d\n", rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+void hl_sysfs_fini(struct hl_device *hdev)
+{
+ device_remove_groups(hdev->dev, hl_dev_attr_groups);
+}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-
-#include <linux/slab.h>
-
-static void hl_ctx_fini(struct hl_ctx *ctx)
-{
- struct hl_device *hdev = ctx->hdev;
- int i;
-
- /*
- * If we arrived here, there are no jobs waiting for this context
- * on its queues so we can safely remove it.
- * This is because for each CS, we increment the ref count and for
- * every CS that was finished we decrement it and we won't arrive
- * to this function unless the ref count is 0
- */
-
- for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++)
- dma_fence_put(ctx->cs_pending[i]);
-
- kfree(ctx->cs_pending);
-
- if (ctx->asid != HL_KERNEL_ASID_ID) {
- /* The engines are stopped as there is no executing CS, but the
- * Coresight might be still working by accessing addresses
- * related to the stopped engines. Hence stop it explicitly.
- * Stop only if this is the compute context, as there can be
- * only one compute context
- */
- if ((hdev->in_debug) && (hdev->compute_ctx == ctx))
- hl_device_set_debug_mode(hdev, false);
-
- hl_vm_ctx_fini(ctx);
- hl_asid_free(hdev, ctx->asid);
- } else {
- hl_mmu_ctx_fini(ctx);
- }
-}
-
-void hl_ctx_do_release(struct kref *ref)
-{
- struct hl_ctx *ctx;
-
- ctx = container_of(ref, struct hl_ctx, refcount);
-
- hl_ctx_fini(ctx);
-
- if (ctx->hpriv)
- hl_hpriv_put(ctx->hpriv);
-
- kfree(ctx);
-}
-
-int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
-{
- struct hl_ctx_mgr *mgr = &hpriv->ctx_mgr;
- struct hl_ctx *ctx;
- int rc;
-
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx) {
- rc = -ENOMEM;
- goto out_err;
- }
-
- mutex_lock(&mgr->ctx_lock);
- rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
- mutex_unlock(&mgr->ctx_lock);
-
- if (rc < 0) {
- dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n");
- goto free_ctx;
- }
-
- ctx->handle = rc;
-
- rc = hl_ctx_init(hdev, ctx, false);
- if (rc)
- goto remove_from_idr;
-
- hl_hpriv_get(hpriv);
- ctx->hpriv = hpriv;
-
- /* TODO: remove for multiple contexts per process */
- hpriv->ctx = ctx;
-
- /* TODO: remove the following line for multiple process support */
- hdev->compute_ctx = ctx;
-
- return 0;
-
-remove_from_idr:
- mutex_lock(&mgr->ctx_lock);
- idr_remove(&mgr->ctx_handles, ctx->handle);
- mutex_unlock(&mgr->ctx_lock);
-free_ctx:
- kfree(ctx);
-out_err:
- return rc;
-}
-
-void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
-{
- if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1)
- return;
-
- dev_warn(hdev->dev,
- "user process released device but its command submissions are still executing\n");
-}
-
-int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
-{
- int rc = 0;
-
- ctx->hdev = hdev;
-
- kref_init(&ctx->refcount);
-
- ctx->cs_sequence = 1;
- spin_lock_init(&ctx->cs_lock);
- atomic_set(&ctx->thread_ctx_switch_token, 1);
- ctx->thread_ctx_switch_wait_token = 0;
- ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
- sizeof(struct dma_fence *),
- GFP_KERNEL);
- if (!ctx->cs_pending)
- return -ENOMEM;
-
- if (is_kernel_ctx) {
- ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
- rc = hl_mmu_ctx_init(ctx);
- if (rc) {
- dev_err(hdev->dev, "Failed to init mmu ctx module\n");
- goto mem_ctx_err;
- }
- } else {
- ctx->asid = hl_asid_alloc(hdev);
- if (!ctx->asid) {
- dev_err(hdev->dev, "No free ASID, failed to create context\n");
- return -ENOMEM;
- }
-
- rc = hl_vm_ctx_init(ctx);
- if (rc) {
- dev_err(hdev->dev, "Failed to init mem ctx module\n");
- rc = -ENOMEM;
- goto mem_ctx_err;
- }
- }
-
- return 0;
-
-mem_ctx_err:
- if (ctx->asid != HL_KERNEL_ASID_ID)
- hl_asid_free(hdev, ctx->asid);
-
- return rc;
-}
-
-void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx)
-{
- kref_get(&ctx->refcount);
-}
-
-int hl_ctx_put(struct hl_ctx *ctx)
-{
- return kref_put(&ctx->refcount, hl_ctx_do_release);
-}
-
-struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
-{
- struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
- struct dma_fence *fence;
-
- spin_lock(&ctx->cs_lock);
-
- if (seq >= ctx->cs_sequence) {
- spin_unlock(&ctx->cs_lock);
- return ERR_PTR(-EINVAL);
- }
-
- if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) {
- spin_unlock(&ctx->cs_lock);
- return NULL;
- }
-
- fence = dma_fence_get(
- ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]);
- spin_unlock(&ctx->cs_lock);
-
- return fence;
-}
-
-/*
- * hl_ctx_mgr_init - initialize the context manager
- *
- * @mgr: pointer to context manager structure
- *
- * This manager is an object inside the hpriv object of the user process.
- * The function is called when a user process opens the FD.
- */
-void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr)
-{
- mutex_init(&mgr->ctx_lock);
- idr_init(&mgr->ctx_handles);
-}
-
-/*
- * hl_ctx_mgr_fini - finalize the context manager
- *
- * @hdev: pointer to device structure
- * @mgr: pointer to context manager structure
- *
- * This function goes over all the contexts in the manager and frees them.
- * It is called when a process closes the FD.
- */
-void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr)
-{
- struct hl_ctx *ctx;
- struct idr *idp;
- u32 id;
-
- idp = &mgr->ctx_handles;
-
- idr_for_each_entry(idp, ctx, id)
- hl_ctx_free(hdev, ctx);
-
- idr_destroy(&mgr->ctx_handles);
- mutex_destroy(&mgr->ctx_lock);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-#include "include/hw_ip/mmu/mmu_general.h"
-
-#include <linux/pci.h>
-#include <linux/debugfs.h>
-#include <linux/uaccess.h>
-
-#define MMU_ADDR_BUF_SIZE 40
-#define MMU_ASID_BUF_SIZE 10
-#define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE)
-
-static struct dentry *hl_debug_root;
-
-static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
- u8 i2c_reg, u32 *val)
-{
- struct armcp_packet pkt;
- int rc;
-
- if (hl_device_disabled_or_in_reset(hdev))
- return -EBUSY;
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_RD <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.i2c_bus = i2c_bus;
- pkt.i2c_addr = i2c_addr;
- pkt.i2c_reg = i2c_reg;
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- HL_DEVICE_TIMEOUT_USEC, (long *) val);
-
- if (rc)
- dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc);
-
- return rc;
-}
-
-static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
- u8 i2c_reg, u32 val)
-{
- struct armcp_packet pkt;
- int rc;
-
- if (hl_device_disabled_or_in_reset(hdev))
- return -EBUSY;
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_WR <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.i2c_bus = i2c_bus;
- pkt.i2c_addr = i2c_addr;
- pkt.i2c_reg = i2c_reg;
- pkt.value = cpu_to_le64(val);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- HL_DEVICE_TIMEOUT_USEC, NULL);
-
- if (rc)
- dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc);
-
- return rc;
-}
-
-static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
-{
- struct armcp_packet pkt;
- int rc;
-
- if (hl_device_disabled_or_in_reset(hdev))
- return;
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_LED_SET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.led_index = cpu_to_le32(led);
- pkt.value = cpu_to_le64(state);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- HL_DEVICE_TIMEOUT_USEC, NULL);
-
- if (rc)
- dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc);
-}
-
-static int command_buffers_show(struct seq_file *s, void *data)
-{
- struct hl_debugfs_entry *entry = s->private;
- struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
- struct hl_cb *cb;
- bool first = true;
-
- spin_lock(&dev_entry->cb_spinlock);
-
- list_for_each_entry(cb, &dev_entry->cb_list, debugfs_list) {
- if (first) {
- first = false;
- seq_puts(s, "\n");
- seq_puts(s, " CB ID CTX ID CB size CB RefCnt mmap? CS counter\n");
- seq_puts(s, "---------------------------------------------------------------\n");
- }
- seq_printf(s,
- " %03d %d 0x%08x %d %d %d\n",
- cb->id, cb->ctx_id, cb->size,
- kref_read(&cb->refcount),
- cb->mmap, cb->cs_cnt);
- }
-
- spin_unlock(&dev_entry->cb_spinlock);
-
- if (!first)
- seq_puts(s, "\n");
-
- return 0;
-}
-
-static int command_submission_show(struct seq_file *s, void *data)
-{
- struct hl_debugfs_entry *entry = s->private;
- struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
- struct hl_cs *cs;
- bool first = true;
-
- spin_lock(&dev_entry->cs_spinlock);
-
- list_for_each_entry(cs, &dev_entry->cs_list, debugfs_list) {
- if (first) {
- first = false;
- seq_puts(s, "\n");
- seq_puts(s, " CS ID CTX ASID CS RefCnt Submitted Completed\n");
- seq_puts(s, "------------------------------------------------------\n");
- }
- seq_printf(s,
- " %llu %d %d %d %d\n",
- cs->sequence, cs->ctx->asid,
- kref_read(&cs->refcount),
- cs->submitted, cs->completed);
- }
-
- spin_unlock(&dev_entry->cs_spinlock);
-
- if (!first)
- seq_puts(s, "\n");
-
- return 0;
-}
-
-static int command_submission_jobs_show(struct seq_file *s, void *data)
-{
- struct hl_debugfs_entry *entry = s->private;
- struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
- struct hl_cs_job *job;
- bool first = true;
-
- spin_lock(&dev_entry->cs_job_spinlock);
-
- list_for_each_entry(job, &dev_entry->cs_job_list, debugfs_list) {
- if (first) {
- first = false;
- seq_puts(s, "\n");
- seq_puts(s, " JOB ID CS ID CTX ASID H/W Queue\n");
- seq_puts(s, "---------------------------------------\n");
- }
- if (job->cs)
- seq_printf(s,
- " %02d %llu %d %d\n",
- job->id, job->cs->sequence, job->cs->ctx->asid,
- job->hw_queue_id);
- else
- seq_printf(s,
- " %02d 0 %d %d\n",
- job->id, HL_KERNEL_ASID_ID, job->hw_queue_id);
- }
-
- spin_unlock(&dev_entry->cs_job_spinlock);
-
- if (!first)
- seq_puts(s, "\n");
-
- return 0;
-}
-
-static int userptr_show(struct seq_file *s, void *data)
-{
- struct hl_debugfs_entry *entry = s->private;
- struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
- struct hl_userptr *userptr;
- char dma_dir[4][30] = {"DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
- "DMA_FROM_DEVICE", "DMA_NONE"};
- bool first = true;
-
- spin_lock(&dev_entry->userptr_spinlock);
-
- list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) {
- if (first) {
- first = false;
- seq_puts(s, "\n");
- seq_puts(s, " user virtual address size dma dir\n");
- seq_puts(s, "----------------------------------------------------------\n");
- }
- seq_printf(s,
- " 0x%-14llx %-10u %-30s\n",
- userptr->addr, userptr->size, dma_dir[userptr->dir]);
- }
-
- spin_unlock(&dev_entry->userptr_spinlock);
-
- if (!first)
- seq_puts(s, "\n");
-
- return 0;
-}
-
-static int vm_show(struct seq_file *s, void *data)
-{
- struct hl_debugfs_entry *entry = s->private;
- struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
- struct hl_ctx *ctx;
- struct hl_vm *vm;
- struct hl_vm_hash_node *hnode;
- struct hl_userptr *userptr;
- struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
- enum vm_type_t *vm_type;
- bool once = true;
- u64 j;
- int i;
-
- if (!dev_entry->hdev->mmu_enable)
- return 0;
-
- spin_lock(&dev_entry->ctx_mem_hash_spinlock);
-
- list_for_each_entry(ctx, &dev_entry->ctx_mem_hash_list, debugfs_list) {
- once = false;
- seq_puts(s, "\n\n----------------------------------------------------");
- seq_puts(s, "\n----------------------------------------------------\n\n");
- seq_printf(s, "ctx asid: %u\n", ctx->asid);
-
- seq_puts(s, "\nmappings:\n\n");
- seq_puts(s, " virtual address size handle\n");
- seq_puts(s, "----------------------------------------------------\n");
- mutex_lock(&ctx->mem_hash_lock);
- hash_for_each(ctx->mem_hash, i, hnode, node) {
- vm_type = hnode->ptr;
-
- if (*vm_type == VM_TYPE_USERPTR) {
- userptr = hnode->ptr;
- seq_printf(s,
- " 0x%-14llx %-10u\n",
- hnode->vaddr, userptr->size);
- } else {
- phys_pg_pack = hnode->ptr;
- seq_printf(s,
- " 0x%-14llx %-10llu %-4u\n",
- hnode->vaddr, phys_pg_pack->total_size,
- phys_pg_pack->handle);
- }
- }
- mutex_unlock(&ctx->mem_hash_lock);
-
- vm = &ctx->hdev->vm;
- spin_lock(&vm->idr_lock);
-
- if (!idr_is_empty(&vm->phys_pg_pack_handles))
- seq_puts(s, "\n\nallocations:\n");
-
- idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_pack, i) {
- if (phys_pg_pack->asid != ctx->asid)
- continue;
-
- seq_printf(s, "\nhandle: %u\n", phys_pg_pack->handle);
- seq_printf(s, "page size: %u\n\n",
- phys_pg_pack->page_size);
- seq_puts(s, " physical address\n");
- seq_puts(s, "---------------------\n");
- for (j = 0 ; j < phys_pg_pack->npages ; j++) {
- seq_printf(s, " 0x%-14llx\n",
- phys_pg_pack->pages[j]);
- }
- }
- spin_unlock(&vm->idr_lock);
-
- }
-
- spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
-
- if (!once)
- seq_puts(s, "\n");
-
- return 0;
-}
-
-/* these inline functions are copied from mmu.c */
-static inline u64 get_hop0_addr(struct hl_ctx *ctx)
-{
- return ctx->hdev->asic_prop.mmu_pgt_addr +
- (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
-}
-
-static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
- u64 virt_addr, u64 mask, u64 shift)
-{
- return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
- ((virt_addr & mask) >> shift);
-}
-
-static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_specs,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask,
- mmu_specs->hop0_shift);
-}
-
-static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_specs,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask,
- mmu_specs->hop1_shift);
-}
-
-static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_specs,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask,
- mmu_specs->hop2_shift);
-}
-
-static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_specs,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask,
- mmu_specs->hop3_shift);
-}
-
-static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_specs,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask,
- mmu_specs->hop4_shift);
-}
-
-static inline u64 get_next_hop_addr(u64 curr_pte)
-{
- if (curr_pte & PAGE_PRESENT_MASK)
- return curr_pte & HOP_PHYS_ADDR_MASK;
- else
- return ULLONG_MAX;
-}
-
-static int mmu_show(struct seq_file *s, void *data)
-{
- struct hl_debugfs_entry *entry = s->private;
- struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
- struct hl_device *hdev = dev_entry->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- struct hl_mmu_properties *mmu_prop;
- struct hl_ctx *ctx;
- bool is_dram_addr;
-
- u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0,
- hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0,
- hop2_addr = 0, hop2_pte_addr = 0, hop2_pte = 0,
- hop3_addr = 0, hop3_pte_addr = 0, hop3_pte = 0,
- hop4_addr = 0, hop4_pte_addr = 0, hop4_pte = 0,
- virt_addr = dev_entry->mmu_addr;
-
- if (!hdev->mmu_enable)
- return 0;
-
- if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID)
- ctx = hdev->kernel_ctx;
- else
- ctx = hdev->compute_ctx;
-
- if (!ctx) {
- dev_err(hdev->dev, "no ctx available\n");
- return 0;
- }
-
- is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
- prop->dmmu.start_addr,
- prop->dmmu.end_addr);
-
- /* shifts and masks are the same in PMMU and HPMMU, use one of them */
- mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
-
- mutex_lock(&ctx->mmu_lock);
-
- /* the following lookup is copied from unmap() in mmu.c */
-
- hop0_addr = get_hop0_addr(ctx);
- hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
- hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
- hop1_addr = get_next_hop_addr(hop0_pte);
-
- if (hop1_addr == ULLONG_MAX)
- goto not_mapped;
-
- hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
- hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
- hop2_addr = get_next_hop_addr(hop1_pte);
-
- if (hop2_addr == ULLONG_MAX)
- goto not_mapped;
-
- hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
- hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
- hop3_addr = get_next_hop_addr(hop2_pte);
-
- if (hop3_addr == ULLONG_MAX)
- goto not_mapped;
-
- hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
- hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
-
- if (!(hop3_pte & LAST_MASK)) {
- hop4_addr = get_next_hop_addr(hop3_pte);
-
- if (hop4_addr == ULLONG_MAX)
- goto not_mapped;
-
- hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
- virt_addr);
- hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
- if (!(hop4_pte & PAGE_PRESENT_MASK))
- goto not_mapped;
- } else {
- if (!(hop3_pte & PAGE_PRESENT_MASK))
- goto not_mapped;
- }
-
- seq_printf(s, "asid: %u, virt_addr: 0x%llx\n",
- dev_entry->mmu_asid, dev_entry->mmu_addr);
-
- seq_printf(s, "hop0_addr: 0x%llx\n", hop0_addr);
- seq_printf(s, "hop0_pte_addr: 0x%llx\n", hop0_pte_addr);
- seq_printf(s, "hop0_pte: 0x%llx\n", hop0_pte);
-
- seq_printf(s, "hop1_addr: 0x%llx\n", hop1_addr);
- seq_printf(s, "hop1_pte_addr: 0x%llx\n", hop1_pte_addr);
- seq_printf(s, "hop1_pte: 0x%llx\n", hop1_pte);
-
- seq_printf(s, "hop2_addr: 0x%llx\n", hop2_addr);
- seq_printf(s, "hop2_pte_addr: 0x%llx\n", hop2_pte_addr);
- seq_printf(s, "hop2_pte: 0x%llx\n", hop2_pte);
-
- seq_printf(s, "hop3_addr: 0x%llx\n", hop3_addr);
- seq_printf(s, "hop3_pte_addr: 0x%llx\n", hop3_pte_addr);
- seq_printf(s, "hop3_pte: 0x%llx\n", hop3_pte);
-
- if (!(hop3_pte & LAST_MASK)) {
- seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr);
- seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr);
- seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte);
- }
-
- goto out;
-
-not_mapped:
- dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
- virt_addr);
-out:
- mutex_unlock(&ctx->mmu_lock);
-
- return 0;
-}
-
-static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf,
- size_t count, loff_t *f_pos)
-{
- struct seq_file *s = file->private_data;
- struct hl_debugfs_entry *entry = s->private;
- struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
- struct hl_device *hdev = dev_entry->hdev;
- char kbuf[MMU_KBUF_SIZE];
- char *c;
- ssize_t rc;
-
- if (!hdev->mmu_enable)
- return count;
-
- if (count > sizeof(kbuf) - 1)
- goto err;
- if (copy_from_user(kbuf, buf, count))
- goto err;
- kbuf[count] = 0;
-
- c = strchr(kbuf, ' ');
- if (!c)
- goto err;
- *c = '\0';
-
- rc = kstrtouint(kbuf, 10, &dev_entry->mmu_asid);
- if (rc)
- goto err;
-
- if (strncmp(c+1, "0x", 2))
- goto err;
- rc = kstrtoull(c+3, 16, &dev_entry->mmu_addr);
- if (rc)
- goto err;
-
- return count;
-
-err:
- dev_err(hdev->dev, "usage: echo <asid> <0xaddr> > mmu\n");
-
- return -EINVAL;
-}
-
-static int engines_show(struct seq_file *s, void *data)
-{
- struct hl_debugfs_entry *entry = s->private;
- struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
- struct hl_device *hdev = dev_entry->hdev;
-
- if (atomic_read(&hdev->in_reset)) {
- dev_warn_ratelimited(hdev->dev,
- "Can't check device idle during reset\n");
- return 0;
- }
-
- hdev->asic_funcs->is_device_idle(hdev, NULL, s);
-
- return 0;
-}
-
-static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
-
- if (!hdev->mmu_enable)
- goto out;
-
- if (hdev->dram_supports_virtual_memory &&
- (addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
- return true;
-
- if (addr >= prop->pmmu.start_addr &&
- addr < prop->pmmu.end_addr)
- return true;
-
- if (addr >= prop->pmmu_huge.start_addr &&
- addr < prop->pmmu_huge.end_addr)
- return true;
-out:
- return false;
-}
-
-static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
- u64 *phys_addr)
-{
- struct hl_ctx *ctx = hdev->compute_ctx;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- struct hl_mmu_properties *mmu_prop;
- u64 hop_addr, hop_pte_addr, hop_pte;
- u64 offset_mask = HOP4_MASK | FLAGS_MASK;
- int rc = 0;
- bool is_dram_addr;
-
- if (!ctx) {
- dev_err(hdev->dev, "no ctx available\n");
- return -EINVAL;
- }
-
- is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
- prop->dmmu.start_addr,
- prop->dmmu.end_addr);
-
- /* shifts and masks are the same in PMMU and HPMMU, use one of them */
- mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
-
- mutex_lock(&ctx->mmu_lock);
-
- /* hop 0 */
- hop_addr = get_hop0_addr(ctx);
- hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
- hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
-
- /* hop 1 */
- hop_addr = get_next_hop_addr(hop_pte);
- if (hop_addr == ULLONG_MAX)
- goto not_mapped;
- hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
- hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
-
- /* hop 2 */
- hop_addr = get_next_hop_addr(hop_pte);
- if (hop_addr == ULLONG_MAX)
- goto not_mapped;
- hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
- hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
-
- /* hop 3 */
- hop_addr = get_next_hop_addr(hop_pte);
- if (hop_addr == ULLONG_MAX)
- goto not_mapped;
- hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
- hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
-
- if (!(hop_pte & LAST_MASK)) {
- /* hop 4 */
- hop_addr = get_next_hop_addr(hop_pte);
- if (hop_addr == ULLONG_MAX)
- goto not_mapped;
- hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr,
- virt_addr);
- hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
-
- offset_mask = FLAGS_MASK;
- }
-
- if (!(hop_pte & PAGE_PRESENT_MASK))
- goto not_mapped;
-
- *phys_addr = (hop_pte & ~offset_mask) | (virt_addr & offset_mask);
-
- goto out;
-
-not_mapped:
- dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
- virt_addr);
- rc = -EINVAL;
-out:
- mutex_unlock(&ctx->mmu_lock);
- return rc;
-}
-
-static ssize_t hl_data_read32(struct file *f, char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
- struct hl_device *hdev = entry->hdev;
- char tmp_buf[32];
- u64 addr = entry->addr;
- u32 val;
- ssize_t rc;
-
- if (atomic_read(&hdev->in_reset)) {
- dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");
- return 0;
- }
-
- if (*ppos)
- return 0;
-
- if (hl_is_device_va(hdev, addr)) {
- rc = device_va_to_pa(hdev, addr, &addr);
- if (rc)
- return rc;
- }
-
- rc = hdev->asic_funcs->debugfs_read32(hdev, addr, &val);
- if (rc) {
- dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
- return rc;
- }
-
- sprintf(tmp_buf, "0x%08x\n", val);
- return simple_read_from_buffer(buf, count, ppos, tmp_buf,
- strlen(tmp_buf));
-}
-
-static ssize_t hl_data_write32(struct file *f, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
- struct hl_device *hdev = entry->hdev;
- u64 addr = entry->addr;
- u32 value;
- ssize_t rc;
-
- if (atomic_read(&hdev->in_reset)) {
- dev_warn_ratelimited(hdev->dev, "Can't write during reset\n");
- return 0;
- }
-
- rc = kstrtouint_from_user(buf, count, 16, &value);
- if (rc)
- return rc;
-
- if (hl_is_device_va(hdev, addr)) {
- rc = device_va_to_pa(hdev, addr, &addr);
- if (rc)
- return rc;
- }
-
- rc = hdev->asic_funcs->debugfs_write32(hdev, addr, value);
- if (rc) {
- dev_err(hdev->dev, "Failed to write 0x%08x to 0x%010llx\n",
- value, addr);
- return rc;
- }
-
- return count;
-}
-
-static ssize_t hl_data_read64(struct file *f, char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
- struct hl_device *hdev = entry->hdev;
- char tmp_buf[32];
- u64 addr = entry->addr;
- u64 val;
- ssize_t rc;
-
- if (*ppos)
- return 0;
-
- if (hl_is_device_va(hdev, addr)) {
- rc = device_va_to_pa(hdev, addr, &addr);
- if (rc)
- return rc;
- }
-
- rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val);
- if (rc) {
- dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
- return rc;
- }
-
- sprintf(tmp_buf, "0x%016llx\n", val);
- return simple_read_from_buffer(buf, count, ppos, tmp_buf,
- strlen(tmp_buf));
-}
-
-static ssize_t hl_data_write64(struct file *f, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
- struct hl_device *hdev = entry->hdev;
- u64 addr = entry->addr;
- u64 value;
- ssize_t rc;
-
- rc = kstrtoull_from_user(buf, count, 16, &value);
- if (rc)
- return rc;
-
- if (hl_is_device_va(hdev, addr)) {
- rc = device_va_to_pa(hdev, addr, &addr);
- if (rc)
- return rc;
- }
-
- rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value);
- if (rc) {
- dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n",
- value, addr);
- return rc;
- }
-
- return count;
-}
-
-static ssize_t hl_get_power_state(struct file *f, char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
- struct hl_device *hdev = entry->hdev;
- char tmp_buf[200];
- int i;
-
- if (*ppos)
- return 0;
-
- if (hdev->pdev->current_state == PCI_D0)
- i = 1;
- else if (hdev->pdev->current_state == PCI_D3hot)
- i = 2;
- else
- i = 3;
-
- sprintf(tmp_buf,
- "current power state: %d\n1 - D0\n2 - D3hot\n3 - Unknown\n", i);
- return simple_read_from_buffer(buf, count, ppos, tmp_buf,
- strlen(tmp_buf));
-}
-
-static ssize_t hl_set_power_state(struct file *f, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
- struct hl_device *hdev = entry->hdev;
- u32 value;
- ssize_t rc;
-
- rc = kstrtouint_from_user(buf, count, 10, &value);
- if (rc)
- return rc;
-
- if (value == 1) {
- pci_set_power_state(hdev->pdev, PCI_D0);
- pci_restore_state(hdev->pdev);
- rc = pci_enable_device(hdev->pdev);
- } else if (value == 2) {
- pci_save_state(hdev->pdev);
- pci_disable_device(hdev->pdev);
- pci_set_power_state(hdev->pdev, PCI_D3hot);
- } else {
- dev_dbg(hdev->dev, "invalid power state value %u\n", value);
- return -EINVAL;
- }
-
- return count;
-}
-
-static ssize_t hl_i2c_data_read(struct file *f, char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
- struct hl_device *hdev = entry->hdev;
- char tmp_buf[32];
- u32 val;
- ssize_t rc;
-
- if (*ppos)
- return 0;
-
- rc = hl_debugfs_i2c_read(hdev, entry->i2c_bus, entry->i2c_addr,
- entry->i2c_reg, &val);
- if (rc) {
- dev_err(hdev->dev,
- "Failed to read from I2C bus %d, addr %d, reg %d\n",
- entry->i2c_bus, entry->i2c_addr, entry->i2c_reg);
- return rc;
- }
-
- sprintf(tmp_buf, "0x%02x\n", val);
- rc = simple_read_from_buffer(buf, count, ppos, tmp_buf,
- strlen(tmp_buf));
-
- return rc;
-}
-
-static ssize_t hl_i2c_data_write(struct file *f, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
- struct hl_device *hdev = entry->hdev;
- u32 value;
- ssize_t rc;
-
- rc = kstrtouint_from_user(buf, count, 16, &value);
- if (rc)
- return rc;
-
- rc = hl_debugfs_i2c_write(hdev, entry->i2c_bus, entry->i2c_addr,
- entry->i2c_reg, value);
- if (rc) {
- dev_err(hdev->dev,
- "Failed to write 0x%02x to I2C bus %d, addr %d, reg %d\n",
- value, entry->i2c_bus, entry->i2c_addr, entry->i2c_reg);
- return rc;
- }
-
- return count;
-}
-
-static ssize_t hl_led0_write(struct file *f, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
- struct hl_device *hdev = entry->hdev;
- u32 value;
- ssize_t rc;
-
- rc = kstrtouint_from_user(buf, count, 10, &value);
- if (rc)
- return rc;
-
- value = value ? 1 : 0;
-
- hl_debugfs_led_set(hdev, 0, value);
-
- return count;
-}
-
-static ssize_t hl_led1_write(struct file *f, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
- struct hl_device *hdev = entry->hdev;
- u32 value;
- ssize_t rc;
-
- rc = kstrtouint_from_user(buf, count, 10, &value);
- if (rc)
- return rc;
-
- value = value ? 1 : 0;
-
- hl_debugfs_led_set(hdev, 1, value);
-
- return count;
-}
-
-static ssize_t hl_led2_write(struct file *f, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
- struct hl_device *hdev = entry->hdev;
- u32 value;
- ssize_t rc;
-
- rc = kstrtouint_from_user(buf, count, 10, &value);
- if (rc)
- return rc;
-
- value = value ? 1 : 0;
-
- hl_debugfs_led_set(hdev, 2, value);
-
- return count;
-}
-
-static ssize_t hl_device_read(struct file *f, char __user *buf,
- size_t count, loff_t *ppos)
-{
- static const char *help =
- "Valid values: disable, enable, suspend, resume, cpu_timeout\n";
- return simple_read_from_buffer(buf, count, ppos, help, strlen(help));
-}
-
-static ssize_t hl_device_write(struct file *f, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
- struct hl_device *hdev = entry->hdev;
- char data[30] = {0};
-
- /* don't allow partial writes */
- if (*ppos != 0)
- return 0;
-
- simple_write_to_buffer(data, 29, ppos, buf, count);
-
- if (strncmp("disable", data, strlen("disable")) == 0) {
- hdev->disabled = true;
- } else if (strncmp("enable", data, strlen("enable")) == 0) {
- hdev->disabled = false;
- } else if (strncmp("suspend", data, strlen("suspend")) == 0) {
- hdev->asic_funcs->suspend(hdev);
- } else if (strncmp("resume", data, strlen("resume")) == 0) {
- hdev->asic_funcs->resume(hdev);
- } else if (strncmp("cpu_timeout", data, strlen("cpu_timeout")) == 0) {
- hdev->device_cpu_disabled = true;
- } else {
- dev_err(hdev->dev,
- "Valid values: disable, enable, suspend, resume, cpu_timeout\n");
- count = -EINVAL;
- }
-
- return count;
-}
-
-static ssize_t hl_clk_gate_read(struct file *f, char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
- struct hl_device *hdev = entry->hdev;
- char tmp_buf[200];
- ssize_t rc;
-
- if (*ppos)
- return 0;
-
- sprintf(tmp_buf, "%d\n", hdev->clock_gating);
- rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
- strlen(tmp_buf) + 1);
-
- return rc;
-}
-
-static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
- struct hl_device *hdev = entry->hdev;
- u32 value;
- ssize_t rc;
-
- if (atomic_read(&hdev->in_reset)) {
- dev_warn_ratelimited(hdev->dev,
- "Can't change clock gating during reset\n");
- return 0;
- }
-
- rc = kstrtouint_from_user(buf, count, 10, &value);
- if (rc)
- return rc;
-
- if (value) {
- hdev->clock_gating = 1;
- if (hdev->asic_funcs->enable_clock_gating)
- hdev->asic_funcs->enable_clock_gating(hdev);
- } else {
- if (hdev->asic_funcs->disable_clock_gating)
- hdev->asic_funcs->disable_clock_gating(hdev);
- hdev->clock_gating = 0;
- }
-
- return count;
-}
-
-static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
- struct hl_device *hdev = entry->hdev;
- char tmp_buf[200];
- ssize_t rc;
-
- if (*ppos)
- return 0;
-
- sprintf(tmp_buf, "%d\n", hdev->stop_on_err);
- rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
- strlen(tmp_buf) + 1);
-
- return rc;
-}
-
-static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
- struct hl_device *hdev = entry->hdev;
- u32 value;
- ssize_t rc;
-
- if (atomic_read(&hdev->in_reset)) {
- dev_warn_ratelimited(hdev->dev,
- "Can't change stop on error during reset\n");
- return 0;
- }
-
- rc = kstrtouint_from_user(buf, count, 10, &value);
- if (rc)
- return rc;
-
- hdev->stop_on_err = value ? 1 : 0;
-
- hl_device_reset(hdev, false, false);
-
- return count;
-}
-
-static const struct file_operations hl_data32b_fops = {
- .owner = THIS_MODULE,
- .read = hl_data_read32,
- .write = hl_data_write32
-};
-
-static const struct file_operations hl_data64b_fops = {
- .owner = THIS_MODULE,
- .read = hl_data_read64,
- .write = hl_data_write64
-};
-
-static const struct file_operations hl_i2c_data_fops = {
- .owner = THIS_MODULE,
- .read = hl_i2c_data_read,
- .write = hl_i2c_data_write
-};
-
-static const struct file_operations hl_power_fops = {
- .owner = THIS_MODULE,
- .read = hl_get_power_state,
- .write = hl_set_power_state
-};
-
-static const struct file_operations hl_led0_fops = {
- .owner = THIS_MODULE,
- .write = hl_led0_write
-};
-
-static const struct file_operations hl_led1_fops = {
- .owner = THIS_MODULE,
- .write = hl_led1_write
-};
-
-static const struct file_operations hl_led2_fops = {
- .owner = THIS_MODULE,
- .write = hl_led2_write
-};
-
-static const struct file_operations hl_device_fops = {
- .owner = THIS_MODULE,
- .read = hl_device_read,
- .write = hl_device_write
-};
-
-static const struct file_operations hl_clk_gate_fops = {
- .owner = THIS_MODULE,
- .read = hl_clk_gate_read,
- .write = hl_clk_gate_write
-};
-
-static const struct file_operations hl_stop_on_err_fops = {
- .owner = THIS_MODULE,
- .read = hl_stop_on_err_read,
- .write = hl_stop_on_err_write
-};
-
-static const struct hl_info_list hl_debugfs_list[] = {
- {"command_buffers", command_buffers_show, NULL},
- {"command_submission", command_submission_show, NULL},
- {"command_submission_jobs", command_submission_jobs_show, NULL},
- {"userptr", userptr_show, NULL},
- {"vm", vm_show, NULL},
- {"mmu", mmu_show, mmu_asid_va_write},
- {"engines", engines_show, NULL}
-};
-
-static int hl_debugfs_open(struct inode *inode, struct file *file)
-{
- struct hl_debugfs_entry *node = inode->i_private;
-
- return single_open(file, node->info_ent->show, node);
-}
-
-static ssize_t hl_debugfs_write(struct file *file, const char __user *buf,
- size_t count, loff_t *f_pos)
-{
- struct hl_debugfs_entry *node = file->f_inode->i_private;
-
- if (node->info_ent->write)
- return node->info_ent->write(file, buf, count, f_pos);
- else
- return -EINVAL;
-
-}
-
-static const struct file_operations hl_debugfs_fops = {
- .owner = THIS_MODULE,
- .open = hl_debugfs_open,
- .read = seq_read,
- .write = hl_debugfs_write,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-void hl_debugfs_add_device(struct hl_device *hdev)
-{
- struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
- int count = ARRAY_SIZE(hl_debugfs_list);
- struct hl_debugfs_entry *entry;
- struct dentry *ent;
- int i;
-
- dev_entry->hdev = hdev;
- dev_entry->entry_arr = kmalloc_array(count,
- sizeof(struct hl_debugfs_entry),
- GFP_KERNEL);
- if (!dev_entry->entry_arr)
- return;
-
- INIT_LIST_HEAD(&dev_entry->file_list);
- INIT_LIST_HEAD(&dev_entry->cb_list);
- INIT_LIST_HEAD(&dev_entry->cs_list);
- INIT_LIST_HEAD(&dev_entry->cs_job_list);
- INIT_LIST_HEAD(&dev_entry->userptr_list);
- INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list);
- mutex_init(&dev_entry->file_mutex);
- spin_lock_init(&dev_entry->cb_spinlock);
- spin_lock_init(&dev_entry->cs_spinlock);
- spin_lock_init(&dev_entry->cs_job_spinlock);
- spin_lock_init(&dev_entry->userptr_spinlock);
- spin_lock_init(&dev_entry->ctx_mem_hash_spinlock);
-
- dev_entry->root = debugfs_create_dir(dev_name(hdev->dev),
- hl_debug_root);
-
- debugfs_create_x64("addr",
- 0644,
- dev_entry->root,
- &dev_entry->addr);
-
- debugfs_create_file("data32",
- 0644,
- dev_entry->root,
- dev_entry,
- &hl_data32b_fops);
-
- debugfs_create_file("data64",
- 0644,
- dev_entry->root,
- dev_entry,
- &hl_data64b_fops);
-
- debugfs_create_file("set_power_state",
- 0200,
- dev_entry->root,
- dev_entry,
- &hl_power_fops);
-
- debugfs_create_u8("i2c_bus",
- 0644,
- dev_entry->root,
- &dev_entry->i2c_bus);
-
- debugfs_create_u8("i2c_addr",
- 0644,
- dev_entry->root,
- &dev_entry->i2c_addr);
-
- debugfs_create_u8("i2c_reg",
- 0644,
- dev_entry->root,
- &dev_entry->i2c_reg);
-
- debugfs_create_file("i2c_data",
- 0644,
- dev_entry->root,
- dev_entry,
- &hl_i2c_data_fops);
-
- debugfs_create_file("led0",
- 0200,
- dev_entry->root,
- dev_entry,
- &hl_led0_fops);
-
- debugfs_create_file("led1",
- 0200,
- dev_entry->root,
- dev_entry,
- &hl_led1_fops);
-
- debugfs_create_file("led2",
- 0200,
- dev_entry->root,
- dev_entry,
- &hl_led2_fops);
-
- debugfs_create_file("device",
- 0200,
- dev_entry->root,
- dev_entry,
- &hl_device_fops);
-
- debugfs_create_file("clk_gate",
- 0200,
- dev_entry->root,
- dev_entry,
- &hl_clk_gate_fops);
-
- debugfs_create_file("stop_on_err",
- 0644,
- dev_entry->root,
- dev_entry,
- &hl_stop_on_err_fops);
-
- for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
-
- ent = debugfs_create_file(hl_debugfs_list[i].name,
- 0444,
- dev_entry->root,
- entry,
- &hl_debugfs_fops);
- entry->dent = ent;
- entry->info_ent = &hl_debugfs_list[i];
- entry->dev_entry = dev_entry;
- }
-}
-
-void hl_debugfs_remove_device(struct hl_device *hdev)
-{
- struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
-
- debugfs_remove_recursive(entry->root);
-
- mutex_destroy(&entry->file_mutex);
- kfree(entry->entry_arr);
-}
-
-void hl_debugfs_add_file(struct hl_fpriv *hpriv)
-{
- struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
-
- mutex_lock(&dev_entry->file_mutex);
- list_add(&hpriv->debugfs_list, &dev_entry->file_list);
- mutex_unlock(&dev_entry->file_mutex);
-}
-
-void hl_debugfs_remove_file(struct hl_fpriv *hpriv)
-{
- struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
-
- mutex_lock(&dev_entry->file_mutex);
- list_del(&hpriv->debugfs_list);
- mutex_unlock(&dev_entry->file_mutex);
-}
-
-void hl_debugfs_add_cb(struct hl_cb *cb)
-{
- struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs;
-
- spin_lock(&dev_entry->cb_spinlock);
- list_add(&cb->debugfs_list, &dev_entry->cb_list);
- spin_unlock(&dev_entry->cb_spinlock);
-}
-
-void hl_debugfs_remove_cb(struct hl_cb *cb)
-{
- struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs;
-
- spin_lock(&dev_entry->cb_spinlock);
- list_del(&cb->debugfs_list);
- spin_unlock(&dev_entry->cb_spinlock);
-}
-
-void hl_debugfs_add_cs(struct hl_cs *cs)
-{
- struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs;
-
- spin_lock(&dev_entry->cs_spinlock);
- list_add(&cs->debugfs_list, &dev_entry->cs_list);
- spin_unlock(&dev_entry->cs_spinlock);
-}
-
-void hl_debugfs_remove_cs(struct hl_cs *cs)
-{
- struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs;
-
- spin_lock(&dev_entry->cs_spinlock);
- list_del(&cs->debugfs_list);
- spin_unlock(&dev_entry->cs_spinlock);
-}
-
-void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job)
-{
- struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
-
- spin_lock(&dev_entry->cs_job_spinlock);
- list_add(&job->debugfs_list, &dev_entry->cs_job_list);
- spin_unlock(&dev_entry->cs_job_spinlock);
-}
-
-void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job)
-{
- struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
-
- spin_lock(&dev_entry->cs_job_spinlock);
- list_del(&job->debugfs_list);
- spin_unlock(&dev_entry->cs_job_spinlock);
-}
-
-void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr)
-{
- struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
-
- spin_lock(&dev_entry->userptr_spinlock);
- list_add(&userptr->debugfs_list, &dev_entry->userptr_list);
- spin_unlock(&dev_entry->userptr_spinlock);
-}
-
-void hl_debugfs_remove_userptr(struct hl_device *hdev,
- struct hl_userptr *userptr)
-{
- struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
-
- spin_lock(&dev_entry->userptr_spinlock);
- list_del(&userptr->debugfs_list);
- spin_unlock(&dev_entry->userptr_spinlock);
-}
-
-void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
-{
- struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
-
- spin_lock(&dev_entry->ctx_mem_hash_spinlock);
- list_add(&ctx->debugfs_list, &dev_entry->ctx_mem_hash_list);
- spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
-}
-
-void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
-{
- struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
-
- spin_lock(&dev_entry->ctx_mem_hash_spinlock);
- list_del(&ctx->debugfs_list);
- spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
-}
-
-void __init hl_debugfs_init(void)
-{
- hl_debug_root = debugfs_create_dir("habanalabs", NULL);
-}
-
-void hl_debugfs_fini(void)
-{
- debugfs_remove_recursive(hl_debug_root);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#define pr_fmt(fmt) "habanalabs: " fmt
-
-#include "habanalabs.h"
-
-#include <linux/pci.h>
-#include <linux/sched/signal.h>
-#include <linux/hwmon.h>
-#include <uapi/misc/habanalabs.h>
-
-#define HL_PLDM_PENDING_RESET_PER_SEC (HL_PENDING_RESET_PER_SEC * 10)
-
-bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
-{
- if ((hdev->disabled) || (atomic_read(&hdev->in_reset)))
- return true;
- else
- return false;
-}
-
-enum hl_device_status hl_device_status(struct hl_device *hdev)
-{
- enum hl_device_status status;
-
- if (hdev->disabled)
- status = HL_DEVICE_STATUS_MALFUNCTION;
- else if (atomic_read(&hdev->in_reset))
- status = HL_DEVICE_STATUS_IN_RESET;
- else
- status = HL_DEVICE_STATUS_OPERATIONAL;
-
- return status;
-}
-
-static void hpriv_release(struct kref *ref)
-{
- struct hl_fpriv *hpriv;
- struct hl_device *hdev;
-
- hpriv = container_of(ref, struct hl_fpriv, refcount);
-
- hdev = hpriv->hdev;
-
- put_pid(hpriv->taskpid);
-
- hl_debugfs_remove_file(hpriv);
-
- mutex_destroy(&hpriv->restore_phase_mutex);
-
- mutex_lock(&hdev->fpriv_list_lock);
- list_del(&hpriv->dev_node);
- hdev->compute_ctx = NULL;
- mutex_unlock(&hdev->fpriv_list_lock);
-
- kfree(hpriv);
-}
-
-void hl_hpriv_get(struct hl_fpriv *hpriv)
-{
- kref_get(&hpriv->refcount);
-}
-
-void hl_hpriv_put(struct hl_fpriv *hpriv)
-{
- kref_put(&hpriv->refcount, hpriv_release);
-}
-
-/*
- * hl_device_release - release function for habanalabs device
- *
- * @inode: pointer to inode structure
- * @filp: pointer to file structure
- *
- * Called when process closes an habanalabs device
- */
-static int hl_device_release(struct inode *inode, struct file *filp)
-{
- struct hl_fpriv *hpriv = filp->private_data;
-
- hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
- hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
-
- filp->private_data = NULL;
-
- hl_hpriv_put(hpriv);
-
- return 0;
-}
-
-static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
-{
- struct hl_fpriv *hpriv = filp->private_data;
- struct hl_device *hdev;
-
- filp->private_data = NULL;
-
- hdev = hpriv->hdev;
-
- mutex_lock(&hdev->fpriv_list_lock);
- list_del(&hpriv->dev_node);
- mutex_unlock(&hdev->fpriv_list_lock);
-
- kfree(hpriv);
-
- return 0;
-}
-
-/*
- * hl_mmap - mmap function for habanalabs device
- *
- * @*filp: pointer to file structure
- * @*vma: pointer to vm_area_struct of the process
- *
- * Called when process does an mmap on habanalabs device. Call the device's mmap
- * function at the end of the common code.
- */
-static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
-{
- struct hl_fpriv *hpriv = filp->private_data;
-
- if ((vma->vm_pgoff & HL_MMAP_CB_MASK) == HL_MMAP_CB_MASK) {
- vma->vm_pgoff ^= HL_MMAP_CB_MASK;
- return hl_cb_mmap(hpriv, vma);
- }
-
- return -EINVAL;
-}
-
-static const struct file_operations hl_ops = {
- .owner = THIS_MODULE,
- .open = hl_device_open,
- .release = hl_device_release,
- .mmap = hl_mmap,
- .unlocked_ioctl = hl_ioctl,
- .compat_ioctl = hl_ioctl
-};
-
-static const struct file_operations hl_ctrl_ops = {
- .owner = THIS_MODULE,
- .open = hl_device_open_ctrl,
- .release = hl_device_release_ctrl,
- .unlocked_ioctl = hl_ioctl_control,
- .compat_ioctl = hl_ioctl_control
-};
-
-static void device_release_func(struct device *dev)
-{
- kfree(dev);
-}
-
-/*
- * device_init_cdev - Initialize cdev and device for habanalabs device
- *
- * @hdev: pointer to habanalabs device structure
- * @hclass: pointer to the class object of the device
- * @minor: minor number of the specific device
- * @fpos: file operations to install for this device
- * @name: name of the device as it will appear in the filesystem
- * @cdev: pointer to the char device object that will be initialized
- * @dev: pointer to the device object that will be initialized
- *
- * Initialize a cdev and a Linux device for habanalabs's device.
- */
-static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
- int minor, const struct file_operations *fops,
- char *name, struct cdev *cdev,
- struct device **dev)
-{
- cdev_init(cdev, fops);
- cdev->owner = THIS_MODULE;
-
- *dev = kzalloc(sizeof(**dev), GFP_KERNEL);
- if (!*dev)
- return -ENOMEM;
-
- device_initialize(*dev);
- (*dev)->devt = MKDEV(hdev->major, minor);
- (*dev)->class = hclass;
- (*dev)->release = device_release_func;
- dev_set_drvdata(*dev, hdev);
- dev_set_name(*dev, "%s", name);
-
- return 0;
-}
-
-static int device_cdev_sysfs_add(struct hl_device *hdev)
-{
- int rc;
-
- rc = cdev_device_add(&hdev->cdev, hdev->dev);
- if (rc) {
- dev_err(hdev->dev,
- "failed to add a char device to the system\n");
- return rc;
- }
-
- rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
- if (rc) {
- dev_err(hdev->dev,
- "failed to add a control char device to the system\n");
- goto delete_cdev_device;
- }
-
- /* hl_sysfs_init() must be done after adding the device to the system */
- rc = hl_sysfs_init(hdev);
- if (rc) {
- dev_err(hdev->dev, "failed to initialize sysfs\n");
- goto delete_ctrl_cdev_device;
- }
-
- hdev->cdev_sysfs_created = true;
-
- return 0;
-
-delete_ctrl_cdev_device:
- cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
-delete_cdev_device:
- cdev_device_del(&hdev->cdev, hdev->dev);
- return rc;
-}
-
-static void device_cdev_sysfs_del(struct hl_device *hdev)
-{
- /* device_release() won't be called so must free devices explicitly */
- if (!hdev->cdev_sysfs_created) {
- kfree(hdev->dev_ctrl);
- kfree(hdev->dev);
- return;
- }
-
- hl_sysfs_fini(hdev);
- cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
- cdev_device_del(&hdev->cdev, hdev->dev);
-}
-
-/*
- * device_early_init - do some early initialization for the habanalabs device
- *
- * @hdev: pointer to habanalabs device structure
- *
- * Install the relevant function pointers and call the early_init function,
- * if such a function exists
- */
-static int device_early_init(struct hl_device *hdev)
-{
- int i, rc;
- char workq_name[32];
-
- switch (hdev->asic_type) {
- case ASIC_GOYA:
- goya_set_asic_funcs(hdev);
- strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
- break;
- case ASIC_GAUDI:
- gaudi_set_asic_funcs(hdev);
- sprintf(hdev->asic_name, "GAUDI");
- break;
- default:
- dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
- hdev->asic_type);
- return -EINVAL;
- }
-
- rc = hdev->asic_funcs->early_init(hdev);
- if (rc)
- return rc;
-
- rc = hl_asid_init(hdev);
- if (rc)
- goto early_fini;
-
- if (hdev->asic_prop.completion_queues_count) {
- hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
- sizeof(*hdev->cq_wq),
- GFP_ATOMIC);
- if (!hdev->cq_wq) {
- rc = -ENOMEM;
- goto asid_fini;
- }
- }
-
- for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
- snprintf(workq_name, 32, "hl-free-jobs-%u", i);
- hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
- if (hdev->cq_wq == NULL) {
- dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
- rc = -ENOMEM;
- goto free_cq_wq;
- }
- }
-
- hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
- if (hdev->eq_wq == NULL) {
- dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
- rc = -ENOMEM;
- goto free_cq_wq;
- }
-
- hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
- GFP_KERNEL);
- if (!hdev->hl_chip_info) {
- rc = -ENOMEM;
- goto free_eq_wq;
- }
-
- hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
- sizeof(struct hl_device_idle_busy_ts),
- (GFP_KERNEL | __GFP_ZERO));
- if (!hdev->idle_busy_ts_arr) {
- rc = -ENOMEM;
- goto free_chip_info;
- }
-
- hl_cb_mgr_init(&hdev->kernel_cb_mgr);
-
- mutex_init(&hdev->send_cpu_message_lock);
- mutex_init(&hdev->debug_lock);
- mutex_init(&hdev->mmu_cache_lock);
- INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
- spin_lock_init(&hdev->hw_queues_mirror_lock);
- INIT_LIST_HEAD(&hdev->fpriv_list);
- mutex_init(&hdev->fpriv_list_lock);
- atomic_set(&hdev->in_reset, 0);
-
- return 0;
-
-free_chip_info:
- kfree(hdev->hl_chip_info);
-free_eq_wq:
- destroy_workqueue(hdev->eq_wq);
-free_cq_wq:
- for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
- if (hdev->cq_wq[i])
- destroy_workqueue(hdev->cq_wq[i]);
- kfree(hdev->cq_wq);
-asid_fini:
- hl_asid_fini(hdev);
-early_fini:
- if (hdev->asic_funcs->early_fini)
- hdev->asic_funcs->early_fini(hdev);
-
- return rc;
-}
-
-/*
- * device_early_fini - finalize all that was done in device_early_init
- *
- * @hdev: pointer to habanalabs device structure
- *
- */
-static void device_early_fini(struct hl_device *hdev)
-{
- int i;
-
- mutex_destroy(&hdev->mmu_cache_lock);
- mutex_destroy(&hdev->debug_lock);
- mutex_destroy(&hdev->send_cpu_message_lock);
-
- mutex_destroy(&hdev->fpriv_list_lock);
-
- hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
-
- kfree(hdev->idle_busy_ts_arr);
- kfree(hdev->hl_chip_info);
-
- destroy_workqueue(hdev->eq_wq);
-
- for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
- destroy_workqueue(hdev->cq_wq[i]);
- kfree(hdev->cq_wq);
-
- hl_asid_fini(hdev);
-
- if (hdev->asic_funcs->early_fini)
- hdev->asic_funcs->early_fini(hdev);
-}
-
-static void set_freq_to_low_job(struct work_struct *work)
-{
- struct hl_device *hdev = container_of(work, struct hl_device,
- work_freq.work);
-
- mutex_lock(&hdev->fpriv_list_lock);
-
- if (!hdev->compute_ctx)
- hl_device_set_frequency(hdev, PLL_LOW);
-
- mutex_unlock(&hdev->fpriv_list_lock);
-
- schedule_delayed_work(&hdev->work_freq,
- usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
-}
-
-static void hl_device_heartbeat(struct work_struct *work)
-{
- struct hl_device *hdev = container_of(work, struct hl_device,
- work_heartbeat.work);
-
- if (hl_device_disabled_or_in_reset(hdev))
- goto reschedule;
-
- if (!hdev->asic_funcs->send_heartbeat(hdev))
- goto reschedule;
-
- dev_err(hdev->dev, "Device heartbeat failed!\n");
- hl_device_reset(hdev, true, false);
-
- return;
-
-reschedule:
- schedule_delayed_work(&hdev->work_heartbeat,
- usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
-}
-
-/*
- * device_late_init - do late stuff initialization for the habanalabs device
- *
- * @hdev: pointer to habanalabs device structure
- *
- * Do stuff that either needs the device H/W queues to be active or needs
- * to happen after all the rest of the initialization is finished
- */
-static int device_late_init(struct hl_device *hdev)
-{
- int rc;
-
- if (hdev->asic_funcs->late_init) {
- rc = hdev->asic_funcs->late_init(hdev);
- if (rc) {
- dev_err(hdev->dev,
- "failed late initialization for the H/W\n");
- return rc;
- }
- }
-
- hdev->high_pll = hdev->asic_prop.high_pll;
-
- /* force setting to low frequency */
- hdev->curr_pll_profile = PLL_LOW;
-
- if (hdev->pm_mng_profile == PM_AUTO)
- hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
- else
- hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
-
- INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
- schedule_delayed_work(&hdev->work_freq,
- usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
-
- if (hdev->heartbeat) {
- INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
- schedule_delayed_work(&hdev->work_heartbeat,
- usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
- }
-
- hdev->late_init_done = true;
-
- return 0;
-}
-
-/*
- * device_late_fini - finalize all that was done in device_late_init
- *
- * @hdev: pointer to habanalabs device structure
- *
- */
-static void device_late_fini(struct hl_device *hdev)
-{
- if (!hdev->late_init_done)
- return;
-
- cancel_delayed_work_sync(&hdev->work_freq);
- if (hdev->heartbeat)
- cancel_delayed_work_sync(&hdev->work_heartbeat);
-
- if (hdev->asic_funcs->late_fini)
- hdev->asic_funcs->late_fini(hdev);
-
- hdev->late_init_done = false;
-}
-
-uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms)
-{
- struct hl_device_idle_busy_ts *ts;
- ktime_t zero_ktime, curr = ktime_get();
- u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx;
- s64 period_us, last_start_us, last_end_us, last_busy_time_us,
- total_busy_time_us = 0, total_busy_time_ms;
-
- zero_ktime = ktime_set(0, 0);
- period_us = period_ms * USEC_PER_MSEC;
- ts = &hdev->idle_busy_ts_arr[last_index];
-
- /* check case that device is currently in idle */
- if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) &&
- !ktime_compare(ts->idle_to_busy_ts, zero_ktime)) {
-
- last_index--;
- /* Handle case idle_busy_ts_idx was 0 */
- if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
- last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
-
- ts = &hdev->idle_busy_ts_arr[last_index];
- }
-
- while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) {
- /* Check if we are in last sample case. i.e. if the sample
- * begun before the sampling period. This could be a real
- * sample or 0 so need to handle both cases
- */
- last_start_us = ktime_to_us(
- ktime_sub(curr, ts->idle_to_busy_ts));
-
- if (last_start_us > period_us) {
-
- /* First check two cases:
- * 1. If the device is currently busy
- * 2. If the device was idle during the whole sampling
- * period
- */
-
- if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) {
- /* Check if the device is currently busy */
- if (ktime_compare(ts->idle_to_busy_ts,
- zero_ktime))
- return 100;
-
- /* We either didn't have any activity or we
- * reached an entry which is 0. Either way,
- * exit and return what was accumulated so far
- */
- break;
- }
-
- /* If sample has finished, check it is relevant */
- last_end_us = ktime_to_us(
- ktime_sub(curr, ts->busy_to_idle_ts));
-
- if (last_end_us > period_us)
- break;
-
- /* It is relevant so add it but with adjustment */
- last_busy_time_us = ktime_to_us(
- ktime_sub(ts->busy_to_idle_ts,
- ts->idle_to_busy_ts));
- total_busy_time_us += last_busy_time_us -
- (last_start_us - period_us);
- break;
- }
-
- /* Check if the sample is finished or still open */
- if (ktime_compare(ts->busy_to_idle_ts, zero_ktime))
- last_busy_time_us = ktime_to_us(
- ktime_sub(ts->busy_to_idle_ts,
- ts->idle_to_busy_ts));
- else
- last_busy_time_us = ktime_to_us(
- ktime_sub(curr, ts->idle_to_busy_ts));
-
- total_busy_time_us += last_busy_time_us;
-
- last_index--;
- /* Handle case idle_busy_ts_idx was 0 */
- if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
- last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
-
- ts = &hdev->idle_busy_ts_arr[last_index];
-
- overlap_cnt++;
- }
-
- total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us,
- USEC_PER_MSEC);
-
- return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms);
-}
-
-/*
- * hl_device_set_frequency - set the frequency of the device
- *
- * @hdev: pointer to habanalabs device structure
- * @freq: the new frequency value
- *
- * Change the frequency if needed. This function has no protection against
- * concurrency, therefore it is assumed that the calling function has protected
- * itself against the case of calling this function from multiple threads with
- * different values
- *
- * Returns 0 if no change was done, otherwise returns 1
- */
-int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
-{
- if ((hdev->pm_mng_profile == PM_MANUAL) ||
- (hdev->curr_pll_profile == freq))
- return 0;
-
- dev_dbg(hdev->dev, "Changing device frequency to %s\n",
- freq == PLL_HIGH ? "high" : "low");
-
- hdev->asic_funcs->set_pll_profile(hdev, freq);
-
- hdev->curr_pll_profile = freq;
-
- return 1;
-}
-
-int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
-{
- int rc = 0;
-
- mutex_lock(&hdev->debug_lock);
-
- if (!enable) {
- if (!hdev->in_debug) {
- dev_err(hdev->dev,
- "Failed to disable debug mode because device was not in debug mode\n");
- rc = -EFAULT;
- goto out;
- }
-
- if (!hdev->hard_reset_pending)
- hdev->asic_funcs->halt_coresight(hdev);
-
- hdev->in_debug = 0;
-
- if (!hdev->hard_reset_pending)
- hdev->asic_funcs->enable_clock_gating(hdev);
-
- goto out;
- }
-
- if (hdev->in_debug) {
- dev_err(hdev->dev,
- "Failed to enable debug mode because device is already in debug mode\n");
- rc = -EFAULT;
- goto out;
- }
-
- hdev->asic_funcs->disable_clock_gating(hdev);
- hdev->in_debug = 1;
-
-out:
- mutex_unlock(&hdev->debug_lock);
-
- return rc;
-}
-
-/*
- * hl_device_suspend - initiate device suspend
- *
- * @hdev: pointer to habanalabs device structure
- *
- * Puts the hw in the suspend state (all asics).
- * Returns 0 for success or an error on failure.
- * Called at driver suspend.
- */
-int hl_device_suspend(struct hl_device *hdev)
-{
- int rc;
-
- pci_save_state(hdev->pdev);
-
- /* Block future CS/VM/JOB completion operations */
- rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
- if (rc) {
- dev_err(hdev->dev, "Can't suspend while in reset\n");
- return -EIO;
- }
-
- /* This blocks all other stuff that is not blocked by in_reset */
- hdev->disabled = true;
-
- /*
- * Flush anyone that is inside the critical section of enqueue
- * jobs to the H/W
- */
- hdev->asic_funcs->hw_queues_lock(hdev);
- hdev->asic_funcs->hw_queues_unlock(hdev);
-
- /* Flush processes that are sending message to CPU */
- mutex_lock(&hdev->send_cpu_message_lock);
- mutex_unlock(&hdev->send_cpu_message_lock);
-
- rc = hdev->asic_funcs->suspend(hdev);
- if (rc)
- dev_err(hdev->dev,
- "Failed to disable PCI access of device CPU\n");
-
- /* Shut down the device */
- pci_disable_device(hdev->pdev);
- pci_set_power_state(hdev->pdev, PCI_D3hot);
-
- return 0;
-}
-
-/*
- * hl_device_resume - initiate device resume
- *
- * @hdev: pointer to habanalabs device structure
- *
- * Bring the hw back to operating state (all asics).
- * Returns 0 for success or an error on failure.
- * Called at driver resume.
- */
-int hl_device_resume(struct hl_device *hdev)
-{
- int rc;
-
- pci_set_power_state(hdev->pdev, PCI_D0);
- pci_restore_state(hdev->pdev);
- rc = pci_enable_device_mem(hdev->pdev);
- if (rc) {
- dev_err(hdev->dev,
- "Failed to enable PCI device in resume\n");
- return rc;
- }
-
- pci_set_master(hdev->pdev);
-
- rc = hdev->asic_funcs->resume(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed to resume device after suspend\n");
- goto disable_device;
- }
-
-
- hdev->disabled = false;
- atomic_set(&hdev->in_reset, 0);
-
- rc = hl_device_reset(hdev, true, false);
- if (rc) {
- dev_err(hdev->dev, "Failed to reset device during resume\n");
- goto disable_device;
- }
-
- return 0;
-
-disable_device:
- pci_clear_master(hdev->pdev);
- pci_disable_device(hdev->pdev);
-
- return rc;
-}
-
-static int device_kill_open_processes(struct hl_device *hdev)
-{
- u16 pending_total, pending_cnt;
- struct hl_fpriv *hpriv;
- struct task_struct *task = NULL;
-
- if (hdev->pldm)
- pending_total = HL_PLDM_PENDING_RESET_PER_SEC;
- else
- pending_total = HL_PENDING_RESET_PER_SEC;
-
- /* Giving time for user to close FD, and for processes that are inside
- * hl_device_open to finish
- */
- if (!list_empty(&hdev->fpriv_list))
- ssleep(1);
-
- mutex_lock(&hdev->fpriv_list_lock);
-
- /* This section must be protected because we are dereferencing
- * pointers that are freed if the process exits
- */
- list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) {
- task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
- if (task) {
- dev_info(hdev->dev, "Killing user process pid=%d\n",
- task_pid_nr(task));
- send_sig(SIGKILL, task, 1);
- usleep_range(1000, 10000);
-
- put_task_struct(task);
- }
- }
-
- mutex_unlock(&hdev->fpriv_list_lock);
-
- /* We killed the open users, but because the driver cleans up after the
- * user contexts are closed (e.g. mmu mappings), we need to wait again
- * to make sure the cleaning phase is finished before continuing with
- * the reset
- */
-
- pending_cnt = pending_total;
-
- while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) {
- dev_info(hdev->dev,
- "Waiting for all unmap operations to finish before hard reset\n");
-
- pending_cnt--;
-
- ssleep(1);
- }
-
- return list_empty(&hdev->fpriv_list) ? 0 : -EBUSY;
-}
-
-static void device_hard_reset_pending(struct work_struct *work)
-{
- struct hl_device_reset_work *device_reset_work =
- container_of(work, struct hl_device_reset_work, reset_work);
- struct hl_device *hdev = device_reset_work->hdev;
-
- hl_device_reset(hdev, true, true);
-
- kfree(device_reset_work);
-}
-
-/*
- * hl_device_reset - reset the device
- *
- * @hdev: pointer to habanalabs device structure
- * @hard_reset: should we do hard reset to all engines or just reset the
- * compute/dma engines
- * @from_hard_reset_thread: is the caller the hard-reset thread
- *
- * Block future CS and wait for pending CS to be enqueued
- * Call ASIC H/W fini
- * Flush all completions
- * Re-initialize all internal data structures
- * Call ASIC H/W init, late_init
- * Test queues
- * Enable device
- *
- * Returns 0 for success or an error on failure.
- */
-int hl_device_reset(struct hl_device *hdev, bool hard_reset,
- bool from_hard_reset_thread)
-{
- int i, rc;
-
- if (!hdev->init_done) {
- dev_err(hdev->dev,
- "Can't reset before initialization is done\n");
- return 0;
- }
-
- if ((!hard_reset) && (!hdev->supports_soft_reset)) {
- dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");
- hard_reset = true;
- }
-
- /*
- * Prevent concurrency in this function - only one reset should be
- * done at any given time. Only need to perform this if we didn't
- * get from the dedicated hard reset thread
- */
- if (!from_hard_reset_thread) {
- /* Block future CS/VM/JOB completion operations */
- rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
- if (rc)
- return 0;
-
- if (hard_reset) {
- /* Disable PCI access from device F/W so he won't send
- * us additional interrupts. We disable MSI/MSI-X at
- * the halt_engines function and we can't have the F/W
- * sending us interrupts after that. We need to disable
- * the access here because if the device is marked
- * disable, the message won't be send. Also, in case
- * of heartbeat, the device CPU is marked as disable
- * so this message won't be sent
- */
- if (hl_fw_send_pci_access_msg(hdev,
- ARMCP_PACKET_DISABLE_PCI_ACCESS))
- dev_warn(hdev->dev,
- "Failed to disable PCI access by F/W\n");
- }
-
- /* This also blocks future CS/VM/JOB completion operations */
- hdev->disabled = true;
-
- /* Flush anyone that is inside the critical section of enqueue
- * jobs to the H/W
- */
- hdev->asic_funcs->hw_queues_lock(hdev);
- hdev->asic_funcs->hw_queues_unlock(hdev);
-
- /* Flush anyone that is inside device open */
- mutex_lock(&hdev->fpriv_list_lock);
- mutex_unlock(&hdev->fpriv_list_lock);
-
- dev_err(hdev->dev, "Going to RESET device!\n");
- }
-
-again:
- if ((hard_reset) && (!from_hard_reset_thread)) {
- struct hl_device_reset_work *device_reset_work;
-
- hdev->hard_reset_pending = true;
-
- device_reset_work = kzalloc(sizeof(*device_reset_work),
- GFP_ATOMIC);
- if (!device_reset_work) {
- rc = -ENOMEM;
- goto out_err;
- }
-
- /*
- * Because the reset function can't run from interrupt or
- * from heartbeat work, we need to call the reset function
- * from a dedicated work
- */
- INIT_WORK(&device_reset_work->reset_work,
- device_hard_reset_pending);
- device_reset_work->hdev = hdev;
- schedule_work(&device_reset_work->reset_work);
-
- return 0;
- }
-
- if (hard_reset) {
- device_late_fini(hdev);
-
- /*
- * Now that the heartbeat thread is closed, flush processes
- * which are sending messages to CPU
- */
- mutex_lock(&hdev->send_cpu_message_lock);
- mutex_unlock(&hdev->send_cpu_message_lock);
- }
-
- /*
- * Halt the engines and disable interrupts so we won't get any more
- * completions from H/W and we won't have any accesses from the
- * H/W to the host machine
- */
- hdev->asic_funcs->halt_engines(hdev, hard_reset);
-
- /* Go over all the queues, release all CS and their jobs */
- hl_cs_rollback_all(hdev);
-
- if (hard_reset) {
- /* Kill processes here after CS rollback. This is because the
- * process can't really exit until all its CSs are done, which
- * is what we do in cs rollback
- */
- rc = device_kill_open_processes(hdev);
- if (rc) {
- dev_crit(hdev->dev,
- "Failed to kill all open processes, stopping hard reset\n");
- goto out_err;
- }
-
- /* Flush the Event queue workers to make sure no other thread is
- * reading or writing to registers during the reset
- */
- flush_workqueue(hdev->eq_wq);
- }
-
- /* Release kernel context */
- if ((hard_reset) && (hl_ctx_put(hdev->kernel_ctx) == 1))
- hdev->kernel_ctx = NULL;
-
- /* Reset the H/W. It will be in idle state after this returns */
- hdev->asic_funcs->hw_fini(hdev, hard_reset);
-
- if (hard_reset) {
- hl_vm_fini(hdev);
- hl_mmu_fini(hdev);
- hl_eq_reset(hdev, &hdev->event_queue);
- }
-
- /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
- hl_hw_queue_reset(hdev, hard_reset);
- for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
- hl_cq_reset(hdev, &hdev->completion_queue[i]);
-
- hdev->idle_busy_ts_idx = 0;
- hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0);
- hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0);
-
- if (hdev->cs_active_cnt)
- dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n",
- hdev->cs_active_cnt);
-
- mutex_lock(&hdev->fpriv_list_lock);
-
- /* Make sure the context switch phase will run again */
- if (hdev->compute_ctx) {
- atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1);
- hdev->compute_ctx->thread_ctx_switch_wait_token = 0;
- }
-
- mutex_unlock(&hdev->fpriv_list_lock);
-
- /* Finished tear-down, starting to re-initialize */
-
- if (hard_reset) {
- hdev->device_cpu_disabled = false;
- hdev->hard_reset_pending = false;
-
- if (hdev->kernel_ctx) {
- dev_crit(hdev->dev,
- "kernel ctx was alive during hard reset, something is terribly wrong\n");
- rc = -EBUSY;
- goto out_err;
- }
-
- rc = hl_mmu_init(hdev);
- if (rc) {
- dev_err(hdev->dev,
- "Failed to initialize MMU S/W after hard reset\n");
- goto out_err;
- }
-
- /* Allocate the kernel context */
- hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
- GFP_KERNEL);
- if (!hdev->kernel_ctx) {
- rc = -ENOMEM;
- goto out_err;
- }
-
- hdev->compute_ctx = NULL;
-
- rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
- if (rc) {
- dev_err(hdev->dev,
- "failed to init kernel ctx in hard reset\n");
- kfree(hdev->kernel_ctx);
- hdev->kernel_ctx = NULL;
- goto out_err;
- }
- }
-
- rc = hdev->asic_funcs->hw_init(hdev);
- if (rc) {
- dev_err(hdev->dev,
- "failed to initialize the H/W after reset\n");
- goto out_err;
- }
-
- hdev->disabled = false;
-
- /* Check that the communication with the device is working */
- rc = hdev->asic_funcs->test_queues(hdev);
- if (rc) {
- dev_err(hdev->dev,
- "Failed to detect if device is alive after reset\n");
- goto out_err;
- }
-
- if (hard_reset) {
- rc = device_late_init(hdev);
- if (rc) {
- dev_err(hdev->dev,
- "Failed late init after hard reset\n");
- goto out_err;
- }
-
- rc = hl_vm_init(hdev);
- if (rc) {
- dev_err(hdev->dev,
- "Failed to init memory module after hard reset\n");
- goto out_err;
- }
-
- hl_set_max_power(hdev, hdev->max_power);
- } else {
- rc = hdev->asic_funcs->soft_reset_late_init(hdev);
- if (rc) {
- dev_err(hdev->dev,
- "Failed late init after soft reset\n");
- goto out_err;
- }
- }
-
- atomic_set(&hdev->in_reset, 0);
-
- if (hard_reset)
- hdev->hard_reset_cnt++;
- else
- hdev->soft_reset_cnt++;
-
- dev_warn(hdev->dev, "Successfully finished resetting the device\n");
-
- return 0;
-
-out_err:
- hdev->disabled = true;
-
- if (hard_reset) {
- dev_err(hdev->dev,
- "Failed to reset! Device is NOT usable\n");
- hdev->hard_reset_cnt++;
- } else {
- dev_err(hdev->dev,
- "Failed to do soft-reset, trying hard reset\n");
- hdev->soft_reset_cnt++;
- hard_reset = true;
- goto again;
- }
-
- atomic_set(&hdev->in_reset, 0);
-
- return rc;
-}
-
-/*
- * hl_device_init - main initialization function for habanalabs device
- *
- * @hdev: pointer to habanalabs device structure
- *
- * Allocate an id for the device, do early initialization and then call the
- * ASIC specific initialization functions. Finally, create the cdev and the
- * Linux device to expose it to the user
- */
-int hl_device_init(struct hl_device *hdev, struct class *hclass)
-{
- int i, rc, cq_cnt, cq_ready_cnt;
- char *name;
- bool add_cdev_sysfs_on_err = false;
-
- name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
- if (!name) {
- rc = -ENOMEM;
- goto out_disabled;
- }
-
- /* Initialize cdev and device structures */
- rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
- &hdev->cdev, &hdev->dev);
-
- kfree(name);
-
- if (rc)
- goto out_disabled;
-
- name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
- if (!name) {
- rc = -ENOMEM;
- goto free_dev;
- }
-
- /* Initialize cdev and device structures for control device */
- rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
- name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
-
- kfree(name);
-
- if (rc)
- goto free_dev;
-
- /* Initialize ASIC function pointers and perform early init */
- rc = device_early_init(hdev);
- if (rc)
- goto free_dev_ctrl;
-
- /*
- * Start calling ASIC initialization. First S/W then H/W and finally
- * late init
- */
- rc = hdev->asic_funcs->sw_init(hdev);
- if (rc)
- goto early_fini;
-
- /*
- * Initialize the H/W queues. Must be done before hw_init, because
- * there the addresses of the kernel queue are being written to the
- * registers of the device
- */
- rc = hl_hw_queues_create(hdev);
- if (rc) {
- dev_err(hdev->dev, "failed to initialize kernel queues\n");
- goto sw_fini;
- }
-
- cq_cnt = hdev->asic_prop.completion_queues_count;
-
- /*
- * Initialize the completion queues. Must be done before hw_init,
- * because there the addresses of the completion queues are being
- * passed as arguments to request_irq
- */
- if (cq_cnt) {
- hdev->completion_queue = kcalloc(cq_cnt,
- sizeof(*hdev->completion_queue),
- GFP_KERNEL);
-
- if (!hdev->completion_queue) {
- dev_err(hdev->dev,
- "failed to allocate completion queues\n");
- rc = -ENOMEM;
- goto hw_queues_destroy;
- }
- }
-
- for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
- rc = hl_cq_init(hdev, &hdev->completion_queue[i],
- hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
- if (rc) {
- dev_err(hdev->dev,
- "failed to initialize completion queue\n");
- goto cq_fini;
- }
- hdev->completion_queue[i].cq_idx = i;
- }
-
- /*
- * Initialize the event queue. Must be done before hw_init,
- * because there the address of the event queue is being
- * passed as argument to request_irq
- */
- rc = hl_eq_init(hdev, &hdev->event_queue);
- if (rc) {
- dev_err(hdev->dev, "failed to initialize event queue\n");
- goto cq_fini;
- }
-
- /* MMU S/W must be initialized before kernel context is created */
- rc = hl_mmu_init(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
- goto eq_fini;
- }
-
- /* Allocate the kernel context */
- hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
- if (!hdev->kernel_ctx) {
- rc = -ENOMEM;
- goto mmu_fini;
- }
-
- hdev->compute_ctx = NULL;
-
- rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
- if (rc) {
- dev_err(hdev->dev, "failed to initialize kernel context\n");
- kfree(hdev->kernel_ctx);
- goto mmu_fini;
- }
-
- rc = hl_cb_pool_init(hdev);
- if (rc) {
- dev_err(hdev->dev, "failed to initialize CB pool\n");
- goto release_ctx;
- }
-
- hl_debugfs_add_device(hdev);
-
- if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
- dev_info(hdev->dev,
- "H/W state is dirty, must reset before initializing\n");
- hdev->asic_funcs->halt_engines(hdev, true);
- hdev->asic_funcs->hw_fini(hdev, true);
- }
-
- /*
- * From this point, in case of an error, add char devices and create
- * sysfs nodes as part of the error flow, to allow debugging.
- */
- add_cdev_sysfs_on_err = true;
-
- rc = hdev->asic_funcs->hw_init(hdev);
- if (rc) {
- dev_err(hdev->dev, "failed to initialize the H/W\n");
- rc = 0;
- goto out_disabled;
- }
-
- hdev->disabled = false;
-
- /* Check that the communication with the device is working */
- rc = hdev->asic_funcs->test_queues(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed to detect if device is alive\n");
- rc = 0;
- goto out_disabled;
- }
-
- rc = device_late_init(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed late initialization\n");
- rc = 0;
- goto out_disabled;
- }
-
- dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
- hdev->asic_name,
- hdev->asic_prop.dram_size / 1024 / 1024 / 1024);
-
- rc = hl_vm_init(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed to initialize memory module\n");
- rc = 0;
- goto out_disabled;
- }
-
- /*
- * Expose devices and sysfs nodes to user.
- * From here there is no need to add char devices and create sysfs nodes
- * in case of an error.
- */
- add_cdev_sysfs_on_err = false;
- rc = device_cdev_sysfs_add(hdev);
- if (rc) {
- dev_err(hdev->dev,
- "Failed to add char devices and sysfs nodes\n");
- rc = 0;
- goto out_disabled;
- }
-
- /*
- * hl_hwmon_init() must be called after device_late_init(), because only
- * there we get the information from the device about which
- * hwmon-related sensors the device supports.
- * Furthermore, it must be done after adding the device to the system.
- */
- rc = hl_hwmon_init(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed to initialize hwmon\n");
- rc = 0;
- goto out_disabled;
- }
-
- dev_notice(hdev->dev,
- "Successfully added device to habanalabs driver\n");
-
- hdev->init_done = true;
-
- return 0;
-
-release_ctx:
- if (hl_ctx_put(hdev->kernel_ctx) != 1)
- dev_err(hdev->dev,
- "kernel ctx is still alive on initialization failure\n");
-mmu_fini:
- hl_mmu_fini(hdev);
-eq_fini:
- hl_eq_fini(hdev, &hdev->event_queue);
-cq_fini:
- for (i = 0 ; i < cq_ready_cnt ; i++)
- hl_cq_fini(hdev, &hdev->completion_queue[i]);
- kfree(hdev->completion_queue);
-hw_queues_destroy:
- hl_hw_queues_destroy(hdev);
-sw_fini:
- hdev->asic_funcs->sw_fini(hdev);
-early_fini:
- device_early_fini(hdev);
-free_dev_ctrl:
- kfree(hdev->dev_ctrl);
-free_dev:
- kfree(hdev->dev);
-out_disabled:
- hdev->disabled = true;
- if (add_cdev_sysfs_on_err)
- device_cdev_sysfs_add(hdev);
- if (hdev->pdev)
- dev_err(&hdev->pdev->dev,
- "Failed to initialize hl%d. Device is NOT usable !\n",
- hdev->id / 2);
- else
- pr_err("Failed to initialize hl%d. Device is NOT usable !\n",
- hdev->id / 2);
-
- return rc;
-}
-
-/*
- * hl_device_fini - main tear-down function for habanalabs device
- *
- * @hdev: pointer to habanalabs device structure
- *
- * Destroy the device, call ASIC fini functions and release the id
- */
-void hl_device_fini(struct hl_device *hdev)
-{
- int i, rc;
- ktime_t timeout;
-
- dev_info(hdev->dev, "Removing device\n");
-
- /*
- * This function is competing with the reset function, so try to
- * take the reset atomic and if we are already in middle of reset,
- * wait until reset function is finished. Reset function is designed
- * to always finish. However, in Gaudi, because of all the network
- * ports, the hard reset could take between 10-30 seconds
- */
-
- timeout = ktime_add_us(ktime_get(),
- HL_HARD_RESET_MAX_TIMEOUT * 1000 * 1000);
- rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
- while (rc) {
- usleep_range(50, 200);
- rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
- if (ktime_compare(ktime_get(), timeout) > 0) {
- WARN(1, "Failed to remove device because reset function did not finish\n");
- return;
- }
- }
-
- /* Mark device as disabled */
- hdev->disabled = true;
-
- /* Flush anyone that is inside the critical section of enqueue
- * jobs to the H/W
- */
- hdev->asic_funcs->hw_queues_lock(hdev);
- hdev->asic_funcs->hw_queues_unlock(hdev);
-
- /* Flush anyone that is inside device open */
- mutex_lock(&hdev->fpriv_list_lock);
- mutex_unlock(&hdev->fpriv_list_lock);
-
- hdev->hard_reset_pending = true;
-
- hl_hwmon_fini(hdev);
-
- device_late_fini(hdev);
-
- hl_debugfs_remove_device(hdev);
-
- /*
- * Halt the engines and disable interrupts so we won't get any more
- * completions from H/W and we won't have any accesses from the
- * H/W to the host machine
- */
- hdev->asic_funcs->halt_engines(hdev, true);
-
- /* Go over all the queues, release all CS and their jobs */
- hl_cs_rollback_all(hdev);
-
- /* Kill processes here after CS rollback. This is because the process
- * can't really exit until all its CSs are done, which is what we
- * do in cs rollback
- */
- rc = device_kill_open_processes(hdev);
- if (rc)
- dev_crit(hdev->dev, "Failed to kill all open processes\n");
-
- hl_cb_pool_fini(hdev);
-
- /* Release kernel context */
- if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
- dev_err(hdev->dev, "kernel ctx is still alive\n");
-
- /* Reset the H/W. It will be in idle state after this returns */
- hdev->asic_funcs->hw_fini(hdev, true);
-
- hl_vm_fini(hdev);
-
- hl_mmu_fini(hdev);
-
- hl_eq_fini(hdev, &hdev->event_queue);
-
- for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
- hl_cq_fini(hdev, &hdev->completion_queue[i]);
- kfree(hdev->completion_queue);
-
- hl_hw_queues_destroy(hdev);
-
- /* Call ASIC S/W finalize function */
- hdev->asic_funcs->sw_fini(hdev);
-
- device_early_fini(hdev);
-
- /* Hide devices and sysfs nodes from user */
- device_cdev_sysfs_del(hdev);
-
- pr_info("removed device successfully\n");
-}
-
-/*
- * MMIO register access helper functions.
- */
-
-/*
- * hl_rreg - Read an MMIO register
- *
- * @hdev: pointer to habanalabs device structure
- * @reg: MMIO register offset (in bytes)
- *
- * Returns the value of the MMIO register we are asked to read
- *
- */
-inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
-{
- return readl(hdev->rmmio + reg);
-}
-
-/*
- * hl_wreg - Write to an MMIO register
- *
- * @hdev: pointer to habanalabs device structure
- * @reg: MMIO register offset (in bytes)
- * @val: 32-bit value
- *
- * Writes the 32-bit value into the MMIO register
- *
- */
-inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
-{
- writel(val, hdev->rmmio + reg);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-#include "include/hl_boot_if.h"
-
-#include <linux/firmware.h>
-#include <linux/genalloc.h>
-#include <linux/io-64-nonatomic-lo-hi.h>
-#include <linux/slab.h>
-
-/**
- * hl_fw_load_fw_to_device() - Load F/W code to device's memory.
- *
- * @hdev: pointer to hl_device structure.
- * @fw_name: the firmware image name
- * @dst: IO memory mapped address space to copy firmware to
- *
- * Copy fw code from firmware file to device memory.
- *
- * Return: 0 on success, non-zero for failure.
- */
-int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
- void __iomem *dst)
-{
- const struct firmware *fw;
- const u64 *fw_data;
- size_t fw_size;
- int rc;
-
- rc = request_firmware(&fw, fw_name, hdev->dev);
- if (rc) {
- dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name);
- goto out;
- }
-
- fw_size = fw->size;
- if ((fw_size % 4) != 0) {
- dev_err(hdev->dev, "Illegal %s firmware size %zu\n",
- fw_name, fw_size);
- rc = -EINVAL;
- goto out;
- }
-
- dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
-
- fw_data = (const u64 *) fw->data;
-
- memcpy_toio(dst, fw_data, fw_size);
-
-out:
- release_firmware(fw);
- return rc;
-}
-
-int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
-{
- struct armcp_packet pkt = {};
-
- pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT);
-
- return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
- sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL);
-}
-
-int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
- u16 len, u32 timeout, long *result)
-{
- struct armcp_packet *pkt;
- dma_addr_t pkt_dma_addr;
- u32 tmp;
- int rc = 0;
-
- pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
- &pkt_dma_addr);
- if (!pkt) {
- dev_err(hdev->dev,
- "Failed to allocate DMA memory for packet to CPU\n");
- return -ENOMEM;
- }
-
- memcpy(pkt, msg, len);
-
- mutex_lock(&hdev->send_cpu_message_lock);
-
- if (hdev->disabled)
- goto out;
-
- if (hdev->device_cpu_disabled) {
- rc = -EIO;
- goto out;
- }
-
- rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
- if (rc) {
- dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
- goto out;
- }
-
- rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
- (tmp == ARMCP_PACKET_FENCE_VAL), 1000,
- timeout, true);
-
- hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
-
- if (rc == -ETIMEDOUT) {
- dev_err(hdev->dev, "Device CPU packet timeout (0x%x)\n", tmp);
- hdev->device_cpu_disabled = true;
- goto out;
- }
-
- tmp = le32_to_cpu(pkt->ctl);
-
- rc = (tmp & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT;
- if (rc) {
- dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
- rc,
- (tmp & ARMCP_PKT_CTL_OPCODE_MASK)
- >> ARMCP_PKT_CTL_OPCODE_SHIFT);
- rc = -EIO;
- } else if (result) {
- *result = (long) le64_to_cpu(pkt->result);
- }
-
-out:
- mutex_unlock(&hdev->send_cpu_message_lock);
-
- hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt);
-
- return rc;
-}
-
-int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
-{
- struct armcp_packet pkt;
- long result;
- int rc;
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.value = cpu_to_le64(event_type);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- HL_DEVICE_TIMEOUT_USEC, &result);
-
- if (rc)
- dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
-
- return rc;
-}
-
-int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
- size_t irq_arr_size)
-{
- struct armcp_unmask_irq_arr_packet *pkt;
- size_t total_pkt_size;
- long result;
- int rc;
-
- total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
- irq_arr_size;
-
- /* data should be aligned to 8 bytes in order to ArmCP to copy it */
- total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
-
- /* total_pkt_size is casted to u16 later on */
- if (total_pkt_size > USHRT_MAX) {
- dev_err(hdev->dev, "too many elements in IRQ array\n");
- return -EINVAL;
- }
-
- pkt = kzalloc(total_pkt_size, GFP_KERNEL);
- if (!pkt)
- return -ENOMEM;
-
- pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
- memcpy(&pkt->irqs, irq_arr, irq_arr_size);
-
- pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
- total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result);
-
- if (rc)
- dev_err(hdev->dev, "failed to unmask IRQ array\n");
-
- kfree(pkt);
-
- return rc;
-}
-
-int hl_fw_test_cpu_queue(struct hl_device *hdev)
-{
- struct armcp_packet test_pkt = {};
- long result;
- int rc;
-
- test_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
- sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
-
- if (!rc) {
- if (result != ARMCP_PACKET_FENCE_VAL)
- dev_err(hdev->dev,
- "CPU queue test failed (0x%08lX)\n", result);
- } else {
- dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc);
- }
-
- return rc;
-}
-
-void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
- dma_addr_t *dma_handle)
-{
- u64 kernel_addr;
-
- kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
-
- *dma_handle = hdev->cpu_accessible_dma_address +
- (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem);
-
- return (void *) (uintptr_t) kernel_addr;
-}
-
-void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
- void *vaddr)
-{
- gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
- size);
-}
-
-int hl_fw_send_heartbeat(struct hl_device *hdev)
-{
- struct armcp_packet hb_pkt = {};
- long result;
- int rc;
-
- hb_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
- sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
-
- if ((rc) || (result != ARMCP_PACKET_FENCE_VAL))
- rc = -EIO;
-
- return rc;
-}
-
-int hl_fw_armcp_info_get(struct hl_device *hdev)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- struct armcp_packet pkt = {};
- void *armcp_info_cpu_addr;
- dma_addr_t armcp_info_dma_addr;
- long result;
- int rc;
-
- armcp_info_cpu_addr =
- hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
- sizeof(struct armcp_info),
- &armcp_info_dma_addr);
- if (!armcp_info_cpu_addr) {
- dev_err(hdev->dev,
- "Failed to allocate DMA memory for ArmCP info packet\n");
- return -ENOMEM;
- }
-
- memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.addr = cpu_to_le64(armcp_info_dma_addr);
- pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info));
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- HL_ARMCP_INFO_TIMEOUT_USEC, &result);
- if (rc) {
- dev_err(hdev->dev,
- "Failed to handle ArmCP info pkt, error %d\n", rc);
- goto out;
- }
-
- memcpy(&prop->armcp_info, armcp_info_cpu_addr,
- sizeof(prop->armcp_info));
-
- rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors);
- if (rc) {
- dev_err(hdev->dev,
- "Failed to build hwmon channel info, error %d\n", rc);
- rc = -EFAULT;
- goto out;
- }
-
-out:
- hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
- sizeof(struct armcp_info), armcp_info_cpu_addr);
-
- return rc;
-}
-
-int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
-{
- struct armcp_packet pkt = {};
- void *eeprom_info_cpu_addr;
- dma_addr_t eeprom_info_dma_addr;
- long result;
- int rc;
-
- eeprom_info_cpu_addr =
- hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
- max_size, &eeprom_info_dma_addr);
- if (!eeprom_info_cpu_addr) {
- dev_err(hdev->dev,
- "Failed to allocate DMA memory for ArmCP EEPROM packet\n");
- return -ENOMEM;
- }
-
- memset(eeprom_info_cpu_addr, 0, max_size);
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.addr = cpu_to_le64(eeprom_info_dma_addr);
- pkt.data_max_size = cpu_to_le32(max_size);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- HL_ARMCP_EEPROM_TIMEOUT_USEC, &result);
-
- if (rc) {
- dev_err(hdev->dev,
- "Failed to handle ArmCP EEPROM packet, error %d\n", rc);
- goto out;
- }
-
- /* result contains the actual size */
- memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size));
-
-out:
- hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size,
- eeprom_info_cpu_addr);
-
- return rc;
-}
-
-static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
-{
- u32 err_val;
-
- /* Some of the firmware status codes are deprecated in newer f/w
- * versions. In those versions, the errors are reported
- * in different registers. Therefore, we need to check those
- * registers and print the exact errors. Moreover, there
- * may be multiple errors, so we need to report on each error
- * separately. Some of the error codes might indicate a state
- * that is not an error per-se, but it is an error in production
- * environment
- */
- err_val = RREG32(boot_err0_reg);
- if (!(err_val & CPU_BOOT_ERR0_ENABLED))
- return;
-
- if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
- dev_err(hdev->dev,
- "Device boot error - DRAM initialization failed\n");
- if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
- dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
- if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
- dev_err(hdev->dev,
- "Device boot error - Thermal Sensor initialization failed\n");
- if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
- dev_warn(hdev->dev,
- "Device boot warning - Skipped DRAM initialization\n");
- if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED)
- dev_warn(hdev->dev,
- "Device boot error - Skipped waiting for BMC\n");
- if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
- dev_err(hdev->dev,
- "Device boot error - Serdes data from BMC not available\n");
- if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
- dev_err(hdev->dev,
- "Device boot error - NIC F/W initialization failed\n");
-}
-
-static void hl_detect_cpu_boot_status(struct hl_device *hdev, u32 status)
-{
- switch (status) {
- case CPU_BOOT_STATUS_NA:
- dev_err(hdev->dev,
- "Device boot error - BTL did NOT run\n");
- break;
- case CPU_BOOT_STATUS_IN_WFE:
- dev_err(hdev->dev,
- "Device boot error - Stuck inside WFE loop\n");
- break;
- case CPU_BOOT_STATUS_IN_BTL:
- dev_err(hdev->dev,
- "Device boot error - Stuck in BTL\n");
- break;
- case CPU_BOOT_STATUS_IN_PREBOOT:
- dev_err(hdev->dev,
- "Device boot error - Stuck in Preboot\n");
- break;
- case CPU_BOOT_STATUS_IN_SPL:
- dev_err(hdev->dev,
- "Device boot error - Stuck in SPL\n");
- break;
- case CPU_BOOT_STATUS_IN_UBOOT:
- dev_err(hdev->dev,
- "Device boot error - Stuck in u-boot\n");
- break;
- case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
- dev_err(hdev->dev,
- "Device boot error - DRAM initialization failed\n");
- break;
- case CPU_BOOT_STATUS_UBOOT_NOT_READY:
- dev_err(hdev->dev,
- "Device boot error - u-boot stopped by user\n");
- break;
- case CPU_BOOT_STATUS_TS_INIT_FAIL:
- dev_err(hdev->dev,
- "Device boot error - Thermal Sensor initialization failed\n");
- break;
- default:
- dev_err(hdev->dev,
- "Device boot error - Invalid status code %d\n",
- status);
- break;
- }
-}
-
-int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
- u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
- u32 boot_err0_reg, bool skip_bmc,
- u32 cpu_timeout, u32 boot_fit_timeout)
-{
- u32 status;
- int rc;
-
- dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
- cpu_timeout / USEC_PER_SEC);
-
- /* Wait for boot FIT request */
- rc = hl_poll_timeout(
- hdev,
- cpu_boot_status_reg,
- status,
- status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
- 10000,
- boot_fit_timeout);
-
- if (rc) {
- dev_dbg(hdev->dev,
- "No boot fit request received, resuming boot\n");
- } else {
- rc = hdev->asic_funcs->load_boot_fit_to_device(hdev);
- if (rc)
- goto out;
-
- /* Clear device CPU message status */
- WREG32(cpu_msg_status_reg, CPU_MSG_CLR);
-
- /* Signal device CPU that boot loader is ready */
- WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
-
- /* Poll for CPU device ack */
- rc = hl_poll_timeout(
- hdev,
- cpu_msg_status_reg,
- status,
- status == CPU_MSG_OK,
- 10000,
- boot_fit_timeout);
-
- if (rc) {
- dev_err(hdev->dev,
- "Timeout waiting for boot fit load ack\n");
- goto out;
- }
-
- /* Clear message */
- WREG32(msg_to_cpu_reg, KMD_MSG_NA);
- }
-
- /* Make sure CPU boot-loader is running */
- rc = hl_poll_timeout(
- hdev,
- cpu_boot_status_reg,
- status,
- (status == CPU_BOOT_STATUS_DRAM_RDY) ||
- (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
- (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
- (status == CPU_BOOT_STATUS_SRAM_AVAIL),
- 10000,
- cpu_timeout);
-
- /* Read U-Boot, preboot versions now in case we will later fail */
- hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
- hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
-
- /* Some of the status codes below are deprecated in newer f/w
- * versions but we keep them here for backward compatibility
- */
- if (rc) {
- hl_detect_cpu_boot_status(hdev, status);
- rc = -EIO;
- goto out;
- }
-
- if (!hdev->fw_loading) {
- dev_info(hdev->dev, "Skip loading FW\n");
- goto out;
- }
-
- if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
- goto out;
-
- dev_info(hdev->dev,
- "Loading firmware to device, may take some time...\n");
-
- rc = hdev->asic_funcs->load_firmware_to_device(hdev);
- if (rc)
- goto out;
-
- if (skip_bmc) {
- WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC);
-
- rc = hl_poll_timeout(
- hdev,
- cpu_boot_status_reg,
- status,
- (status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED),
- 10000,
- cpu_timeout);
-
- if (rc) {
- dev_err(hdev->dev,
- "Failed to get ACK on skipping BMC, %d\n",
- status);
- WREG32(msg_to_cpu_reg, KMD_MSG_NA);
- rc = -EIO;
- goto out;
- }
- }
-
- WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
-
- rc = hl_poll_timeout(
- hdev,
- cpu_boot_status_reg,
- status,
- (status == CPU_BOOT_STATUS_SRAM_AVAIL),
- 10000,
- cpu_timeout);
-
- /* Clear message */
- WREG32(msg_to_cpu_reg, KMD_MSG_NA);
-
- if (rc) {
- if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
- dev_err(hdev->dev,
- "Device reports FIT image is corrupted\n");
- else
- dev_err(hdev->dev,
- "Failed to load firmware to device, %d\n",
- status);
-
- rc = -EIO;
- goto out;
- }
-
- dev_info(hdev->dev, "Successfully loaded firmware to device\n");
-
-out:
- fw_read_errors(hdev, boot_err0_reg);
-
- return rc;
-}
# SPDX-License-Identifier: GPL-2.0-only
-subdir-ccflags-y += -I$(src)
+subdir-ccflags-y += -I$(src)/common
HL_GAUDI_FILES := gaudi/gaudi.o gaudi/gaudi_hwmgr.o gaudi/gaudi_security.o \
gaudi/gaudi_coresight.o
#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"
-#include "include/hl_boot_if.h"
+#include "include/common/hl_boot_if.h"
#include "include/gaudi/gaudi_packets.h"
#include "include/gaudi/gaudi.h"
#include "include/gaudi/gaudi_async_events.h"
#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"
-#include "include/hl_boot_if.h"
+#include "include/common/hl_boot_if.h"
#include "include/goya/goya_packets.h"
#include "include/goya/goya.h"
#include "include/goya/goya_async_events.h"
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- *
- */
-
-#ifndef HABANALABSP_H_
-#define HABANALABSP_H_
-
-#include "include/armcp_if.h"
-#include "include/qman_if.h"
-#include <uapi/misc/habanalabs.h>
-
-#include <linux/cdev.h>
-#include <linux/iopoll.h>
-#include <linux/irqreturn.h>
-#include <linux/dma-fence.h>
-#include <linux/dma-direction.h>
-#include <linux/scatterlist.h>
-#include <linux/hashtable.h>
-
-#define HL_NAME "habanalabs"
-
-#define HL_MMAP_CB_MASK (0x8000000000000000ull >> PAGE_SHIFT)
-
-#define HL_PENDING_RESET_PER_SEC 30
-
-#define HL_HARD_RESET_MAX_TIMEOUT 120
-
-#define HL_DEVICE_TIMEOUT_USEC 1000000 /* 1 s */
-
-#define HL_HEARTBEAT_PER_USEC 5000000 /* 5 s */
-
-#define HL_PLL_LOW_JOB_FREQ_USEC 5000000 /* 5 s */
-
-#define HL_ARMCP_INFO_TIMEOUT_USEC 10000000 /* 10s */
-#define HL_ARMCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */
-
-#define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */
-
-#define HL_SIM_MAX_TIMEOUT_US 10000000 /* 10s */
-
-#define HL_IDLE_BUSY_TS_ARR_SIZE 4096
-
-/* Memory */
-#define MEM_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
-
-/* MMU */
-#define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
-
-/*
- * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream
- * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream
- */
-#define HL_RSVD_SOBS 4
-#define HL_RSVD_MONS 2
-
-#define HL_RSVD_SOBS_IN_USE 2
-#define HL_RSVD_MONS_IN_USE 1
-
-#define HL_MAX_SOB_VAL (1 << 15)
-
-#define IS_POWER_OF_2(n) (n != 0 && ((n & (n - 1)) == 0))
-#define IS_MAX_PENDING_CS_VALID(n) (IS_POWER_OF_2(n) && (n > 1))
-
-#define HL_PCI_NUM_BARS 6
-
-/**
- * struct pgt_info - MMU hop page info.
- * @node: hash linked-list node for the pgts shadow hash of pgts.
- * @phys_addr: physical address of the pgt.
- * @shadow_addr: shadow hop in the host.
- * @ctx: pointer to the owner ctx.
- * @num_of_ptes: indicates how many ptes are used in the pgt.
- *
- * The MMU page tables hierarchy is placed on the DRAM. When a new level (hop)
- * is needed during mapping, a new page is allocated and this structure holds
- * its essential information. During unmapping, if no valid PTEs remained in the
- * page, it is freed with its pgt_info structure.
- */
-struct pgt_info {
- struct hlist_node node;
- u64 phys_addr;
- u64 shadow_addr;
- struct hl_ctx *ctx;
- int num_of_ptes;
-};
-
-struct hl_device;
-struct hl_fpriv;
-
-/**
- * enum hl_pci_match_mode - pci match mode per region
- * @PCI_ADDRESS_MATCH_MODE: address match mode
- * @PCI_BAR_MATCH_MODE: bar match mode
- */
-enum hl_pci_match_mode {
- PCI_ADDRESS_MATCH_MODE,
- PCI_BAR_MATCH_MODE
-};
-
-/**
- * enum hl_fw_component - F/W components to read version through registers.
- * @FW_COMP_UBOOT: u-boot.
- * @FW_COMP_PREBOOT: preboot.
- */
-enum hl_fw_component {
- FW_COMP_UBOOT,
- FW_COMP_PREBOOT
-};
-
-/**
- * enum hl_queue_type - Supported QUEUE types.
- * @QUEUE_TYPE_NA: queue is not available.
- * @QUEUE_TYPE_EXT: external queue which is a DMA channel that may access the
- * host.
- * @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's
- * memories and/or operates the compute engines.
- * @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU.
- * @QUEUE_TYPE_HW: queue of DMA and compute engines jobs, for which completion
- * notifications are sent by H/W.
- */
-enum hl_queue_type {
- QUEUE_TYPE_NA,
- QUEUE_TYPE_EXT,
- QUEUE_TYPE_INT,
- QUEUE_TYPE_CPU,
- QUEUE_TYPE_HW
-};
-
-enum hl_cs_type {
- CS_TYPE_DEFAULT,
- CS_TYPE_SIGNAL,
- CS_TYPE_WAIT
-};
-
-/*
- * struct hl_inbound_pci_region - inbound region descriptor
- * @mode: pci match mode for this region
- * @addr: region target address
- * @size: region size in bytes
- * @offset_in_bar: offset within bar (address match mode)
- * @bar: bar id
- */
-struct hl_inbound_pci_region {
- enum hl_pci_match_mode mode;
- u64 addr;
- u64 size;
- u64 offset_in_bar;
- u8 bar;
-};
-
-/*
- * struct hl_outbound_pci_region - outbound region descriptor
- * @addr: region target address
- * @size: region size in bytes
- */
-struct hl_outbound_pci_region {
- u64 addr;
- u64 size;
-};
-
-/*
- * struct hl_hw_sob - H/W SOB info.
- * @hdev: habanalabs device structure.
- * @kref: refcount of this SOB. The SOB will reset once the refcount is zero.
- * @sob_id: id of this SOB.
- * @q_idx: the H/W queue that uses this SOB.
- */
-struct hl_hw_sob {
- struct hl_device *hdev;
- struct kref kref;
- u32 sob_id;
- u32 q_idx;
-};
-
-/**
- * struct hw_queue_properties - queue information.
- * @type: queue type.
- * @driver_only: true if only the driver is allowed to send a job to this queue,
- * false otherwise.
- * @requires_kernel_cb: true if a CB handle must be provided for jobs on this
- * queue, false otherwise (a CB address must be provided).
- * @supports_sync_stream: True if queue supports sync stream
- */
-struct hw_queue_properties {
- enum hl_queue_type type;
- u8 driver_only;
- u8 requires_kernel_cb;
- u8 supports_sync_stream;
-};
-
-/**
- * enum vm_type_t - virtual memory mapping request information.
- * @VM_TYPE_USERPTR: mapping of user memory to device virtual address.
- * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address.
- */
-enum vm_type_t {
- VM_TYPE_USERPTR = 0x1,
- VM_TYPE_PHYS_PACK = 0x2
-};
-
-/**
- * enum hl_device_hw_state - H/W device state. use this to understand whether
- * to do reset before hw_init or not
- * @HL_DEVICE_HW_STATE_CLEAN: H/W state is clean. i.e. after hard reset
- * @HL_DEVICE_HW_STATE_DIRTY: H/W state is dirty. i.e. we started to execute
- * hw_init
- */
-enum hl_device_hw_state {
- HL_DEVICE_HW_STATE_CLEAN = 0,
- HL_DEVICE_HW_STATE_DIRTY
-};
-
-/**
- * struct hl_mmu_properties - ASIC specific MMU address translation properties.
- * @start_addr: virtual start address of the memory region.
- * @end_addr: virtual end address of the memory region.
- * @hop0_shift: shift of hop 0 mask.
- * @hop1_shift: shift of hop 1 mask.
- * @hop2_shift: shift of hop 2 mask.
- * @hop3_shift: shift of hop 3 mask.
- * @hop4_shift: shift of hop 4 mask.
- * @hop0_mask: mask to get the PTE address in hop 0.
- * @hop1_mask: mask to get the PTE address in hop 1.
- * @hop2_mask: mask to get the PTE address in hop 2.
- * @hop3_mask: mask to get the PTE address in hop 3.
- * @hop4_mask: mask to get the PTE address in hop 4.
- * @page_size: default page size used to allocate memory.
- */
-struct hl_mmu_properties {
- u64 start_addr;
- u64 end_addr;
- u64 hop0_shift;
- u64 hop1_shift;
- u64 hop2_shift;
- u64 hop3_shift;
- u64 hop4_shift;
- u64 hop0_mask;
- u64 hop1_mask;
- u64 hop2_mask;
- u64 hop3_mask;
- u64 hop4_mask;
- u32 page_size;
-};
-
-/**
- * struct asic_fixed_properties - ASIC specific immutable properties.
- * @hw_queues_props: H/W queues properties.
- * @armcp_info: received various information from ArmCP regarding the H/W, e.g.
- * available sensors.
- * @uboot_ver: F/W U-boot version.
- * @preboot_ver: F/W Preboot version.
- * @dmmu: DRAM MMU address translation properties.
- * @pmmu: PCI (host) MMU address translation properties.
- * @pmmu_huge: PCI (host) MMU address translation properties for memory
- * allocated with huge pages.
- * @sram_base_address: SRAM physical start address.
- * @sram_end_address: SRAM physical end address.
- * @sram_user_base_address - SRAM physical start address for user access.
- * @dram_base_address: DRAM physical start address.
- * @dram_end_address: DRAM physical end address.
- * @dram_user_base_address: DRAM physical start address for user access.
- * @dram_size: DRAM total size.
- * @dram_pci_bar_size: size of PCI bar towards DRAM.
- * @max_power_default: max power of the device after reset
- * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
- * fault.
- * @pcie_dbi_base_address: Base address of the PCIE_DBI block.
- * @pcie_aux_dbi_reg_addr: Address of the PCIE_AUX DBI register.
- * @mmu_pgt_addr: base physical address in DRAM of MMU page tables.
- * @mmu_dram_default_page_addr: DRAM default page physical address.
- * @mmu_pgt_size: MMU page tables total size.
- * @mmu_pte_size: PTE size in MMU page tables.
- * @mmu_hop_table_size: MMU hop table size.
- * @mmu_hop0_tables_total_size: total size of MMU hop0 tables.
- * @dram_page_size: page size for MMU DRAM allocation.
- * @cfg_size: configuration space size on SRAM.
- * @sram_size: total size of SRAM.
- * @max_asid: maximum number of open contexts (ASIDs).
- * @num_of_events: number of possible internal H/W IRQs.
- * @psoc_pci_pll_nr: PCI PLL NR value.
- * @psoc_pci_pll_nf: PCI PLL NF value.
- * @psoc_pci_pll_od: PCI PLL OD value.
- * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value.
- * @psoc_timestamp_frequency: frequency of the psoc timestamp clock.
- * @high_pll: high PLL frequency used by the device.
- * @cb_pool_cb_cnt: number of CBs in the CB pool.
- * @cb_pool_cb_size: size of each CB in the CB pool.
- * @max_pending_cs: maximum of concurrent pending command submissions
- * @max_queues: maximum amount of queues in the system
- * @sync_stream_first_sob: first sync object available for sync stream use
- * @sync_stream_first_mon: first monitor available for sync stream use
- * @tpc_enabled_mask: which TPCs are enabled.
- * @completion_queues_count: number of completion queues.
- */
-struct asic_fixed_properties {
- struct hw_queue_properties *hw_queues_props;
- struct armcp_info armcp_info;
- char uboot_ver[VERSION_MAX_LEN];
- char preboot_ver[VERSION_MAX_LEN];
- struct hl_mmu_properties dmmu;
- struct hl_mmu_properties pmmu;
- struct hl_mmu_properties pmmu_huge;
- u64 sram_base_address;
- u64 sram_end_address;
- u64 sram_user_base_address;
- u64 dram_base_address;
- u64 dram_end_address;
- u64 dram_user_base_address;
- u64 dram_size;
- u64 dram_pci_bar_size;
- u64 max_power_default;
- u64 dram_size_for_default_page_mapping;
- u64 pcie_dbi_base_address;
- u64 pcie_aux_dbi_reg_addr;
- u64 mmu_pgt_addr;
- u64 mmu_dram_default_page_addr;
- u32 mmu_pgt_size;
- u32 mmu_pte_size;
- u32 mmu_hop_table_size;
- u32 mmu_hop0_tables_total_size;
- u32 dram_page_size;
- u32 cfg_size;
- u32 sram_size;
- u32 max_asid;
- u32 num_of_events;
- u32 psoc_pci_pll_nr;
- u32 psoc_pci_pll_nf;
- u32 psoc_pci_pll_od;
- u32 psoc_pci_pll_div_factor;
- u32 psoc_timestamp_frequency;
- u32 high_pll;
- u32 cb_pool_cb_cnt;
- u32 cb_pool_cb_size;
- u32 max_pending_cs;
- u32 max_queues;
- u16 sync_stream_first_sob;
- u16 sync_stream_first_mon;
- u8 tpc_enabled_mask;
- u8 completion_queues_count;
-};
-
-/**
- * struct hl_cs_compl - command submission completion object.
- * @base_fence: kernel fence object.
- * @lock: spinlock to protect fence.
- * @hdev: habanalabs device structure.
- * @hw_sob: the H/W SOB used in this signal/wait CS.
- * @cs_seq: command submission sequence number.
- * @type: type of the CS - signal/wait.
- * @sob_val: the SOB value that is used in this signal/wait CS.
- */
-struct hl_cs_compl {
- struct dma_fence base_fence;
- spinlock_t lock;
- struct hl_device *hdev;
- struct hl_hw_sob *hw_sob;
- u64 cs_seq;
- enum hl_cs_type type;
- u16 sob_val;
-};
-
-/*
- * Command Buffers
- */
-
-/**
- * struct hl_cb_mgr - describes a Command Buffer Manager.
- * @cb_lock: protects cb_handles.
- * @cb_handles: an idr to hold all command buffer handles.
- */
-struct hl_cb_mgr {
- spinlock_t cb_lock;
- struct idr cb_handles; /* protected by cb_lock */
-};
-
-/**
- * struct hl_cb - describes a Command Buffer.
- * @refcount: reference counter for usage of the CB.
- * @hdev: pointer to device this CB belongs to.
- * @lock: spinlock to protect mmap/cs flows.
- * @debugfs_list: node in debugfs list of command buffers.
- * @pool_list: node in pool list of command buffers.
- * @kernel_address: Holds the CB's kernel virtual address.
- * @bus_address: Holds the CB's DMA address.
- * @mmap_size: Holds the CB's size that was mmaped.
- * @size: holds the CB's size.
- * @id: the CB's ID.
- * @cs_cnt: holds number of CS that this CB participates in.
- * @ctx_id: holds the ID of the owner's context.
- * @mmap: true if the CB is currently mmaped to user.
- * @is_pool: true if CB was acquired from the pool, false otherwise.
- */
-struct hl_cb {
- struct kref refcount;
- struct hl_device *hdev;
- spinlock_t lock;
- struct list_head debugfs_list;
- struct list_head pool_list;
- u64 kernel_address;
- dma_addr_t bus_address;
- u32 mmap_size;
- u32 size;
- u32 id;
- u32 cs_cnt;
- u32 ctx_id;
- u8 mmap;
- u8 is_pool;
-};
-
-
-/*
- * QUEUES
- */
-
-struct hl_cs_job;
-
-/* Queue length of external and HW queues */
-#define HL_QUEUE_LENGTH 4096
-#define HL_QUEUE_SIZE_IN_BYTES (HL_QUEUE_LENGTH * HL_BD_SIZE)
-
-#if (HL_MAX_JOBS_PER_CS > HL_QUEUE_LENGTH)
-#error "HL_QUEUE_LENGTH must be greater than HL_MAX_JOBS_PER_CS"
-#endif
-
-/* HL_CQ_LENGTH is in units of struct hl_cq_entry */
-#define HL_CQ_LENGTH HL_QUEUE_LENGTH
-#define HL_CQ_SIZE_IN_BYTES (HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE)
-
-/* Must be power of 2 */
-#define HL_EQ_LENGTH 64
-#define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
-
-/* Host <-> ArmCP shared memory size */
-#define HL_CPU_ACCESSIBLE_MEM_SIZE SZ_2M
-
-/**
- * struct hl_hw_queue - describes a H/W transport queue.
- * @hw_sob: array of the used H/W SOBs by this H/W queue.
- * @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
- * @queue_type: type of queue.
- * @kernel_address: holds the queue's kernel virtual address.
- * @bus_address: holds the queue's DMA address.
- * @pi: holds the queue's pi value.
- * @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci).
- * @hw_queue_id: the id of the H/W queue.
- * @cq_id: the id for the corresponding CQ for this H/W queue.
- * @msi_vec: the IRQ number of the H/W queue.
- * @int_queue_len: length of internal queue (number of entries).
- * @next_sob_val: the next value to use for the currently used SOB.
- * @base_sob_id: the base SOB id of the SOBs used by this queue.
- * @base_mon_id: the base MON id of the MONs used by this queue.
- * @valid: is the queue valid (we have array of 32 queues, not all of them
- * exist).
- * @curr_sob_offset: the id offset to the currently used SOB from the
- * HL_RSVD_SOBS that are being used by this queue.
- * @supports_sync_stream: True if queue supports sync stream
- */
-struct hl_hw_queue {
- struct hl_hw_sob hw_sob[HL_RSVD_SOBS];
- struct hl_cs_job **shadow_queue;
- enum hl_queue_type queue_type;
- u64 kernel_address;
- dma_addr_t bus_address;
- u32 pi;
- atomic_t ci;
- u32 hw_queue_id;
- u32 cq_id;
- u32 msi_vec;
- u16 int_queue_len;
- u16 next_sob_val;
- u16 base_sob_id;
- u16 base_mon_id;
- u8 valid;
- u8 curr_sob_offset;
- u8 supports_sync_stream;
-};
-
-/**
- * struct hl_cq - describes a completion queue
- * @hdev: pointer to the device structure
- * @kernel_address: holds the queue's kernel virtual address
- * @bus_address: holds the queue's DMA address
- * @cq_idx: completion queue index in array
- * @hw_queue_id: the id of the matching H/W queue
- * @ci: ci inside the queue
- * @pi: pi inside the queue
- * @free_slots_cnt: counter of free slots in queue
- */
-struct hl_cq {
- struct hl_device *hdev;
- u64 kernel_address;
- dma_addr_t bus_address;
- u32 cq_idx;
- u32 hw_queue_id;
- u32 ci;
- u32 pi;
- atomic_t free_slots_cnt;
-};
-
-/**
- * struct hl_eq - describes the event queue (single one per device)
- * @hdev: pointer to the device structure
- * @kernel_address: holds the queue's kernel virtual address
- * @bus_address: holds the queue's DMA address
- * @ci: ci inside the queue
- */
-struct hl_eq {
- struct hl_device *hdev;
- u64 kernel_address;
- dma_addr_t bus_address;
- u32 ci;
-};
-
-
-/*
- * ASICs
- */
-
-/**
- * enum hl_asic_type - supported ASIC types.
- * @ASIC_INVALID: Invalid ASIC type.
- * @ASIC_GOYA: Goya device.
- * @ASIC_GAUDI: Gaudi device.
- */
-enum hl_asic_type {
- ASIC_INVALID,
- ASIC_GOYA,
- ASIC_GAUDI
-};
-
-struct hl_cs_parser;
-
-/**
- * enum hl_pm_mng_profile - power management profile.
- * @PM_AUTO: internal clock is set by the Linux driver.
- * @PM_MANUAL: internal clock is set by the user.
- * @PM_LAST: last power management type.
- */
-enum hl_pm_mng_profile {
- PM_AUTO = 1,
- PM_MANUAL,
- PM_LAST
-};
-
-/**
- * enum hl_pll_frequency - PLL frequency.
- * @PLL_HIGH: high frequency.
- * @PLL_LOW: low frequency.
- * @PLL_LAST: last frequency values that were configured by the user.
- */
-enum hl_pll_frequency {
- PLL_HIGH = 1,
- PLL_LOW,
- PLL_LAST
-};
-
-#define PLL_REF_CLK 50
-
-enum div_select_defs {
- DIV_SEL_REF_CLK = 0,
- DIV_SEL_PLL_CLK = 1,
- DIV_SEL_DIVIDED_REF = 2,
- DIV_SEL_DIVIDED_PLL = 3,
-};
-
-/**
- * struct hl_asic_funcs - ASIC specific functions that are can be called from
- * common code.
- * @early_init: sets up early driver state (pre sw_init), doesn't configure H/W.
- * @early_fini: tears down what was done in early_init.
- * @late_init: sets up late driver/hw state (post hw_init) - Optional.
- * @late_fini: tears down what was done in late_init (pre hw_fini) - Optional.
- * @sw_init: sets up driver state, does not configure H/W.
- * @sw_fini: tears down driver state, does not configure H/W.
- * @hw_init: sets up the H/W state.
- * @hw_fini: tears down the H/W state.
- * @halt_engines: halt engines, needed for reset sequence. This also disables
- * interrupts from the device. Should be called before
- * hw_fini and before CS rollback.
- * @suspend: handles IP specific H/W or SW changes for suspend.
- * @resume: handles IP specific H/W or SW changes for resume.
- * @cb_mmap: maps a CB.
- * @ring_doorbell: increment PI on a given QMAN.
- * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific
- * function because the PQs are located in different memory areas
- * per ASIC (SRAM, DRAM, Host memory) and therefore, the method of
- * writing the PQE must match the destination memory area
- * properties.
- * @asic_dma_alloc_coherent: Allocate coherent DMA memory by calling
- * dma_alloc_coherent(). This is ASIC function because
- * its implementation is not trivial when the driver
- * is loaded in simulation mode (not upstreamed).
- * @asic_dma_free_coherent: Free coherent DMA memory by calling
- * dma_free_coherent(). This is ASIC function because
- * its implementation is not trivial when the driver
- * is loaded in simulation mode (not upstreamed).
- * @get_int_queue_base: get the internal queue base address.
- * @test_queues: run simple test on all queues for sanity check.
- * @asic_dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool.
- * size of allocation is HL_DMA_POOL_BLK_SIZE.
- * @asic_dma_pool_free: free small DMA allocation from pool.
- * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
- * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
- * @hl_dma_unmap_sg: DMA unmap scatter-gather list.
- * @cs_parser: parse Command Submission.
- * @asic_dma_map_sg: DMA map scatter-gather list.
- * @get_dma_desc_list_size: get number of LIN_DMA packets required for CB.
- * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it.
- * @update_eq_ci: update event queue CI.
- * @context_switch: called upon ASID context switch.
- * @restore_phase_topology: clear all SOBs amd MONs.
- * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM.
- * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM.
- * @add_device_attr: add ASIC specific device attributes.
- * @handle_eqe: handle event queue entry (IRQ) from ArmCP.
- * @set_pll_profile: change PLL profile (manual/automatic).
- * @get_events_stat: retrieve event queue entries histogram.
- * @read_pte: read MMU page table entry from DRAM.
- * @write_pte: write MMU page table entry to DRAM.
- * @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft
- * (L1 only) or hard (L0 & L1) flush.
- * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
- * ASID-VA-size mask.
- * @send_heartbeat: send is-alive packet to ArmCP and verify response.
- * @enable_clock_gating: enable clock gating for reducing power consumption.
- * @disable_clock_gating: disable clock for accessing registers on HBW.
- * @debug_coresight: perform certain actions on Coresight for debugging.
- * @is_device_idle: return true if device is idle, false otherwise.
- * @soft_reset_late_init: perform certain actions needed after soft reset.
- * @hw_queues_lock: acquire H/W queues lock.
- * @hw_queues_unlock: release H/W queues lock.
- * @get_pci_id: retrieve PCI ID.
- * @get_eeprom_data: retrieve EEPROM data from F/W.
- * @send_cpu_message: send buffer to ArmCP.
- * @get_hw_state: retrieve the H/W state
- * @pci_bars_map: Map PCI BARs.
- * @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns
- * old address the bar pointed to or U64_MAX for failure
- * @init_iatu: Initialize the iATU unit inside the PCI controller.
- * @rreg: Read a register. Needed for simulator support.
- * @wreg: Write a register. Needed for simulator support.
- * @halt_coresight: stop the ETF and ETR traces.
- * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
- * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
- * @read_device_fw_version: read the device's firmware versions that are
- * contained in registers
- * @load_firmware_to_device: load the firmware to the device's memory
- * @load_boot_fit_to_device: load boot fit to device's memory
- * @get_signal_cb_size: Get signal CB size.
- * @get_wait_cb_size: Get wait CB size.
- * @gen_signal_cb: Generate a signal CB.
- * @gen_wait_cb: Generate a wait CB.
- * @reset_sob: Reset a SOB.
- * @set_dma_mask_from_fw: set the DMA mask in the driver according to the
- * firmware configuration
- * @get_device_time: Get the device time.
- */
-struct hl_asic_funcs {
- int (*early_init)(struct hl_device *hdev);
- int (*early_fini)(struct hl_device *hdev);
- int (*late_init)(struct hl_device *hdev);
- void (*late_fini)(struct hl_device *hdev);
- int (*sw_init)(struct hl_device *hdev);
- int (*sw_fini)(struct hl_device *hdev);
- int (*hw_init)(struct hl_device *hdev);
- void (*hw_fini)(struct hl_device *hdev, bool hard_reset);
- void (*halt_engines)(struct hl_device *hdev, bool hard_reset);
- int (*suspend)(struct hl_device *hdev);
- int (*resume)(struct hl_device *hdev);
- int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
- u64 kaddress, phys_addr_t paddress, u32 size);
- void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
- void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
- struct hl_bd *bd);
- void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag);
- void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle);
- void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id,
- dma_addr_t *dma_handle, u16 *queue_len);
- int (*test_queues)(struct hl_device *hdev);
- void* (*asic_dma_pool_zalloc)(struct hl_device *hdev, size_t size,
- gfp_t mem_flags, dma_addr_t *dma_handle);
- void (*asic_dma_pool_free)(struct hl_device *hdev, void *vaddr,
- dma_addr_t dma_addr);
- void* (*cpu_accessible_dma_pool_alloc)(struct hl_device *hdev,
- size_t size, dma_addr_t *dma_handle);
- void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
- size_t size, void *vaddr);
- void (*hl_dma_unmap_sg)(struct hl_device *hdev,
- struct scatterlist *sgl, int nents,
- enum dma_data_direction dir);
- int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
- int (*asic_dma_map_sg)(struct hl_device *hdev,
- struct scatterlist *sgl, int nents,
- enum dma_data_direction dir);
- u32 (*get_dma_desc_list_size)(struct hl_device *hdev,
- struct sg_table *sgt);
- void (*add_end_of_cb_packets)(struct hl_device *hdev,
- u64 kernel_address, u32 len,
- u64 cq_addr, u32 cq_val, u32 msix_num,
- bool eb);
- void (*update_eq_ci)(struct hl_device *hdev, u32 val);
- int (*context_switch)(struct hl_device *hdev, u32 asid);
- void (*restore_phase_topology)(struct hl_device *hdev);
- int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val);
- int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val);
- int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val);
- int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val);
- void (*add_device_attr)(struct hl_device *hdev,
- struct attribute_group *dev_attr_grp);
- void (*handle_eqe)(struct hl_device *hdev,
- struct hl_eq_entry *eq_entry);
- void (*set_pll_profile)(struct hl_device *hdev,
- enum hl_pll_frequency freq);
- void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
- u32 *size);
- u64 (*read_pte)(struct hl_device *hdev, u64 addr);
- void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val);
- int (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard,
- u32 flags);
- int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
- u32 asid, u64 va, u64 size);
- int (*send_heartbeat)(struct hl_device *hdev);
- void (*enable_clock_gating)(struct hl_device *hdev);
- void (*disable_clock_gating)(struct hl_device *hdev);
- int (*debug_coresight)(struct hl_device *hdev, void *data);
- bool (*is_device_idle)(struct hl_device *hdev, u32 *mask,
- struct seq_file *s);
- int (*soft_reset_late_init)(struct hl_device *hdev);
- void (*hw_queues_lock)(struct hl_device *hdev);
- void (*hw_queues_unlock)(struct hl_device *hdev);
- u32 (*get_pci_id)(struct hl_device *hdev);
- int (*get_eeprom_data)(struct hl_device *hdev, void *data,
- size_t max_size);
- int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
- u16 len, u32 timeout, long *result);
- enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev);
- int (*pci_bars_map)(struct hl_device *hdev);
- u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
- int (*init_iatu)(struct hl_device *hdev);
- u32 (*rreg)(struct hl_device *hdev, u32 reg);
- void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
- void (*halt_coresight)(struct hl_device *hdev);
- int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
- u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
- void (*read_device_fw_version)(struct hl_device *hdev,
- enum hl_fw_component fwc);
- int (*load_firmware_to_device)(struct hl_device *hdev);
- int (*load_boot_fit_to_device)(struct hl_device *hdev);
- u32 (*get_signal_cb_size)(struct hl_device *hdev);
- u32 (*get_wait_cb_size)(struct hl_device *hdev);
- void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id);
- void (*gen_wait_cb)(struct hl_device *hdev, void *data, u16 sob_id,
- u16 sob_val, u16 mon_id, u32 q_idx);
- void (*reset_sob)(struct hl_device *hdev, void *data);
- void (*set_dma_mask_from_fw)(struct hl_device *hdev);
- u64 (*get_device_time)(struct hl_device *hdev);
-};
-
-
-/*
- * CONTEXTS
- */
-
-#define HL_KERNEL_ASID_ID 0
-
-/**
- * struct hl_va_range - virtual addresses range.
- * @lock: protects the virtual addresses list.
- * @list: list of virtual addresses blocks available for mappings.
- * @start_addr: range start address.
- * @end_addr: range end address.
- */
-struct hl_va_range {
- struct mutex lock;
- struct list_head list;
- u64 start_addr;
- u64 end_addr;
-};
-
-/**
- * struct hl_ctx - user/kernel context.
- * @mem_hash: holds mapping from virtual address to virtual memory area
- * descriptor (hl_vm_phys_pg_list or hl_userptr).
- * @mmu_shadow_hash: holds a mapping from shadow address to pgt_info structure.
- * @hpriv: pointer to the private (Kernel Driver) data of the process (fd).
- * @hdev: pointer to the device structure.
- * @refcount: reference counter for the context. Context is released only when
- * this hits 0l. It is incremented on CS and CS_WAIT.
- * @cs_pending: array of DMA fence objects representing pending CS.
- * @host_va_range: holds available virtual addresses for host mappings.
- * @host_huge_va_range: holds available virtual addresses for host mappings
- * with huge pages.
- * @dram_va_range: holds available virtual addresses for DRAM mappings.
- * @mem_hash_lock: protects the mem_hash.
- * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
- * MMU hash or walking the PGT requires talking this lock.
- * @debugfs_list: node in debugfs list of contexts.
- * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
- * to user so user could inquire about CS. It is used as
- * index to cs_pending array.
- * @dram_default_hops: array that holds all hops addresses needed for default
- * DRAM mapping.
- * @cs_lock: spinlock to protect cs_sequence.
- * @dram_phys_mem: amount of used physical DRAM memory by this context.
- * @thread_ctx_switch_token: token to prevent multiple threads of the same
- * context from running the context switch phase.
- * Only a single thread should run it.
- * @thread_ctx_switch_wait_token: token to prevent the threads that didn't run
- * the context switch phase from moving to their
- * execution phase before the context switch phase
- * has finished.
- * @asid: context's unique address space ID in the device's MMU.
- * @handle: context's opaque handle for user
- */
-struct hl_ctx {
- DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
- DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS);
- struct hl_fpriv *hpriv;
- struct hl_device *hdev;
- struct kref refcount;
- struct dma_fence **cs_pending;
- struct hl_va_range *host_va_range;
- struct hl_va_range *host_huge_va_range;
- struct hl_va_range *dram_va_range;
- struct mutex mem_hash_lock;
- struct mutex mmu_lock;
- struct list_head debugfs_list;
- struct hl_cs_counters cs_counters;
- u64 cs_sequence;
- u64 *dram_default_hops;
- spinlock_t cs_lock;
- atomic64_t dram_phys_mem;
- atomic_t thread_ctx_switch_token;
- u32 thread_ctx_switch_wait_token;
- u32 asid;
- u32 handle;
-};
-
-/**
- * struct hl_ctx_mgr - for handling multiple contexts.
- * @ctx_lock: protects ctx_handles.
- * @ctx_handles: idr to hold all ctx handles.
- */
-struct hl_ctx_mgr {
- struct mutex ctx_lock;
- struct idr ctx_handles;
-};
-
-
-
-/*
- * COMMAND SUBMISSIONS
- */
-
-/**
- * struct hl_userptr - memory mapping chunk information
- * @vm_type: type of the VM.
- * @job_node: linked-list node for hanging the object on the Job's list.
- * @vec: pointer to the frame vector.
- * @sgt: pointer to the scatter-gather table that holds the pages.
- * @dir: for DMA unmapping, the direction must be supplied, so save it.
- * @debugfs_list: node in debugfs list of command submissions.
- * @addr: user-space virtual address of the start of the memory area.
- * @size: size of the memory area to pin & map.
- * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
- */
-struct hl_userptr {
- enum vm_type_t vm_type; /* must be first */
- struct list_head job_node;
- struct frame_vector *vec;
- struct sg_table *sgt;
- enum dma_data_direction dir;
- struct list_head debugfs_list;
- u64 addr;
- u32 size;
- u8 dma_mapped;
-};
-
-/**
- * struct hl_cs - command submission.
- * @jobs_in_queue_cnt: per each queue, maintain counter of submitted jobs.
- * @ctx: the context this CS belongs to.
- * @job_list: list of the CS's jobs in the various queues.
- * @job_lock: spinlock for the CS's jobs list. Needed for free_job.
- * @refcount: reference counter for usage of the CS.
- * @fence: pointer to the fence object of this CS.
- * @signal_fence: pointer to the fence object of the signal CS (used by wait
- * CS only).
- * @finish_work: workqueue object to run when CS is completed by H/W.
- * @work_tdr: delayed work node for TDR.
- * @mirror_node : node in device mirror list of command submissions.
- * @debugfs_list: node in debugfs list of command submissions.
- * @sequence: the sequence number of this CS.
- * @type: CS_TYPE_*.
- * @submitted: true if CS was submitted to H/W.
- * @completed: true if CS was completed by device.
- * @timedout : true if CS was timedout.
- * @tdr_active: true if TDR was activated for this CS (to prevent
- * double TDR activation).
- * @aborted: true if CS was aborted due to some device error.
- */
-struct hl_cs {
- u16 *jobs_in_queue_cnt;
- struct hl_ctx *ctx;
- struct list_head job_list;
- spinlock_t job_lock;
- struct kref refcount;
- struct dma_fence *fence;
- struct dma_fence *signal_fence;
- struct work_struct finish_work;
- struct delayed_work work_tdr;
- struct list_head mirror_node;
- struct list_head debugfs_list;
- u64 sequence;
- enum hl_cs_type type;
- u8 submitted;
- u8 completed;
- u8 timedout;
- u8 tdr_active;
- u8 aborted;
-};
-
-/**
- * struct hl_cs_job - command submission job.
- * @cs_node: the node to hang on the CS jobs list.
- * @cs: the CS this job belongs to.
- * @user_cb: the CB we got from the user.
- * @patched_cb: in case of patching, this is internal CB which is submitted on
- * the queue instead of the CB we got from the IOCTL.
- * @finish_work: workqueue object to run when job is completed.
- * @userptr_list: linked-list of userptr mappings that belong to this job and
- * wait for completion.
- * @debugfs_list: node in debugfs list of command submission jobs.
- * @queue_type: the type of the H/W queue this job is submitted to.
- * @id: the id of this job inside a CS.
- * @hw_queue_id: the id of the H/W queue this job is submitted to.
- * @user_cb_size: the actual size of the CB we got from the user.
- * @job_cb_size: the actual size of the CB that we put on the queue.
- * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
- * handle to a kernel-allocated CB object, false
- * otherwise (SRAM/DRAM/host address).
- * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This
- * info is needed later, when adding the 2xMSG_PROT at the
- * end of the JOB, to know which barriers to put in the
- * MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
- * have streams so the engine can't be busy by another
- * stream.
- */
-struct hl_cs_job {
- struct list_head cs_node;
- struct hl_cs *cs;
- struct hl_cb *user_cb;
- struct hl_cb *patched_cb;
- struct work_struct finish_work;
- struct list_head userptr_list;
- struct list_head debugfs_list;
- enum hl_queue_type queue_type;
- u32 id;
- u32 hw_queue_id;
- u32 user_cb_size;
- u32 job_cb_size;
- u8 is_kernel_allocated_cb;
- u8 contains_dma_pkt;
-};
-
-/**
- * struct hl_cs_parser - command submission parser properties.
- * @user_cb: the CB we got from the user.
- * @patched_cb: in case of patching, this is internal CB which is submitted on
- * the queue instead of the CB we got from the IOCTL.
- * @job_userptr_list: linked-list of userptr mappings that belong to the related
- * job and wait for completion.
- * @cs_sequence: the sequence number of the related CS.
- * @queue_type: the type of the H/W queue this job is submitted to.
- * @ctx_id: the ID of the context the related CS belongs to.
- * @hw_queue_id: the id of the H/W queue this job is submitted to.
- * @user_cb_size: the actual size of the CB we got from the user.
- * @patched_cb_size: the size of the CB after parsing.
- * @job_id: the id of the related job inside the related CS.
- * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
- * handle to a kernel-allocated CB object, false
- * otherwise (SRAM/DRAM/host address).
- * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This
- * info is needed later, when adding the 2xMSG_PROT at the
- * end of the JOB, to know which barriers to put in the
- * MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
- * have streams so the engine can't be busy by another
- * stream.
- */
-struct hl_cs_parser {
- struct hl_cb *user_cb;
- struct hl_cb *patched_cb;
- struct list_head *job_userptr_list;
- u64 cs_sequence;
- enum hl_queue_type queue_type;
- u32 ctx_id;
- u32 hw_queue_id;
- u32 user_cb_size;
- u32 patched_cb_size;
- u8 job_id;
- u8 is_kernel_allocated_cb;
- u8 contains_dma_pkt;
-};
-
-
-/*
- * MEMORY STRUCTURE
- */
-
-/**
- * struct hl_vm_hash_node - hash element from virtual address to virtual
- * memory area descriptor (hl_vm_phys_pg_list or
- * hl_userptr).
- * @node: node to hang on the hash table in context object.
- * @vaddr: key virtual address.
- * @ptr: value pointer (hl_vm_phys_pg_list or hl_userptr).
- */
-struct hl_vm_hash_node {
- struct hlist_node node;
- u64 vaddr;
- void *ptr;
-};
-
-/**
- * struct hl_vm_phys_pg_pack - physical page pack.
- * @vm_type: describes the type of the virtual area descriptor.
- * @pages: the physical page array.
- * @npages: num physical pages in the pack.
- * @total_size: total size of all the pages in this list.
- * @mapping_cnt: number of shared mappings.
- * @asid: the context related to this list.
- * @page_size: size of each page in the pack.
- * @flags: HL_MEM_* flags related to this list.
- * @handle: the provided handle related to this list.
- * @offset: offset from the first page.
- * @contiguous: is contiguous physical memory.
- * @created_from_userptr: is product of host virtual address.
- */
-struct hl_vm_phys_pg_pack {
- enum vm_type_t vm_type; /* must be first */
- u64 *pages;
- u64 npages;
- u64 total_size;
- atomic_t mapping_cnt;
- u32 asid;
- u32 page_size;
- u32 flags;
- u32 handle;
- u32 offset;
- u8 contiguous;
- u8 created_from_userptr;
-};
-
-/**
- * struct hl_vm_va_block - virtual range block information.
- * @node: node to hang on the virtual range list in context object.
- * @start: virtual range start address.
- * @end: virtual range end address.
- * @size: virtual range size.
- */
-struct hl_vm_va_block {
- struct list_head node;
- u64 start;
- u64 end;
- u64 size;
-};
-
-/**
- * struct hl_vm - virtual memory manager for MMU.
- * @dram_pg_pool: pool for DRAM physical pages of 2MB.
- * @dram_pg_pool_refcount: reference counter for the pool usage.
- * @idr_lock: protects the phys_pg_list_handles.
- * @phys_pg_pack_handles: idr to hold all device allocations handles.
- * @init_done: whether initialization was done. We need this because VM
- * initialization might be skipped during device initialization.
- */
-struct hl_vm {
- struct gen_pool *dram_pg_pool;
- struct kref dram_pg_pool_refcount;
- spinlock_t idr_lock;
- struct idr phys_pg_pack_handles;
- u8 init_done;
-};
-
-
-/*
- * DEBUG, PROFILING STRUCTURE
- */
-
-/**
- * struct hl_debug_params - Coresight debug parameters.
- * @input: pointer to component specific input parameters.
- * @output: pointer to component specific output parameters.
- * @output_size: size of output buffer.
- * @reg_idx: relevant register ID.
- * @op: component operation to execute.
- * @enable: true if to enable component debugging, false otherwise.
- */
-struct hl_debug_params {
- void *input;
- void *output;
- u32 output_size;
- u32 reg_idx;
- u32 op;
- bool enable;
-};
-
-/*
- * FILE PRIVATE STRUCTURE
- */
-
-/**
- * struct hl_fpriv - process information stored in FD private data.
- * @hdev: habanalabs device structure.
- * @filp: pointer to the given file structure.
- * @taskpid: current process ID.
- * @ctx: current executing context. TODO: remove for multiple ctx per process
- * @ctx_mgr: context manager to handle multiple context for this FD.
- * @cb_mgr: command buffer manager to handle multiple buffers for this FD.
- * @debugfs_list: list of relevant ASIC debugfs.
- * @dev_node: node in the device list of file private data
- * @refcount: number of related contexts.
- * @restore_phase_mutex: lock for context switch and restore phase.
- * @is_control: true for control device, false otherwise
- */
-struct hl_fpriv {
- struct hl_device *hdev;
- struct file *filp;
- struct pid *taskpid;
- struct hl_ctx *ctx;
- struct hl_ctx_mgr ctx_mgr;
- struct hl_cb_mgr cb_mgr;
- struct list_head debugfs_list;
- struct list_head dev_node;
- struct kref refcount;
- struct mutex restore_phase_mutex;
- u8 is_control;
-};
-
-
-/*
- * DebugFS
- */
-
-/**
- * struct hl_info_list - debugfs file ops.
- * @name: file name.
- * @show: function to output information.
- * @write: function to write to the file.
- */
-struct hl_info_list {
- const char *name;
- int (*show)(struct seq_file *s, void *data);
- ssize_t (*write)(struct file *file, const char __user *buf,
- size_t count, loff_t *f_pos);
-};
-
-/**
- * struct hl_debugfs_entry - debugfs dentry wrapper.
- * @dent: base debugfs entry structure.
- * @info_ent: dentry realted ops.
- * @dev_entry: ASIC specific debugfs manager.
- */
-struct hl_debugfs_entry {
- struct dentry *dent;
- const struct hl_info_list *info_ent;
- struct hl_dbg_device_entry *dev_entry;
-};
-
-/**
- * struct hl_dbg_device_entry - ASIC specific debugfs manager.
- * @root: root dentry.
- * @hdev: habanalabs device structure.
- * @entry_arr: array of available hl_debugfs_entry.
- * @file_list: list of available debugfs files.
- * @file_mutex: protects file_list.
- * @cb_list: list of available CBs.
- * @cb_spinlock: protects cb_list.
- * @cs_list: list of available CSs.
- * @cs_spinlock: protects cs_list.
- * @cs_job_list: list of available CB jobs.
- * @cs_job_spinlock: protects cs_job_list.
- * @userptr_list: list of available userptrs (virtual memory chunk descriptor).
- * @userptr_spinlock: protects userptr_list.
- * @ctx_mem_hash_list: list of available contexts with MMU mappings.
- * @ctx_mem_hash_spinlock: protects cb_list.
- * @addr: next address to read/write from/to in read/write32.
- * @mmu_addr: next virtual address to translate to physical address in mmu_show.
- * @mmu_asid: ASID to use while translating in mmu_show.
- * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read.
- * @i2c_bus: generic u8 debugfs file for address value to use in i2c_data_read.
- * @i2c_bus: generic u8 debugfs file for register value to use in i2c_data_read.
- */
-struct hl_dbg_device_entry {
- struct dentry *root;
- struct hl_device *hdev;
- struct hl_debugfs_entry *entry_arr;
- struct list_head file_list;
- struct mutex file_mutex;
- struct list_head cb_list;
- spinlock_t cb_spinlock;
- struct list_head cs_list;
- spinlock_t cs_spinlock;
- struct list_head cs_job_list;
- spinlock_t cs_job_spinlock;
- struct list_head userptr_list;
- spinlock_t userptr_spinlock;
- struct list_head ctx_mem_hash_list;
- spinlock_t ctx_mem_hash_spinlock;
- u64 addr;
- u64 mmu_addr;
- u32 mmu_asid;
- u8 i2c_bus;
- u8 i2c_addr;
- u8 i2c_reg;
-};
-
-
-/*
- * DEVICES
- */
-
-/* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
- * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
- */
-#define HL_MAX_MINORS 256
-
-/*
- * Registers read & write functions.
- */
-
-u32 hl_rreg(struct hl_device *hdev, u32 reg);
-void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
-
-#define RREG32(reg) hdev->asic_funcs->rreg(hdev, (reg))
-#define WREG32(reg, v) hdev->asic_funcs->wreg(hdev, (reg), (v))
-#define DREG32(reg) pr_info("REGISTER: " #reg " : 0x%08X\n", \
- hdev->asic_funcs->rreg(hdev, (reg)))
-
-#define WREG32_P(reg, val, mask) \
- do { \
- u32 tmp_ = RREG32(reg); \
- tmp_ &= (mask); \
- tmp_ |= ((val) & ~(mask)); \
- WREG32(reg, tmp_); \
- } while (0)
-#define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
-#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))
-
-#define RMWREG32(reg, val, mask) \
- do { \
- u32 tmp_ = RREG32(reg); \
- tmp_ &= ~(mask); \
- tmp_ |= ((val) << __ffs(mask)); \
- WREG32(reg, tmp_); \
- } while (0)
-
-#define RREG32_MASK(reg, mask) ((RREG32(reg) & mask) >> __ffs(mask))
-
-#define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT
-#define REG_FIELD_MASK(reg, field) reg##_##field##_MASK
-#define WREG32_FIELD(reg, offset, field, val) \
- WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & \
- ~REG_FIELD_MASK(reg, field)) | \
- (val) << REG_FIELD_SHIFT(reg, field))
-
-/* Timeout should be longer when working with simulator but cap the
- * increased timeout to some maximum
- */
-#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
-({ \
- ktime_t __timeout; \
- if (hdev->pdev) \
- __timeout = ktime_add_us(ktime_get(), timeout_us); \
- else \
- __timeout = ktime_add_us(ktime_get(),\
- min((u64)(timeout_us * 10), \
- (u64) HL_SIM_MAX_TIMEOUT_US)); \
- might_sleep_if(sleep_us); \
- for (;;) { \
- (val) = RREG32(addr); \
- if (cond) \
- break; \
- if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
- (val) = RREG32(addr); \
- break; \
- } \
- if (sleep_us) \
- usleep_range((sleep_us >> 2) + 1, sleep_us); \
- } \
- (cond) ? 0 : -ETIMEDOUT; \
-})
-
-/*
- * address in this macro points always to a memory location in the
- * host's (server's) memory. That location is updated asynchronously
- * either by the direct access of the device or by another core.
- *
- * To work both in LE and BE architectures, we need to distinguish between the
- * two states (device or another core updates the memory location). Therefore,
- * if mem_written_by_device is true, the host memory being polled will be
- * updated directly by the device. If false, the host memory being polled will
- * be updated by host CPU. Required so host knows whether or not the memory
- * might need to be byte-swapped before returning value to caller.
- */
-#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \
- mem_written_by_device) \
-({ \
- ktime_t __timeout; \
- if (hdev->pdev) \
- __timeout = ktime_add_us(ktime_get(), timeout_us); \
- else \
- __timeout = ktime_add_us(ktime_get(),\
- min((u64)(timeout_us * 10), \
- (u64) HL_SIM_MAX_TIMEOUT_US)); \
- might_sleep_if(sleep_us); \
- for (;;) { \
- /* Verify we read updates done by other cores or by device */ \
- mb(); \
- (val) = *((u32 *) (uintptr_t) (addr)); \
- if (mem_written_by_device) \
- (val) = le32_to_cpu(*(__le32 *) &(val)); \
- if (cond) \
- break; \
- if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
- (val) = *((u32 *) (uintptr_t) (addr)); \
- if (mem_written_by_device) \
- (val) = le32_to_cpu(*(__le32 *) &(val)); \
- break; \
- } \
- if (sleep_us) \
- usleep_range((sleep_us >> 2) + 1, sleep_us); \
- } \
- (cond) ? 0 : -ETIMEDOUT; \
-})
-
-#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \
- timeout_us) \
-({ \
- ktime_t __timeout; \
- if (hdev->pdev) \
- __timeout = ktime_add_us(ktime_get(), timeout_us); \
- else \
- __timeout = ktime_add_us(ktime_get(),\
- min((u64)(timeout_us * 10), \
- (u64) HL_SIM_MAX_TIMEOUT_US)); \
- might_sleep_if(sleep_us); \
- for (;;) { \
- (val) = readl(addr); \
- if (cond) \
- break; \
- if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
- (val) = readl(addr); \
- break; \
- } \
- if (sleep_us) \
- usleep_range((sleep_us >> 2) + 1, sleep_us); \
- } \
- (cond) ? 0 : -ETIMEDOUT; \
-})
-
-struct hwmon_chip_info;
-
-/**
- * struct hl_device_reset_work - reset workqueue task wrapper.
- * @reset_work: reset work to be done.
- * @hdev: habanalabs device structure.
- */
-struct hl_device_reset_work {
- struct work_struct reset_work;
- struct hl_device *hdev;
-};
-
-/**
- * struct hl_device_idle_busy_ts - used for calculating device utilization rate.
- * @idle_to_busy_ts: timestamp where device changed from idle to busy.
- * @busy_to_idle_ts: timestamp where device changed from busy to idle.
- */
-struct hl_device_idle_busy_ts {
- ktime_t idle_to_busy_ts;
- ktime_t busy_to_idle_ts;
-};
-
-/**
- * struct hl_device - habanalabs device structure.
- * @pdev: pointer to PCI device, can be NULL in case of simulator device.
- * @pcie_bar_phys: array of available PCIe bars physical addresses.
- * (required only for PCI address match mode)
- * @pcie_bar: array of available PCIe bars virtual addresses.
- * @rmmio: configuration area address on SRAM.
- * @cdev: related char device.
- * @cdev_ctrl: char device for control operations only (INFO IOCTL)
- * @dev: related kernel basic device structure.
- * @dev_ctrl: related kernel device structure for the control device
- * @work_freq: delayed work to lower device frequency if possible.
- * @work_heartbeat: delayed work for ArmCP is-alive check.
- * @asic_name: ASIC specific nmae.
- * @asic_type: ASIC specific type.
- * @completion_queue: array of hl_cq.
- * @cq_wq: work queues of completion queues for executing work in process
- * context.
- * @eq_wq: work queue of event queue for executing work in process context.
- * @kernel_ctx: Kernel driver context structure.
- * @kernel_queues: array of hl_hw_queue.
- * @hw_queues_mirror_list: CS mirror list for TDR.
- * @hw_queues_mirror_lock: protects hw_queues_mirror_list.
- * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs.
- * @event_queue: event queue for IRQ from ArmCP.
- * @dma_pool: DMA pool for small allocations.
- * @cpu_accessible_dma_mem: Host <-> ArmCP shared memory CPU address.
- * @cpu_accessible_dma_address: Host <-> ArmCP shared memory DMA address.
- * @cpu_accessible_dma_pool: Host <-> ArmCP shared memory pool.
- * @asid_bitmap: holds used/available ASIDs.
- * @asid_mutex: protects asid_bitmap.
- * @send_cpu_message_lock: enforces only one message in Host <-> ArmCP queue.
- * @debug_lock: protects critical section of setting debug mode for device
- * @asic_prop: ASIC specific immutable properties.
- * @asic_funcs: ASIC specific functions.
- * @asic_specific: ASIC specific information to use only from ASIC files.
- * @mmu_pgt_pool: pool of available MMU hops.
- * @vm: virtual memory manager for MMU.
- * @mmu_cache_lock: protects MMU cache invalidation as it can serve one context.
- * @mmu_shadow_hop0: shadow mapping of the MMU hop 0 zone.
- * @hwmon_dev: H/W monitor device.
- * @pm_mng_profile: current power management profile.
- * @hl_chip_info: ASIC's sensors information.
- * @hl_debugfs: device's debugfs manager.
- * @cb_pool: list of preallocated CBs.
- * @cb_pool_lock: protects the CB pool.
- * @fpriv_list: list of file private data structures. Each structure is created
- * when a user opens the device
- * @fpriv_list_lock: protects the fpriv_list
- * @compute_ctx: current compute context executing.
- * @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy
- * and vice-versa
- * @aggregated_cs_counters: aggregated cs counters among all contexts
- * @dram_used_mem: current DRAM memory consumption.
- * @timeout_jiffies: device CS timeout value.
- * @max_power: the max power of the device, as configured by the sysadmin. This
- * value is saved so in case of hard-reset, the driver will restore
- * this value and update the F/W after the re-initialization
- * @in_reset: is device in reset flow.
- * @curr_pll_profile: current PLL profile.
- * @cs_active_cnt: number of active command submissions on this device (active
- * means already in H/W queues)
- * @major: habanalabs kernel driver major.
- * @high_pll: high PLL profile frequency.
- * @soft_reset_cnt: number of soft reset since the driver was loaded.
- * @hard_reset_cnt: number of hard reset since the driver was loaded.
- * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr
- * @id: device minor.
- * @id_control: minor of the control device
- * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
- * addresses.
- * @disabled: is device disabled.
- * @late_init_done: is late init stage was done during initialization.
- * @hwmon_initialized: is H/W monitor sensors was initialized.
- * @hard_reset_pending: is there a hard reset work pending.
- * @heartbeat: is heartbeat sanity check towards ArmCP enabled.
- * @reset_on_lockup: true if a reset should be done in case of stuck CS, false
- * otherwise.
- * @dram_supports_virtual_memory: is MMU enabled towards DRAM.
- * @dram_default_page_mapping: is DRAM default page mapping enabled.
- * @pmmu_huge_range: is a different virtual addresses range used for PMMU with
- * huge pages.
- * @init_done: is the initialization of the device done.
- * @mmu_enable: is MMU enabled.
- * @mmu_huge_page_opt: is MMU huge pages optimization enabled.
- * @clock_gating: is clock gating enabled.
- * @device_cpu_disabled: is the device CPU disabled (due to timeouts)
- * @dma_mask: the dma mask that was set for this device
- * @in_debug: is device under debug. This, together with fpriv_list, enforces
- * that only a single user is configuring the debug infrastructure.
- * @power9_64bit_dma_enable: true to enable 64-bit DMA mask support. Relevant
- * only to POWER9 machines.
- * @cdev_sysfs_created: were char devices and sysfs nodes created.
- * @stop_on_err: true if engines should stop on error.
- * @supports_sync_stream: is sync stream supported.
- * @sync_stream_queue_idx: helper index for sync stream queues initialization.
- * @supports_coresight: is CoreSight supported.
- * @supports_soft_reset: is soft reset supported.
- */
-struct hl_device {
- struct pci_dev *pdev;
- u64 pcie_bar_phys[HL_PCI_NUM_BARS];
- void __iomem *pcie_bar[HL_PCI_NUM_BARS];
- void __iomem *rmmio;
- struct cdev cdev;
- struct cdev cdev_ctrl;
- struct device *dev;
- struct device *dev_ctrl;
- struct delayed_work work_freq;
- struct delayed_work work_heartbeat;
- char asic_name[16];
- enum hl_asic_type asic_type;
- struct hl_cq *completion_queue;
- struct workqueue_struct **cq_wq;
- struct workqueue_struct *eq_wq;
- struct hl_ctx *kernel_ctx;
- struct hl_hw_queue *kernel_queues;
- struct list_head hw_queues_mirror_list;
- spinlock_t hw_queues_mirror_lock;
- struct hl_cb_mgr kernel_cb_mgr;
- struct hl_eq event_queue;
- struct dma_pool *dma_pool;
- void *cpu_accessible_dma_mem;
- dma_addr_t cpu_accessible_dma_address;
- struct gen_pool *cpu_accessible_dma_pool;
- unsigned long *asid_bitmap;
- struct mutex asid_mutex;
- struct mutex send_cpu_message_lock;
- struct mutex debug_lock;
- struct asic_fixed_properties asic_prop;
- const struct hl_asic_funcs *asic_funcs;
- void *asic_specific;
- struct gen_pool *mmu_pgt_pool;
- struct hl_vm vm;
- struct mutex mmu_cache_lock;
- void *mmu_shadow_hop0;
- struct device *hwmon_dev;
- enum hl_pm_mng_profile pm_mng_profile;
- struct hwmon_chip_info *hl_chip_info;
-
- struct hl_dbg_device_entry hl_debugfs;
-
- struct list_head cb_pool;
- spinlock_t cb_pool_lock;
-
- struct list_head fpriv_list;
- struct mutex fpriv_list_lock;
-
- struct hl_ctx *compute_ctx;
-
- struct hl_device_idle_busy_ts *idle_busy_ts_arr;
-
- struct hl_cs_counters aggregated_cs_counters;
-
- atomic64_t dram_used_mem;
- u64 timeout_jiffies;
- u64 max_power;
- atomic_t in_reset;
- enum hl_pll_frequency curr_pll_profile;
- int cs_active_cnt;
- u32 major;
- u32 high_pll;
- u32 soft_reset_cnt;
- u32 hard_reset_cnt;
- u32 idle_busy_ts_idx;
- u16 id;
- u16 id_control;
- u16 cpu_pci_msb_addr;
- u8 disabled;
- u8 late_init_done;
- u8 hwmon_initialized;
- u8 hard_reset_pending;
- u8 heartbeat;
- u8 reset_on_lockup;
- u8 dram_supports_virtual_memory;
- u8 dram_default_page_mapping;
- u8 pmmu_huge_range;
- u8 init_done;
- u8 clock_gating;
- u8 device_cpu_disabled;
- u8 dma_mask;
- u8 in_debug;
- u8 power9_64bit_dma_enable;
- u8 cdev_sysfs_created;
- u8 stop_on_err;
- u8 supports_sync_stream;
- u8 sync_stream_queue_idx;
- u8 supports_coresight;
- u8 supports_soft_reset;
-
- /* Parameters for bring-up */
- u8 mmu_enable;
- u8 mmu_huge_page_opt;
- u8 cpu_enable;
- u8 reset_pcilink;
- u8 cpu_queues_enable;
- u8 fw_loading;
- u8 pldm;
- u8 axi_drain;
- u8 sram_scrambler_enable;
- u8 dram_scrambler_enable;
- u8 hard_reset_on_fw_events;
- u8 bmc_enable;
- u8 rl_enable;
-};
-
-
-/*
- * IOCTLs
- */
-
-/**
- * typedef hl_ioctl_t - typedef for ioctl function in the driver
- * @hpriv: pointer to the FD's private data, which contains state of
- * user process
- * @data: pointer to the input/output arguments structure of the IOCTL
- *
- * Return: 0 for success, negative value for error
- */
-typedef int hl_ioctl_t(struct hl_fpriv *hpriv, void *data);
-
-/**
- * struct hl_ioctl_desc - describes an IOCTL entry of the driver.
- * @cmd: the IOCTL code as created by the kernel macros.
- * @func: pointer to the driver's function that should be called for this IOCTL.
- */
-struct hl_ioctl_desc {
- unsigned int cmd;
- hl_ioctl_t *func;
-};
-
-
-/*
- * Kernel module functions that can be accessed by entire module
- */
-
-/**
- * hl_mem_area_inside_range() - Checks whether address+size are inside a range.
- * @address: The start address of the area we want to validate.
- * @size: The size in bytes of the area we want to validate.
- * @range_start_address: The start address of the valid range.
- * @range_end_address: The end address of the valid range.
- *
- * Return: true if the area is inside the valid range, false otherwise.
- */
-static inline bool hl_mem_area_inside_range(u64 address, u32 size,
- u64 range_start_address, u64 range_end_address)
-{
- u64 end_address = address + size;
-
- if ((address >= range_start_address) &&
- (end_address <= range_end_address) &&
- (end_address > address))
- return true;
-
- return false;
-}
-
-/**
- * hl_mem_area_crosses_range() - Checks whether address+size crossing a range.
- * @address: The start address of the area we want to validate.
- * @size: The size in bytes of the area we want to validate.
- * @range_start_address: The start address of the valid range.
- * @range_end_address: The end address of the valid range.
- *
- * Return: true if the area overlaps part or all of the valid range,
- * false otherwise.
- */
-static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
- u64 range_start_address, u64 range_end_address)
-{
- u64 end_address = address + size;
-
- if ((address >= range_start_address) &&
- (address < range_end_address))
- return true;
-
- if ((end_address >= range_start_address) &&
- (end_address < range_end_address))
- return true;
-
- if ((address < range_start_address) &&
- (end_address >= range_end_address))
- return true;
-
- return false;
-}
-
-int hl_device_open(struct inode *inode, struct file *filp);
-int hl_device_open_ctrl(struct inode *inode, struct file *filp);
-bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
-enum hl_device_status hl_device_status(struct hl_device *hdev);
-int hl_device_set_debug_mode(struct hl_device *hdev, bool enable);
-int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
- enum hl_asic_type asic_type, int minor);
-void destroy_hdev(struct hl_device *hdev);
-int hl_hw_queues_create(struct hl_device *hdev);
-void hl_hw_queues_destroy(struct hl_device *hdev);
-int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
- u32 cb_size, u64 cb_ptr);
-int hl_hw_queue_schedule_cs(struct hl_cs *cs);
-u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
-void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
-void hl_int_hw_queue_update_ci(struct hl_cs *cs);
-void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset);
-
-#define hl_queue_inc_ptr(p) hl_hw_queue_add_ptr(p, 1)
-#define hl_pi_2_offset(pi) ((pi) & (HL_QUEUE_LENGTH - 1))
-
-int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id);
-void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q);
-int hl_eq_init(struct hl_device *hdev, struct hl_eq *q);
-void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q);
-void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q);
-void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
-irqreturn_t hl_irq_handler_cq(int irq, void *arg);
-irqreturn_t hl_irq_handler_eq(int irq, void *arg);
-u32 hl_cq_inc_ptr(u32 ptr);
-
-int hl_asid_init(struct hl_device *hdev);
-void hl_asid_fini(struct hl_device *hdev);
-unsigned long hl_asid_alloc(struct hl_device *hdev);
-void hl_asid_free(struct hl_device *hdev, unsigned long asid);
-
-int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv);
-void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx);
-int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
-void hl_ctx_do_release(struct kref *ref);
-void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx);
-int hl_ctx_put(struct hl_ctx *ctx);
-struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
-void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
-void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
-
-int hl_device_init(struct hl_device *hdev, struct class *hclass);
-void hl_device_fini(struct hl_device *hdev);
-int hl_device_suspend(struct hl_device *hdev);
-int hl_device_resume(struct hl_device *hdev);
-int hl_device_reset(struct hl_device *hdev, bool hard_reset,
- bool from_hard_reset_thread);
-void hl_hpriv_get(struct hl_fpriv *hpriv);
-void hl_hpriv_put(struct hl_fpriv *hpriv);
-int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
-uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms);
-
-int hl_build_hwmon_channel_info(struct hl_device *hdev,
- struct armcp_sensor *sensors_arr);
-
-int hl_sysfs_init(struct hl_device *hdev);
-void hl_sysfs_fini(struct hl_device *hdev);
-
-int hl_hwmon_init(struct hl_device *hdev);
-void hl_hwmon_fini(struct hl_device *hdev);
-
-int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size,
- u64 *handle, int ctx_id);
-int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
-int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
-struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
- u32 handle);
-void hl_cb_put(struct hl_cb *cb);
-void hl_cb_mgr_init(struct hl_cb_mgr *mgr);
-void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr);
-struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size);
-int hl_cb_pool_init(struct hl_device *hdev);
-int hl_cb_pool_fini(struct hl_device *hdev);
-
-void hl_cs_rollback_all(struct hl_device *hdev);
-struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
- enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
-void hl_sob_reset_error(struct kref *ref);
-
-void goya_set_asic_funcs(struct hl_device *hdev);
-void gaudi_set_asic_funcs(struct hl_device *hdev);
-
-int hl_vm_ctx_init(struct hl_ctx *ctx);
-void hl_vm_ctx_fini(struct hl_ctx *ctx);
-
-int hl_vm_init(struct hl_device *hdev);
-void hl_vm_fini(struct hl_device *hdev);
-
-int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
- struct hl_userptr *userptr);
-void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
-void hl_userptr_delete_list(struct hl_device *hdev,
- struct list_head *userptr_list);
-bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
- struct list_head *userptr_list,
- struct hl_userptr **userptr);
-
-int hl_mmu_init(struct hl_device *hdev);
-void hl_mmu_fini(struct hl_device *hdev);
-int hl_mmu_ctx_init(struct hl_ctx *ctx);
-void hl_mmu_ctx_fini(struct hl_ctx *ctx);
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
- u32 page_size, bool flush_pte);
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
- bool flush_pte);
-void hl_mmu_swap_out(struct hl_ctx *ctx);
-void hl_mmu_swap_in(struct hl_ctx *ctx);
-
-int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
- void __iomem *dst);
-int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode);
-int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
- u16 len, u32 timeout, long *result);
-int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type);
-int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
- size_t irq_arr_size);
-int hl_fw_test_cpu_queue(struct hl_device *hdev);
-void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
- dma_addr_t *dma_handle);
-void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
- void *vaddr);
-int hl_fw_send_heartbeat(struct hl_device *hdev);
-int hl_fw_armcp_info_get(struct hl_device *hdev);
-int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
-int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
- u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
- u32 boot_err0_reg, bool skip_bmc,
- u32 cpu_timeout, u32 boot_fit_timeout);
-
-int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
- bool is_wc[3]);
-int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
-int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
- u64 addr);
-int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
- struct hl_inbound_pci_region *pci_region);
-int hl_pci_set_outbound_region(struct hl_device *hdev,
- struct hl_outbound_pci_region *pci_region);
-int hl_pci_init(struct hl_device *hdev);
-void hl_pci_fini(struct hl_device *hdev);
-
-long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
-void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
-int hl_get_temperature(struct hl_device *hdev,
- int sensor_index, u32 attr, long *value);
-int hl_set_temperature(struct hl_device *hdev,
- int sensor_index, u32 attr, long value);
-int hl_get_voltage(struct hl_device *hdev,
- int sensor_index, u32 attr, long *value);
-int hl_get_current(struct hl_device *hdev,
- int sensor_index, u32 attr, long *value);
-int hl_get_fan_speed(struct hl_device *hdev,
- int sensor_index, u32 attr, long *value);
-int hl_get_pwm_info(struct hl_device *hdev,
- int sensor_index, u32 attr, long *value);
-void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
- long value);
-u64 hl_get_max_power(struct hl_device *hdev);
-void hl_set_max_power(struct hl_device *hdev, u64 value);
-int hl_set_voltage(struct hl_device *hdev,
- int sensor_index, u32 attr, long value);
-int hl_set_current(struct hl_device *hdev,
- int sensor_index, u32 attr, long value);
-
-#ifdef CONFIG_DEBUG_FS
-
-void hl_debugfs_init(void);
-void hl_debugfs_fini(void);
-void hl_debugfs_add_device(struct hl_device *hdev);
-void hl_debugfs_remove_device(struct hl_device *hdev);
-void hl_debugfs_add_file(struct hl_fpriv *hpriv);
-void hl_debugfs_remove_file(struct hl_fpriv *hpriv);
-void hl_debugfs_add_cb(struct hl_cb *cb);
-void hl_debugfs_remove_cb(struct hl_cb *cb);
-void hl_debugfs_add_cs(struct hl_cs *cs);
-void hl_debugfs_remove_cs(struct hl_cs *cs);
-void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job);
-void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job);
-void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr);
-void hl_debugfs_remove_userptr(struct hl_device *hdev,
- struct hl_userptr *userptr);
-void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
-void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
-
-#else
-
-static inline void __init hl_debugfs_init(void)
-{
-}
-
-static inline void hl_debugfs_fini(void)
-{
-}
-
-static inline void hl_debugfs_add_device(struct hl_device *hdev)
-{
-}
-
-static inline void hl_debugfs_remove_device(struct hl_device *hdev)
-{
-}
-
-static inline void hl_debugfs_add_file(struct hl_fpriv *hpriv)
-{
-}
-
-static inline void hl_debugfs_remove_file(struct hl_fpriv *hpriv)
-{
-}
-
-static inline void hl_debugfs_add_cb(struct hl_cb *cb)
-{
-}
-
-static inline void hl_debugfs_remove_cb(struct hl_cb *cb)
-{
-}
-
-static inline void hl_debugfs_add_cs(struct hl_cs *cs)
-{
-}
-
-static inline void hl_debugfs_remove_cs(struct hl_cs *cs)
-{
-}
-
-static inline void hl_debugfs_add_job(struct hl_device *hdev,
- struct hl_cs_job *job)
-{
-}
-
-static inline void hl_debugfs_remove_job(struct hl_device *hdev,
- struct hl_cs_job *job)
-{
-}
-
-static inline void hl_debugfs_add_userptr(struct hl_device *hdev,
- struct hl_userptr *userptr)
-{
-}
-
-static inline void hl_debugfs_remove_userptr(struct hl_device *hdev,
- struct hl_userptr *userptr)
-{
-}
-
-static inline void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev,
- struct hl_ctx *ctx)
-{
-}
-
-static inline void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev,
- struct hl_ctx *ctx)
-{
-}
-
-#endif
-
-/* IOCTLs */
-long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
-long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg);
-int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
-int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data);
-int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data);
-int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data);
-
-#endif /* HABANALABSP_H_ */
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- *
- */
-
-#define pr_fmt(fmt) "habanalabs: " fmt
-
-#include "habanalabs.h"
-
-#include <linux/pci.h>
-#include <linux/module.h>
-
-#define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team"
-
-#define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators"
-
-MODULE_AUTHOR(HL_DRIVER_AUTHOR);
-MODULE_DESCRIPTION(HL_DRIVER_DESC);
-MODULE_LICENSE("GPL v2");
-
-static int hl_major;
-static struct class *hl_class;
-static DEFINE_IDR(hl_devs_idr);
-static DEFINE_MUTEX(hl_devs_idr_lock);
-
-static int timeout_locked = 5;
-static int reset_on_lockup = 1;
-
-module_param(timeout_locked, int, 0444);
-MODULE_PARM_DESC(timeout_locked,
- "Device lockup timeout in seconds (0 = disabled, default 5s)");
-
-module_param(reset_on_lockup, int, 0444);
-MODULE_PARM_DESC(reset_on_lockup,
- "Do device reset on lockup (0 = no, 1 = yes, default yes)");
-
-#define PCI_VENDOR_ID_HABANALABS 0x1da3
-
-#define PCI_IDS_GOYA 0x0001
-#define PCI_IDS_GAUDI 0x1000
-
-static const struct pci_device_id ids[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
- { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
- { 0, }
-};
-MODULE_DEVICE_TABLE(pci, ids);
-
-/*
- * get_asic_type - translate device id to asic type
- *
- * @device: id of the PCI device
- *
- * Translate device id to asic type.
- * In case of unidentified device, return -1
- */
-static enum hl_asic_type get_asic_type(u16 device)
-{
- enum hl_asic_type asic_type;
-
- switch (device) {
- case PCI_IDS_GOYA:
- asic_type = ASIC_GOYA;
- break;
- case PCI_IDS_GAUDI:
- asic_type = ASIC_GAUDI;
- break;
- default:
- asic_type = ASIC_INVALID;
- break;
- }
-
- return asic_type;
-}
-
-/*
- * hl_device_open - open function for habanalabs device
- *
- * @inode: pointer to inode structure
- * @filp: pointer to file structure
- *
- * Called when process opens an habanalabs device.
- */
-int hl_device_open(struct inode *inode, struct file *filp)
-{
- struct hl_device *hdev;
- struct hl_fpriv *hpriv;
- int rc;
-
- mutex_lock(&hl_devs_idr_lock);
- hdev = idr_find(&hl_devs_idr, iminor(inode));
- mutex_unlock(&hl_devs_idr_lock);
-
- if (!hdev) {
- pr_err("Couldn't find device %d:%d\n",
- imajor(inode), iminor(inode));
- return -ENXIO;
- }
-
- hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
- if (!hpriv)
- return -ENOMEM;
-
- hpriv->hdev = hdev;
- filp->private_data = hpriv;
- hpriv->filp = filp;
- mutex_init(&hpriv->restore_phase_mutex);
- kref_init(&hpriv->refcount);
- nonseekable_open(inode, filp);
-
- hl_cb_mgr_init(&hpriv->cb_mgr);
- hl_ctx_mgr_init(&hpriv->ctx_mgr);
-
- hpriv->taskpid = find_get_pid(current->pid);
-
- mutex_lock(&hdev->fpriv_list_lock);
-
- if (hl_device_disabled_or_in_reset(hdev)) {
- dev_err_ratelimited(hdev->dev,
- "Can't open %s because it is disabled or in reset\n",
- dev_name(hdev->dev));
- rc = -EPERM;
- goto out_err;
- }
-
- if (hdev->in_debug) {
- dev_err_ratelimited(hdev->dev,
- "Can't open %s because it is being debugged by another user\n",
- dev_name(hdev->dev));
- rc = -EPERM;
- goto out_err;
- }
-
- if (hdev->compute_ctx) {
- dev_dbg_ratelimited(hdev->dev,
- "Can't open %s because another user is working on it\n",
- dev_name(hdev->dev));
- rc = -EBUSY;
- goto out_err;
- }
-
- rc = hl_ctx_create(hdev, hpriv);
- if (rc) {
- dev_err(hdev->dev, "Failed to create context %d\n", rc);
- goto out_err;
- }
-
- /* Device is IDLE at this point so it is legal to change PLLs.
- * There is no need to check anything because if the PLL is
- * already HIGH, the set function will return without doing
- * anything
- */
- hl_device_set_frequency(hdev, PLL_HIGH);
-
- list_add(&hpriv->dev_node, &hdev->fpriv_list);
- mutex_unlock(&hdev->fpriv_list_lock);
-
- hl_debugfs_add_file(hpriv);
-
- return 0;
-
-out_err:
- mutex_unlock(&hdev->fpriv_list_lock);
-
- hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
- hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
- filp->private_data = NULL;
- mutex_destroy(&hpriv->restore_phase_mutex);
- put_pid(hpriv->taskpid);
-
- kfree(hpriv);
-
- return rc;
-}
-
-int hl_device_open_ctrl(struct inode *inode, struct file *filp)
-{
- struct hl_device *hdev;
- struct hl_fpriv *hpriv;
- int rc;
-
- mutex_lock(&hl_devs_idr_lock);
- hdev = idr_find(&hl_devs_idr, iminor(inode));
- mutex_unlock(&hl_devs_idr_lock);
-
- if (!hdev) {
- pr_err("Couldn't find device %d:%d\n",
- imajor(inode), iminor(inode));
- return -ENXIO;
- }
-
- hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
- if (!hpriv)
- return -ENOMEM;
-
- mutex_lock(&hdev->fpriv_list_lock);
-
- if (hl_device_disabled_or_in_reset(hdev)) {
- dev_err_ratelimited(hdev->dev_ctrl,
- "Can't open %s because it is disabled or in reset\n",
- dev_name(hdev->dev_ctrl));
- rc = -EPERM;
- goto out_err;
- }
-
- list_add(&hpriv->dev_node, &hdev->fpriv_list);
- mutex_unlock(&hdev->fpriv_list_lock);
-
- hpriv->hdev = hdev;
- filp->private_data = hpriv;
- hpriv->filp = filp;
- hpriv->is_control = true;
- nonseekable_open(inode, filp);
-
- hpriv->taskpid = find_get_pid(current->pid);
-
- return 0;
-
-out_err:
- mutex_unlock(&hdev->fpriv_list_lock);
- kfree(hpriv);
- return rc;
-}
-
-static void set_driver_behavior_per_device(struct hl_device *hdev)
-{
- hdev->mmu_enable = 1;
- hdev->cpu_enable = 1;
- hdev->fw_loading = 1;
- hdev->cpu_queues_enable = 1;
- hdev->heartbeat = 1;
- hdev->clock_gating = 1;
-
- hdev->reset_pcilink = 0;
- hdev->axi_drain = 0;
- hdev->sram_scrambler_enable = 1;
- hdev->dram_scrambler_enable = 1;
- hdev->bmc_enable = 1;
- hdev->hard_reset_on_fw_events = 1;
-}
-
-/*
- * create_hdev - create habanalabs device instance
- *
- * @dev: will hold the pointer to the new habanalabs device structure
- * @pdev: pointer to the pci device
- * @asic_type: in case of simulator device, which device is it
- * @minor: in case of simulator device, the minor of the device
- *
- * Allocate memory for habanalabs device and initialize basic fields
- * Identify the ASIC type
- * Allocate ID (minor) for the device (only for real devices)
- */
-int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
- enum hl_asic_type asic_type, int minor)
-{
- struct hl_device *hdev;
- int rc, main_id, ctrl_id = 0;
-
- *dev = NULL;
-
- hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
- if (!hdev)
- return -ENOMEM;
-
- /* First, we must find out which ASIC are we handling. This is needed
- * to configure the behavior of the driver (kernel parameters)
- */
- if (pdev) {
- hdev->asic_type = get_asic_type(pdev->device);
- if (hdev->asic_type == ASIC_INVALID) {
- dev_err(&pdev->dev, "Unsupported ASIC\n");
- rc = -ENODEV;
- goto free_hdev;
- }
- } else {
- hdev->asic_type = asic_type;
- }
-
- hdev->major = hl_major;
- hdev->reset_on_lockup = reset_on_lockup;
- hdev->pldm = 0;
-
- set_driver_behavior_per_device(hdev);
-
- if (timeout_locked)
- hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
- else
- hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
-
- hdev->disabled = true;
- hdev->pdev = pdev; /* can be NULL in case of simulator device */
-
- /* Set default DMA mask to 32 bits */
- hdev->dma_mask = 32;
-
- mutex_lock(&hl_devs_idr_lock);
-
- /* Always save 2 numbers, 1 for main device and 1 for control.
- * They must be consecutive
- */
- main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS,
- GFP_KERNEL);
-
- if (main_id >= 0)
- ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
- main_id + 2, GFP_KERNEL);
-
- mutex_unlock(&hl_devs_idr_lock);
-
- if ((main_id < 0) || (ctrl_id < 0)) {
- if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
- pr_err("too many devices in the system\n");
-
- if (main_id >= 0) {
- mutex_lock(&hl_devs_idr_lock);
- idr_remove(&hl_devs_idr, main_id);
- mutex_unlock(&hl_devs_idr_lock);
- }
-
- rc = -EBUSY;
- goto free_hdev;
- }
-
- hdev->id = main_id;
- hdev->id_control = ctrl_id;
-
- *dev = hdev;
-
- return 0;
-
-free_hdev:
- kfree(hdev);
- return rc;
-}
-
-/*
- * destroy_hdev - destroy habanalabs device instance
- *
- * @dev: pointer to the habanalabs device structure
- *
- */
-void destroy_hdev(struct hl_device *hdev)
-{
- /* Remove device from the device list */
- mutex_lock(&hl_devs_idr_lock);
- idr_remove(&hl_devs_idr, hdev->id);
- idr_remove(&hl_devs_idr, hdev->id_control);
- mutex_unlock(&hl_devs_idr_lock);
-
- kfree(hdev);
-}
-
-static int hl_pmops_suspend(struct device *dev)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
-
- pr_debug("Going to suspend PCI device\n");
-
- if (!hdev) {
- pr_err("device pointer is NULL in suspend\n");
- return 0;
- }
-
- return hl_device_suspend(hdev);
-}
-
-static int hl_pmops_resume(struct device *dev)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
-
- pr_debug("Going to resume PCI device\n");
-
- if (!hdev) {
- pr_err("device pointer is NULL in resume\n");
- return 0;
- }
-
- return hl_device_resume(hdev);
-}
-
-/*
- * hl_pci_probe - probe PCI habanalabs devices
- *
- * @pdev: pointer to pci device
- * @id: pointer to pci device id structure
- *
- * Standard PCI probe function for habanalabs device.
- * Create a new habanalabs device and initialize it according to the
- * device's type
- */
-static int hl_pci_probe(struct pci_dev *pdev,
- const struct pci_device_id *id)
-{
- struct hl_device *hdev;
- int rc;
-
- dev_info(&pdev->dev, HL_NAME
- " device found [%04x:%04x] (rev %x)\n",
- (int)pdev->vendor, (int)pdev->device, (int)pdev->revision);
-
- rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1);
- if (rc)
- return rc;
-
- pci_set_drvdata(pdev, hdev);
-
- rc = hl_device_init(hdev, hl_class);
- if (rc) {
- dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
- rc = -ENODEV;
- goto disable_device;
- }
-
- return 0;
-
-disable_device:
- pci_set_drvdata(pdev, NULL);
- destroy_hdev(hdev);
-
- return rc;
-}
-
-/*
- * hl_pci_remove - remove PCI habanalabs devices
- *
- * @pdev: pointer to pci device
- *
- * Standard PCI remove function for habanalabs device
- */
-static void hl_pci_remove(struct pci_dev *pdev)
-{
- struct hl_device *hdev;
-
- hdev = pci_get_drvdata(pdev);
- if (!hdev)
- return;
-
- hl_device_fini(hdev);
- pci_set_drvdata(pdev, NULL);
-
- destroy_hdev(hdev);
-}
-
-static const struct dev_pm_ops hl_pm_ops = {
- .suspend = hl_pmops_suspend,
- .resume = hl_pmops_resume,
-};
-
-static struct pci_driver hl_pci_driver = {
- .name = HL_NAME,
- .id_table = ids,
- .probe = hl_pci_probe,
- .remove = hl_pci_remove,
- .driver.pm = &hl_pm_ops,
-};
-
-/*
- * hl_init - Initialize the habanalabs kernel driver
- */
-static int __init hl_init(void)
-{
- int rc;
- dev_t dev;
-
- pr_info("loading driver\n");
-
- rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME);
- if (rc < 0) {
- pr_err("unable to get major\n");
- return rc;
- }
-
- hl_major = MAJOR(dev);
-
- hl_class = class_create(THIS_MODULE, HL_NAME);
- if (IS_ERR(hl_class)) {
- pr_err("failed to allocate class\n");
- rc = PTR_ERR(hl_class);
- goto remove_major;
- }
-
- hl_debugfs_init();
-
- rc = pci_register_driver(&hl_pci_driver);
- if (rc) {
- pr_err("failed to register pci device\n");
- goto remove_debugfs;
- }
-
- pr_debug("driver loaded\n");
-
- return 0;
-
-remove_debugfs:
- hl_debugfs_fini();
- class_destroy(hl_class);
-remove_major:
- unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
- return rc;
-}
-
-/*
- * hl_exit - Release all resources of the habanalabs kernel driver
- */
-static void __exit hl_exit(void)
-{
- pci_unregister_driver(&hl_pci_driver);
-
- /*
- * Removing debugfs must be after all devices or simulator devices
- * have been removed because otherwise we get a bug in the
- * debugfs module for referencing NULL objects
- */
- hl_debugfs_fini();
-
- class_destroy(hl_class);
- unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
-
- idr_destroy(&hl_devs_idr);
-
- pr_debug("driver removed\n");
-}
-
-module_init(hl_init);
-module_exit(hl_exit);
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include <uapi/misc/habanalabs.h>
-#include "habanalabs.h"
-
-#include <linux/fs.h>
-#include <linux/uaccess.h>
-#include <linux/slab.h>
-
-static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
- [HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
- [HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf),
- [HL_DEBUG_OP_STM] = sizeof(struct hl_debug_params_stm),
- [HL_DEBUG_OP_FUNNEL] = 0,
- [HL_DEBUG_OP_BMON] = sizeof(struct hl_debug_params_bmon),
- [HL_DEBUG_OP_SPMU] = sizeof(struct hl_debug_params_spmu),
- [HL_DEBUG_OP_TIMESTAMP] = 0
-
-};
-
-static int device_status_info(struct hl_device *hdev, struct hl_info_args *args)
-{
- struct hl_info_device_status dev_stat = {0};
- u32 size = args->return_size;
- void __user *out = (void __user *) (uintptr_t) args->return_pointer;
-
- if ((!size) || (!out))
- return -EINVAL;
-
- dev_stat.status = hl_device_status(hdev);
-
- return copy_to_user(out, &dev_stat,
- min((size_t)size, sizeof(dev_stat))) ? -EFAULT : 0;
-}
-
-static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
-{
- struct hl_info_hw_ip_info hw_ip = {0};
- u32 size = args->return_size;
- void __user *out = (void __user *) (uintptr_t) args->return_pointer;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 sram_kmd_size, dram_kmd_size;
-
- if ((!size) || (!out))
- return -EINVAL;
-
- sram_kmd_size = (prop->sram_user_base_address -
- prop->sram_base_address);
- dram_kmd_size = (prop->dram_user_base_address -
- prop->dram_base_address);
-
- hw_ip.device_id = hdev->asic_funcs->get_pci_id(hdev);
- hw_ip.sram_base_address = prop->sram_user_base_address;
- hw_ip.dram_base_address = prop->dram_user_base_address;
- hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask;
- hw_ip.sram_size = prop->sram_size - sram_kmd_size;
- hw_ip.dram_size = prop->dram_size - dram_kmd_size;
- if (hw_ip.dram_size > PAGE_SIZE)
- hw_ip.dram_enabled = 1;
- hw_ip.num_of_events = prop->num_of_events;
-
- memcpy(hw_ip.armcp_version, prop->armcp_info.armcp_version,
- min(VERSION_MAX_LEN, HL_INFO_VERSION_MAX_LEN));
-
- memcpy(hw_ip.card_name, prop->armcp_info.card_name,
- min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN));
-
- hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version);
- hw_ip.module_id = le32_to_cpu(prop->armcp_info.card_location);
-
- hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr;
- hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf;
- hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od;
- hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor;
-
- return copy_to_user(out, &hw_ip,
- min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0;
-}
-
-static int hw_events_info(struct hl_device *hdev, bool aggregate,
- struct hl_info_args *args)
-{
- u32 size, max_size = args->return_size;
- void __user *out = (void __user *) (uintptr_t) args->return_pointer;
- void *arr;
-
- if ((!max_size) || (!out))
- return -EINVAL;
-
- arr = hdev->asic_funcs->get_events_stat(hdev, aggregate, &size);
-
- return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0;
-}
-
-static int dram_usage_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
-{
- struct hl_device *hdev = hpriv->hdev;
- struct hl_info_dram_usage dram_usage = {0};
- u32 max_size = args->return_size;
- void __user *out = (void __user *) (uintptr_t) args->return_pointer;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 dram_kmd_size;
-
- if ((!max_size) || (!out))
- return -EINVAL;
-
- dram_kmd_size = (prop->dram_user_base_address -
- prop->dram_base_address);
- dram_usage.dram_free_mem = (prop->dram_size - dram_kmd_size) -
- atomic64_read(&hdev->dram_used_mem);
- if (hpriv->ctx)
- dram_usage.ctx_dram_mem =
- atomic64_read(&hpriv->ctx->dram_phys_mem);
-
- return copy_to_user(out, &dram_usage,
- min((size_t) max_size, sizeof(dram_usage))) ? -EFAULT : 0;
-}
-
-static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
-{
- struct hl_info_hw_idle hw_idle = {0};
- u32 max_size = args->return_size;
- void __user *out = (void __user *) (uintptr_t) args->return_pointer;
-
- if ((!max_size) || (!out))
- return -EINVAL;
-
- hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
- &hw_idle.busy_engines_mask, NULL);
-
- return copy_to_user(out, &hw_idle,
- min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
-}
-
-static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args)
-{
- struct hl_debug_params *params;
- void *input = NULL, *output = NULL;
- int rc;
-
- params = kzalloc(sizeof(*params), GFP_KERNEL);
- if (!params)
- return -ENOMEM;
-
- params->reg_idx = args->reg_idx;
- params->enable = args->enable;
- params->op = args->op;
-
- if (args->input_ptr && args->input_size) {
- input = kzalloc(hl_debug_struct_size[args->op], GFP_KERNEL);
- if (!input) {
- rc = -ENOMEM;
- goto out;
- }
-
- if (copy_from_user(input, u64_to_user_ptr(args->input_ptr),
- args->input_size)) {
- rc = -EFAULT;
- dev_err(hdev->dev, "failed to copy input debug data\n");
- goto out;
- }
-
- params->input = input;
- }
-
- if (args->output_ptr && args->output_size) {
- output = kzalloc(args->output_size, GFP_KERNEL);
- if (!output) {
- rc = -ENOMEM;
- goto out;
- }
-
- params->output = output;
- params->output_size = args->output_size;
- }
-
- rc = hdev->asic_funcs->debug_coresight(hdev, params);
- if (rc) {
- dev_err(hdev->dev,
- "debug coresight operation failed %d\n", rc);
- goto out;
- }
-
- if (output && copy_to_user((void __user *) (uintptr_t) args->output_ptr,
- output, args->output_size)) {
- dev_err(hdev->dev, "copy to user failed in debug ioctl\n");
- rc = -EFAULT;
- goto out;
- }
-
-
-out:
- kfree(params);
- kfree(output);
- kfree(input);
-
- return rc;
-}
-
-static int device_utilization(struct hl_device *hdev, struct hl_info_args *args)
-{
- struct hl_info_device_utilization device_util = {0};
- u32 max_size = args->return_size;
- void __user *out = (void __user *) (uintptr_t) args->return_pointer;
-
- if ((!max_size) || (!out))
- return -EINVAL;
-
- if ((args->period_ms < 100) || (args->period_ms > 1000) ||
- (args->period_ms % 100)) {
- dev_err(hdev->dev,
- "period %u must be between 100 - 1000 and must be divisible by 100\n",
- args->period_ms);
- return -EINVAL;
- }
-
- device_util.utilization = hl_device_utilization(hdev, args->period_ms);
-
- return copy_to_user(out, &device_util,
- min((size_t) max_size, sizeof(device_util))) ? -EFAULT : 0;
-}
-
-static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args)
-{
- struct hl_info_clk_rate clk_rate = {0};
- u32 max_size = args->return_size;
- void __user *out = (void __user *) (uintptr_t) args->return_pointer;
- int rc;
-
- if ((!max_size) || (!out))
- return -EINVAL;
-
- rc = hdev->asic_funcs->get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz,
- &clk_rate.max_clk_rate_mhz);
- if (rc)
- return rc;
-
- return copy_to_user(out, &clk_rate,
- min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0;
-}
-
-static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args)
-{
- struct hl_info_reset_count reset_count = {0};
- u32 max_size = args->return_size;
- void __user *out = (void __user *) (uintptr_t) args->return_pointer;
-
- if ((!max_size) || (!out))
- return -EINVAL;
-
- reset_count.hard_reset_cnt = hdev->hard_reset_cnt;
- reset_count.soft_reset_cnt = hdev->soft_reset_cnt;
-
- return copy_to_user(out, &reset_count,
- min((size_t) max_size, sizeof(reset_count))) ? -EFAULT : 0;
-}
-
-static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
-{
- struct hl_info_time_sync time_sync = {0};
- u32 max_size = args->return_size;
- void __user *out = (void __user *) (uintptr_t) args->return_pointer;
-
- if ((!max_size) || (!out))
- return -EINVAL;
-
- time_sync.device_time = hdev->asic_funcs->get_device_time(hdev);
- time_sync.host_time = ktime_get_raw_ns();
-
- return copy_to_user(out, &time_sync,
- min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
-}
-
-static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
-{
- struct hl_device *hdev = hpriv->hdev;
- struct hl_info_cs_counters cs_counters = {0};
- u32 max_size = args->return_size;
- void __user *out = (void __user *) (uintptr_t) args->return_pointer;
-
- if ((!max_size) || (!out))
- return -EINVAL;
-
- memcpy(&cs_counters.cs_counters, &hdev->aggregated_cs_counters,
- sizeof(struct hl_cs_counters));
-
- if (hpriv->ctx)
- memcpy(&cs_counters.ctx_cs_counters, &hpriv->ctx->cs_counters,
- sizeof(struct hl_cs_counters));
-
- return copy_to_user(out, &cs_counters,
- min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0;
-}
-
-static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
- struct device *dev)
-{
- struct hl_info_args *args = data;
- struct hl_device *hdev = hpriv->hdev;
- int rc;
-
- /*
- * Information is returned for the following opcodes even if the device
- * is disabled or in reset.
- */
- switch (args->op) {
- case HL_INFO_HW_IP_INFO:
- return hw_ip_info(hdev, args);
-
- case HL_INFO_DEVICE_STATUS:
- return device_status_info(hdev, args);
-
- case HL_INFO_RESET_COUNT:
- return get_reset_count(hdev, args);
-
- default:
- break;
- }
-
- if (hl_device_disabled_or_in_reset(hdev)) {
- dev_warn_ratelimited(dev,
- "Device is %s. Can't execute INFO IOCTL\n",
- atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
- return -EBUSY;
- }
-
- switch (args->op) {
- case HL_INFO_HW_EVENTS:
- rc = hw_events_info(hdev, false, args);
- break;
-
- case HL_INFO_DRAM_USAGE:
- rc = dram_usage_info(hpriv, args);
- break;
-
- case HL_INFO_HW_IDLE:
- rc = hw_idle(hdev, args);
- break;
-
- case HL_INFO_DEVICE_UTILIZATION:
- rc = device_utilization(hdev, args);
- break;
-
- case HL_INFO_HW_EVENTS_AGGREGATE:
- rc = hw_events_info(hdev, true, args);
- break;
-
- case HL_INFO_CLK_RATE:
- rc = get_clk_rate(hdev, args);
- break;
-
- case HL_INFO_TIME_SYNC:
- return time_sync_info(hdev, args);
-
- case HL_INFO_CS_COUNTERS:
- return cs_counters_info(hpriv, args);
-
- default:
- dev_err(dev, "Invalid request %d\n", args->op);
- rc = -ENOTTY;
- break;
- }
-
- return rc;
-}
-
-static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
-{
- return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev);
-}
-
-static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data)
-{
- return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl);
-}
-
-static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
-{
- struct hl_debug_args *args = data;
- struct hl_device *hdev = hpriv->hdev;
- int rc = 0;
-
- if (hl_device_disabled_or_in_reset(hdev)) {
- dev_warn_ratelimited(hdev->dev,
- "Device is %s. Can't execute DEBUG IOCTL\n",
- atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
- return -EBUSY;
- }
-
- switch (args->op) {
- case HL_DEBUG_OP_ETR:
- case HL_DEBUG_OP_ETF:
- case HL_DEBUG_OP_STM:
- case HL_DEBUG_OP_FUNNEL:
- case HL_DEBUG_OP_BMON:
- case HL_DEBUG_OP_SPMU:
- case HL_DEBUG_OP_TIMESTAMP:
- if (!hdev->in_debug) {
- dev_err_ratelimited(hdev->dev,
- "Rejecting debug configuration request because device not in debug mode\n");
- return -EFAULT;
- }
- args->input_size =
- min(args->input_size, hl_debug_struct_size[args->op]);
- rc = debug_coresight(hdev, args);
- break;
- case HL_DEBUG_OP_SET_MODE:
- rc = hl_device_set_debug_mode(hdev, (bool) args->enable);
- break;
- default:
- dev_err(hdev->dev, "Invalid request %d\n", args->op);
- rc = -ENOTTY;
- break;
- }
-
- return rc;
-}
-
-#define HL_IOCTL_DEF(ioctl, _func) \
- [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func}
-
-static const struct hl_ioctl_desc hl_ioctls[] = {
- HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl),
- HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
- HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
- HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl),
- HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl),
- HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl)
-};
-
-static const struct hl_ioctl_desc hl_ioctls_control[] = {
- HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control)
-};
-
-static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
- const struct hl_ioctl_desc *ioctl, struct device *dev)
-{
- struct hl_fpriv *hpriv = filep->private_data;
- struct hl_device *hdev = hpriv->hdev;
- unsigned int nr = _IOC_NR(cmd);
- char stack_kdata[128] = {0};
- char *kdata = NULL;
- unsigned int usize, asize;
- hl_ioctl_t *func;
- u32 hl_size;
- int retcode;
-
- if (hdev->hard_reset_pending) {
- dev_crit_ratelimited(hdev->dev_ctrl,
- "Device HARD reset pending! Please close FD\n");
- return -ENODEV;
- }
-
- /* Do not trust userspace, use our own definition */
- func = ioctl->func;
-
- if (unlikely(!func)) {
- dev_dbg(dev, "no function\n");
- retcode = -ENOTTY;
- goto out_err;
- }
-
- hl_size = _IOC_SIZE(ioctl->cmd);
- usize = asize = _IOC_SIZE(cmd);
- if (hl_size > asize)
- asize = hl_size;
-
- cmd = ioctl->cmd;
-
- if (cmd & (IOC_IN | IOC_OUT)) {
- if (asize <= sizeof(stack_kdata)) {
- kdata = stack_kdata;
- } else {
- kdata = kzalloc(asize, GFP_KERNEL);
- if (!kdata) {
- retcode = -ENOMEM;
- goto out_err;
- }
- }
- }
-
- if (cmd & IOC_IN) {
- if (copy_from_user(kdata, (void __user *)arg, usize)) {
- retcode = -EFAULT;
- goto out_err;
- }
- } else if (cmd & IOC_OUT) {
- memset(kdata, 0, usize);
- }
-
- retcode = func(hpriv, kdata);
-
- if ((cmd & IOC_OUT) && copy_to_user((void __user *)arg, kdata, usize))
- retcode = -EFAULT;
-
-out_err:
- if (retcode)
- dev_dbg(dev, "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
- task_pid_nr(current), cmd, nr);
-
- if (kdata != stack_kdata)
- kfree(kdata);
-
- return retcode;
-}
-
-long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
-{
- struct hl_fpriv *hpriv = filep->private_data;
- struct hl_device *hdev = hpriv->hdev;
- const struct hl_ioctl_desc *ioctl = NULL;
- unsigned int nr = _IOC_NR(cmd);
-
- if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) {
- ioctl = &hl_ioctls[nr];
- } else {
- dev_err(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n",
- task_pid_nr(current), nr);
- return -ENOTTY;
- }
-
- return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev);
-}
-
-long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
-{
- struct hl_fpriv *hpriv = filep->private_data;
- struct hl_device *hdev = hpriv->hdev;
- const struct hl_ioctl_desc *ioctl = NULL;
- unsigned int nr = _IOC_NR(cmd);
-
- if (nr == _IOC_NR(HL_IOCTL_INFO)) {
- ioctl = &hl_ioctls_control[nr];
- } else {
- dev_err(hdev->dev_ctrl, "invalid ioctl: pid=%d, nr=0x%02x\n",
- task_pid_nr(current), nr);
- return -ENOTTY;
- }
-
- return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev_ctrl);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-
-#include <linux/slab.h>
-
-/*
- * hl_queue_add_ptr - add to pi or ci and checks if it wraps around
- *
- * @ptr: the current pi/ci value
- * @val: the amount to add
- *
- * Add val to ptr. It can go until twice the queue length.
- */
-inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
-{
- ptr += val;
- ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
- return ptr;
-}
-static inline int queue_ci_get(atomic_t *ci, u32 queue_len)
-{
- return atomic_read(ci) & ((queue_len << 1) - 1);
-}
-
-static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
-{
- int delta = (q->pi - queue_ci_get(&q->ci, queue_len));
-
- if (delta >= 0)
- return (queue_len - delta);
- else
- return (abs(delta) - queue_len);
-}
-
-void hl_int_hw_queue_update_ci(struct hl_cs *cs)
-{
- struct hl_device *hdev = cs->ctx->hdev;
- struct hl_hw_queue *q;
- int i;
-
- if (hdev->disabled)
- return;
-
- q = &hdev->kernel_queues[0];
- for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
- if (q->queue_type == QUEUE_TYPE_INT)
- atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
- }
-}
-
-/*
- * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
- * H/W queue.
- * @hdev: pointer to habanalabs device structure
- * @q: pointer to habanalabs queue structure
- * @ctl: BD's control word
- * @len: BD's length
- * @ptr: BD's pointer
- *
- * This function assumes there is enough space on the queue to submit a new
- * BD to it. It initializes the next BD and calls the device specific
- * function to set the pi (and doorbell)
- *
- * This function must be called when the scheduler mutex is taken
- *
- */
-static void ext_and_hw_queue_submit_bd(struct hl_device *hdev,
- struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr)
-{
- struct hl_bd *bd;
-
- bd = (struct hl_bd *) (uintptr_t) q->kernel_address;
- bd += hl_pi_2_offset(q->pi);
- bd->ctl = cpu_to_le32(ctl);
- bd->len = cpu_to_le32(len);
- bd->ptr = cpu_to_le64(ptr);
-
- q->pi = hl_queue_inc_ptr(q->pi);
- hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
-}
-
-/*
- * ext_queue_sanity_checks - perform some sanity checks on external queue
- *
- * @hdev : pointer to hl_device structure
- * @q : pointer to hl_hw_queue structure
- * @num_of_entries : how many entries to check for space
- * @reserve_cq_entry : whether to reserve an entry in the cq
- *
- * H/W queues spinlock should be taken before calling this function
- *
- * Perform the following:
- * - Make sure we have enough space in the h/w queue
- * - Make sure we have enough space in the completion queue
- * - Reserve space in the completion queue (needs to be reversed if there
- * is a failure down the road before the actual submission of work). Only
- * do this action if reserve_cq_entry is true
- *
- */
-static int ext_queue_sanity_checks(struct hl_device *hdev,
- struct hl_hw_queue *q, int num_of_entries,
- bool reserve_cq_entry)
-{
- atomic_t *free_slots =
- &hdev->completion_queue[q->cq_id].free_slots_cnt;
- int free_slots_cnt;
-
- /* Check we have enough space in the queue */
- free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);
-
- if (free_slots_cnt < num_of_entries) {
- dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
- q->hw_queue_id, num_of_entries);
- return -EAGAIN;
- }
-
- if (reserve_cq_entry) {
- /*
- * Check we have enough space in the completion queue
- * Add -1 to counter (decrement) unless counter was already 0
- * In that case, CQ is full so we can't submit a new CB because
- * we won't get ack on its completion
- * atomic_add_unless will return 0 if counter was already 0
- */
- if (atomic_add_negative(num_of_entries * -1, free_slots)) {
- dev_dbg(hdev->dev, "No space for %d on CQ %d\n",
- num_of_entries, q->hw_queue_id);
- atomic_add(num_of_entries, free_slots);
- return -EAGAIN;
- }
- }
-
- return 0;
-}
-
-/*
- * int_queue_sanity_checks - perform some sanity checks on internal queue
- *
- * @hdev : pointer to hl_device structure
- * @q : pointer to hl_hw_queue structure
- * @num_of_entries : how many entries to check for space
- *
- * H/W queues spinlock should be taken before calling this function
- *
- * Perform the following:
- * - Make sure we have enough space in the h/w queue
- *
- */
-static int int_queue_sanity_checks(struct hl_device *hdev,
- struct hl_hw_queue *q,
- int num_of_entries)
-{
- int free_slots_cnt;
-
- if (num_of_entries > q->int_queue_len) {
- dev_err(hdev->dev,
- "Cannot populate queue %u with %u jobs\n",
- q->hw_queue_id, num_of_entries);
- return -ENOMEM;
- }
-
- /* Check we have enough space in the queue */
- free_slots_cnt = queue_free_slots(q, q->int_queue_len);
-
- if (free_slots_cnt < num_of_entries) {
- dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
- q->hw_queue_id, num_of_entries);
- return -EAGAIN;
- }
-
- return 0;
-}
-
-/*
- * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue
- * @hdev: Pointer to hl_device structure.
- * @q: Pointer to hl_hw_queue structure.
- * @num_of_entries: How many entries to check for space.
- *
- * Notice: We do not reserve queue entries so this function mustn't be called
- * more than once per CS for the same queue
- *
- */
-static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
- int num_of_entries)
-{
- int free_slots_cnt;
-
- /* Check we have enough space in the queue */
- free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);
-
- if (free_slots_cnt < num_of_entries) {
- dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
- q->hw_queue_id, num_of_entries);
- return -EAGAIN;
- }
-
- return 0;
-}
-
-/*
- * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
- *
- * @hdev: pointer to hl_device structure
- * @hw_queue_id: Queue's type
- * @cb_size: size of CB
- * @cb_ptr: pointer to CB location
- *
- * This function sends a single CB, that must NOT generate a completion entry
- *
- */
-int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
- u32 cb_size, u64 cb_ptr)
-{
- struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
- int rc = 0;
-
- /*
- * The CPU queue is a synchronous queue with an effective depth of
- * a single entry (although it is allocated with room for multiple
- * entries). Therefore, there is a different lock, called
- * send_cpu_message_lock, that serializes accesses to the CPU queue.
- * As a result, we don't need to lock the access to the entire H/W
- * queues module when submitting a JOB to the CPU queue
- */
- if (q->queue_type != QUEUE_TYPE_CPU)
- hdev->asic_funcs->hw_queues_lock(hdev);
-
- if (hdev->disabled) {
- rc = -EPERM;
- goto out;
- }
-
- /*
- * hl_hw_queue_send_cb_no_cmpl() is called for queues of a H/W queue
- * type only on init phase, when the queues are empty and being tested,
- * so there is no need for sanity checks.
- */
- if (q->queue_type != QUEUE_TYPE_HW) {
- rc = ext_queue_sanity_checks(hdev, q, 1, false);
- if (rc)
- goto out;
- }
-
- ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
-
-out:
- if (q->queue_type != QUEUE_TYPE_CPU)
- hdev->asic_funcs->hw_queues_unlock(hdev);
-
- return rc;
-}
-
-/*
- * ext_queue_schedule_job - submit a JOB to an external queue
- *
- * @job: pointer to the job that needs to be submitted to the queue
- *
- * This function must be called when the scheduler mutex is taken
- *
- */
-static void ext_queue_schedule_job(struct hl_cs_job *job)
-{
- struct hl_device *hdev = job->cs->ctx->hdev;
- struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
- struct hl_cq_entry cq_pkt;
- struct hl_cq *cq;
- u64 cq_addr;
- struct hl_cb *cb;
- u32 ctl;
- u32 len;
- u64 ptr;
-
- /*
- * Update the JOB ID inside the BD CTL so the device would know what
- * to write in the completion queue
- */
- ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK);
-
- cb = job->patched_cb;
- len = job->job_cb_size;
- ptr = cb->bus_address;
-
- cq_pkt.data = cpu_to_le32(
- ((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
- & CQ_ENTRY_SHADOW_INDEX_MASK) |
- (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
- (1 << CQ_ENTRY_READY_SHIFT));
-
- /*
- * No need to protect pi_offset because scheduling to the
- * H/W queues is done under the scheduler mutex
- *
- * No need to check if CQ is full because it was already
- * checked in ext_queue_sanity_checks
- */
- cq = &hdev->completion_queue[q->cq_id];
- cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);
-
- hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
- cq_addr,
- le32_to_cpu(cq_pkt.data),
- q->msi_vec,
- job->contains_dma_pkt);
-
- q->shadow_queue[hl_pi_2_offset(q->pi)] = job;
-
- cq->pi = hl_cq_inc_ptr(cq->pi);
-
- ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
-}
-
-/*
- * int_queue_schedule_job - submit a JOB to an internal queue
- *
- * @job: pointer to the job that needs to be submitted to the queue
- *
- * This function must be called when the scheduler mutex is taken
- *
- */
-static void int_queue_schedule_job(struct hl_cs_job *job)
-{
- struct hl_device *hdev = job->cs->ctx->hdev;
- struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
- struct hl_bd bd;
- __le64 *pi;
-
- bd.ctl = 0;
- bd.len = cpu_to_le32(job->job_cb_size);
- bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);
-
- pi = (__le64 *) (uintptr_t) (q->kernel_address +
- ((q->pi & (q->int_queue_len - 1)) * sizeof(bd)));
-
- q->pi++;
- q->pi &= ((q->int_queue_len << 1) - 1);
-
- hdev->asic_funcs->pqe_write(hdev, pi, &bd);
-
- hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
-}
-
-/*
- * hw_queue_schedule_job - submit a JOB to a H/W queue
- *
- * @job: pointer to the job that needs to be submitted to the queue
- *
- * This function must be called when the scheduler mutex is taken
- *
- */
-static void hw_queue_schedule_job(struct hl_cs_job *job)
-{
- struct hl_device *hdev = job->cs->ctx->hdev;
- struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
- u64 ptr;
- u32 offset, ctl, len;
-
- /*
- * Upon PQE completion, COMP_DATA is used as the write data to the
- * completion queue (QMAN HBW message), and COMP_OFFSET is used as the
- * write address offset in the SM block (QMAN LBW message).
- * The write address offset is calculated as "COMP_OFFSET << 2".
- */
- offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
- ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
- ((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);
-
- len = job->job_cb_size;
-
- /*
- * A patched CB is created only if a user CB was allocated by driver and
- * MMU is disabled. If MMU is enabled, the user CB should be used
- * instead. If the user CB wasn't allocated by driver, assume that it
- * holds an address.
- */
- if (job->patched_cb)
- ptr = job->patched_cb->bus_address;
- else if (job->is_kernel_allocated_cb)
- ptr = job->user_cb->bus_address;
- else
- ptr = (u64) (uintptr_t) job->user_cb;
-
- ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
-}
-
-/*
- * init_signal_wait_cs - initialize a signal/wait CS
- * @cs: pointer to the signal/wait CS
- *
- * H/W queues spinlock should be taken before calling this function
- */
-static void init_signal_wait_cs(struct hl_cs *cs)
-{
- struct hl_ctx *ctx = cs->ctx;
- struct hl_device *hdev = ctx->hdev;
- struct hl_hw_queue *hw_queue;
- struct hl_cs_compl *cs_cmpl =
- container_of(cs->fence, struct hl_cs_compl, base_fence);
-
- struct hl_hw_sob *hw_sob;
- struct hl_cs_job *job;
- u32 q_idx;
-
- /* There is only one job in a signal/wait CS */
- job = list_first_entry(&cs->job_list, struct hl_cs_job,
- cs_node);
- q_idx = job->hw_queue_id;
- hw_queue = &hdev->kernel_queues[q_idx];
-
- if (cs->type & CS_TYPE_SIGNAL) {
- hw_sob = &hw_queue->hw_sob[hw_queue->curr_sob_offset];
-
- cs_cmpl->hw_sob = hw_sob;
- cs_cmpl->sob_val = hw_queue->next_sob_val++;
-
- dev_dbg(hdev->dev,
- "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
- cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
-
- hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
- cs_cmpl->hw_sob->sob_id);
-
- kref_get(&hw_sob->kref);
-
- /* check for wraparound */
- if (hw_queue->next_sob_val == HL_MAX_SOB_VAL) {
- /*
- * Decrement as we reached the max value.
- * The release function won't be called here as we've
- * just incremented the refcount.
- */
- kref_put(&hw_sob->kref, hl_sob_reset_error);
- hw_queue->next_sob_val = 1;
- /* only two SOBs are currently in use */
- hw_queue->curr_sob_offset =
- (hw_queue->curr_sob_offset + 1) %
- HL_RSVD_SOBS_IN_USE;
-
- dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
- hw_queue->curr_sob_offset, q_idx);
- }
- } else if (cs->type & CS_TYPE_WAIT) {
- struct hl_cs_compl *signal_cs_cmpl;
-
- signal_cs_cmpl = container_of(cs->signal_fence,
- struct hl_cs_compl,
- base_fence);
-
- /* copy the the SOB id and value of the signal CS */
- cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
- cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
-
- dev_dbg(hdev->dev,
- "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
- cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
- hw_queue->base_mon_id, q_idx);
-
- hdev->asic_funcs->gen_wait_cb(hdev, job->patched_cb,
- cs_cmpl->hw_sob->sob_id,
- cs_cmpl->sob_val,
- hw_queue->base_mon_id,
- q_idx);
-
- kref_get(&cs_cmpl->hw_sob->kref);
- /*
- * Must put the signal fence after the SOB refcnt increment so
- * the SOB refcnt won't turn 0 and reset the SOB before the
- * wait CS was submitted.
- */
- mb();
- dma_fence_put(cs->signal_fence);
- cs->signal_fence = NULL;
- }
-}
-
-/*
- * hl_hw_queue_schedule_cs - schedule a command submission
- * @cs: pointer to the CS
- */
-int hl_hw_queue_schedule_cs(struct hl_cs *cs)
-{
- struct hl_ctx *ctx = cs->ctx;
- struct hl_device *hdev = ctx->hdev;
- struct hl_cs_job *job, *tmp;
- struct hl_hw_queue *q;
- u32 max_queues;
- int rc = 0, i, cq_cnt;
-
- hdev->asic_funcs->hw_queues_lock(hdev);
-
- if (hl_device_disabled_or_in_reset(hdev)) {
- ctx->cs_counters.device_in_reset_drop_cnt++;
- dev_err(hdev->dev,
- "device is disabled or in reset, CS rejected!\n");
- rc = -EPERM;
- goto out;
- }
-
- max_queues = hdev->asic_prop.max_queues;
-
- q = &hdev->kernel_queues[0];
- for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) {
- if (cs->jobs_in_queue_cnt[i]) {
- switch (q->queue_type) {
- case QUEUE_TYPE_EXT:
- rc = ext_queue_sanity_checks(hdev, q,
- cs->jobs_in_queue_cnt[i], true);
- break;
- case QUEUE_TYPE_INT:
- rc = int_queue_sanity_checks(hdev, q,
- cs->jobs_in_queue_cnt[i]);
- break;
- case QUEUE_TYPE_HW:
- rc = hw_queue_sanity_checks(hdev, q,
- cs->jobs_in_queue_cnt[i]);
- break;
- default:
- dev_err(hdev->dev, "Queue type %d is invalid\n",
- q->queue_type);
- rc = -EINVAL;
- break;
- }
-
- if (rc) {
- ctx->cs_counters.queue_full_drop_cnt++;
- goto unroll_cq_resv;
- }
-
- if (q->queue_type == QUEUE_TYPE_EXT)
- cq_cnt++;
- }
- }
-
- if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT))
- init_signal_wait_cs(cs);
-
- spin_lock(&hdev->hw_queues_mirror_lock);
- list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list);
-
- /* Queue TDR if the CS is the first entry and if timeout is wanted */
- if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
- (list_first_entry(&hdev->hw_queues_mirror_list,
- struct hl_cs, mirror_node) == cs)) {
- cs->tdr_active = true;
- schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
- spin_unlock(&hdev->hw_queues_mirror_lock);
- } else {
- spin_unlock(&hdev->hw_queues_mirror_lock);
- }
-
- if (!hdev->cs_active_cnt++) {
- struct hl_device_idle_busy_ts *ts;
-
- ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx];
- ts->busy_to_idle_ts = ktime_set(0, 0);
- ts->idle_to_busy_ts = ktime_get();
- }
-
- list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
- switch (job->queue_type) {
- case QUEUE_TYPE_EXT:
- ext_queue_schedule_job(job);
- break;
- case QUEUE_TYPE_INT:
- int_queue_schedule_job(job);
- break;
- case QUEUE_TYPE_HW:
- hw_queue_schedule_job(job);
- break;
- default:
- break;
- }
-
- cs->submitted = true;
-
- goto out;
-
-unroll_cq_resv:
- q = &hdev->kernel_queues[0];
- for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
- if ((q->queue_type == QUEUE_TYPE_EXT) &&
- (cs->jobs_in_queue_cnt[i])) {
- atomic_t *free_slots =
- &hdev->completion_queue[i].free_slots_cnt;
- atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
- cq_cnt--;
- }
- }
-
-out:
- hdev->asic_funcs->hw_queues_unlock(hdev);
-
- return rc;
-}
-
-/*
- * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
- *
- * @hdev: pointer to hl_device structure
- * @hw_queue_id: which queue to increment its ci
- */
-void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
-{
- struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
-
- atomic_inc(&q->ci);
-}
-
-static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
- bool is_cpu_queue)
-{
- void *p;
- int rc;
-
- if (is_cpu_queue)
- p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
- HL_QUEUE_SIZE_IN_BYTES,
- &q->bus_address);
- else
- p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
- HL_QUEUE_SIZE_IN_BYTES,
- &q->bus_address,
- GFP_KERNEL | __GFP_ZERO);
- if (!p)
- return -ENOMEM;
-
- q->kernel_address = (u64) (uintptr_t) p;
-
- q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH,
- sizeof(*q->shadow_queue),
- GFP_KERNEL);
- if (!q->shadow_queue) {
- dev_err(hdev->dev,
- "Failed to allocate shadow queue for H/W queue %d\n",
- q->hw_queue_id);
- rc = -ENOMEM;
- goto free_queue;
- }
-
- /* Make sure read/write pointers are initialized to start of queue */
- atomic_set(&q->ci, 0);
- q->pi = 0;
-
- return 0;
-
-free_queue:
- if (is_cpu_queue)
- hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
- HL_QUEUE_SIZE_IN_BYTES,
- (void *) (uintptr_t) q->kernel_address);
- else
- hdev->asic_funcs->asic_dma_free_coherent(hdev,
- HL_QUEUE_SIZE_IN_BYTES,
- (void *) (uintptr_t) q->kernel_address,
- q->bus_address);
-
- return rc;
-}
-
-static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
-{
- void *p;
-
- p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id,
- &q->bus_address, &q->int_queue_len);
- if (!p) {
- dev_err(hdev->dev,
- "Failed to get base address for internal queue %d\n",
- q->hw_queue_id);
- return -EFAULT;
- }
-
- q->kernel_address = (u64) (uintptr_t) p;
- q->pi = 0;
- atomic_set(&q->ci, 0);
-
- return 0;
-}
-
-static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
-{
- return ext_and_cpu_queue_init(hdev, q, true);
-}
-
-static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
-{
- return ext_and_cpu_queue_init(hdev, q, false);
-}
-
-static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
-{
- void *p;
-
- p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
- HL_QUEUE_SIZE_IN_BYTES,
- &q->bus_address,
- GFP_KERNEL | __GFP_ZERO);
- if (!p)
- return -ENOMEM;
-
- q->kernel_address = (u64) (uintptr_t) p;
-
- /* Make sure read/write pointers are initialized to start of queue */
- atomic_set(&q->ci, 0);
- q->pi = 0;
-
- return 0;
-}
-
-static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
-{
- struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- struct hl_hw_sob *hw_sob;
- int sob, queue_idx = hdev->sync_stream_queue_idx++;
-
- hw_queue->base_sob_id =
- prop->sync_stream_first_sob + queue_idx * HL_RSVD_SOBS;
- hw_queue->base_mon_id =
- prop->sync_stream_first_mon + queue_idx * HL_RSVD_MONS;
- hw_queue->next_sob_val = 1;
- hw_queue->curr_sob_offset = 0;
-
- for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
- hw_sob = &hw_queue->hw_sob[sob];
- hw_sob->hdev = hdev;
- hw_sob->sob_id = hw_queue->base_sob_id + sob;
- hw_sob->q_idx = q_idx;
- kref_init(&hw_sob->kref);
- }
-}
-
-static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
-{
- struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
-
- /*
- * In case we got here due to a stuck CS, the refcnt might be bigger
- * than 1 and therefore we reset it.
- */
- kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
- hw_queue->curr_sob_offset = 0;
- hw_queue->next_sob_val = 1;
-}
-
-/*
- * queue_init - main initialization function for H/W queue object
- *
- * @hdev: pointer to hl_device device structure
- * @q: pointer to hl_hw_queue queue structure
- * @hw_queue_id: The id of the H/W queue
- *
- * Allocate dma-able memory for the queue and initialize fields
- * Returns 0 on success
- */
-static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
- u32 hw_queue_id)
-{
- int rc;
-
- q->hw_queue_id = hw_queue_id;
-
- switch (q->queue_type) {
- case QUEUE_TYPE_EXT:
- rc = ext_queue_init(hdev, q);
- break;
- case QUEUE_TYPE_INT:
- rc = int_queue_init(hdev, q);
- break;
- case QUEUE_TYPE_CPU:
- rc = cpu_queue_init(hdev, q);
- break;
- case QUEUE_TYPE_HW:
- rc = hw_queue_init(hdev, q);
- break;
- case QUEUE_TYPE_NA:
- q->valid = 0;
- return 0;
- default:
- dev_crit(hdev->dev, "wrong queue type %d during init\n",
- q->queue_type);
- rc = -EINVAL;
- break;
- }
-
- if (q->supports_sync_stream)
- sync_stream_queue_init(hdev, q->hw_queue_id);
-
- if (rc)
- return rc;
-
- q->valid = 1;
-
- return 0;
-}
-
-/*
- * hw_queue_fini - destroy queue
- *
- * @hdev: pointer to hl_device device structure
- * @q: pointer to hl_hw_queue queue structure
- *
- * Free the queue memory
- */
-static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
-{
- if (!q->valid)
- return;
-
- /*
- * If we arrived here, there are no jobs waiting on this queue
- * so we can safely remove it.
- * This is because this function can only called when:
- * 1. Either a context is deleted, which only can occur if all its
- * jobs were finished
- * 2. A context wasn't able to be created due to failure or timeout,
- * which means there are no jobs on the queue yet
- *
- * The only exception are the queues of the kernel context, but
- * if they are being destroyed, it means that the entire module is
- * being removed. If the module is removed, it means there is no open
- * user context. It also means that if a job was submitted by
- * the kernel driver (e.g. context creation), the job itself was
- * released by the kernel driver when a timeout occurred on its
- * Completion. Thus, we don't need to release it again.
- */
-
- if (q->queue_type == QUEUE_TYPE_INT)
- return;
-
- kfree(q->shadow_queue);
-
- if (q->queue_type == QUEUE_TYPE_CPU)
- hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
- HL_QUEUE_SIZE_IN_BYTES,
- (void *) (uintptr_t) q->kernel_address);
- else
- hdev->asic_funcs->asic_dma_free_coherent(hdev,
- HL_QUEUE_SIZE_IN_BYTES,
- (void *) (uintptr_t) q->kernel_address,
- q->bus_address);
-}
-
-int hl_hw_queues_create(struct hl_device *hdev)
-{
- struct asic_fixed_properties *asic = &hdev->asic_prop;
- struct hl_hw_queue *q;
- int i, rc, q_ready_cnt;
-
- hdev->kernel_queues = kcalloc(asic->max_queues,
- sizeof(*hdev->kernel_queues), GFP_KERNEL);
-
- if (!hdev->kernel_queues) {
- dev_err(hdev->dev, "Not enough memory for H/W queues\n");
- return -ENOMEM;
- }
-
- /* Initialize the H/W queues */
- for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues;
- i < asic->max_queues ; i++, q_ready_cnt++, q++) {
-
- q->queue_type = asic->hw_queues_props[i].type;
- q->supports_sync_stream =
- asic->hw_queues_props[i].supports_sync_stream;
- rc = queue_init(hdev, q, i);
- if (rc) {
- dev_err(hdev->dev,
- "failed to initialize queue %d\n", i);
- goto release_queues;
- }
- }
-
- return 0;
-
-release_queues:
- for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++)
- queue_fini(hdev, q);
-
- kfree(hdev->kernel_queues);
-
- return rc;
-}
-
-void hl_hw_queues_destroy(struct hl_device *hdev)
-{
- struct hl_hw_queue *q;
- u32 max_queues = hdev->asic_prop.max_queues;
- int i;
-
- for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++)
- queue_fini(hdev, q);
-
- kfree(hdev->kernel_queues);
-}
-
-void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
-{
- struct hl_hw_queue *q;
- u32 max_queues = hdev->asic_prop.max_queues;
- int i;
-
- for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) {
- if ((!q->valid) ||
- ((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU)))
- continue;
- q->pi = 0;
- atomic_set(&q->ci, 0);
-
- if (q->supports_sync_stream)
- sync_stream_queue_reset(hdev, q->hw_queue_id);
- }
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-
-#include <linux/pci.h>
-#include <linux/hwmon.h>
-
-#define SENSORS_PKT_TIMEOUT 1000000 /* 1s */
-#define HWMON_NR_SENSOR_TYPES (hwmon_pwm + 1)
-
-int hl_build_hwmon_channel_info(struct hl_device *hdev,
- struct armcp_sensor *sensors_arr)
-{
- u32 counts[HWMON_NR_SENSOR_TYPES] = {0};
- u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL};
- u32 sensors_by_type_next_index[HWMON_NR_SENSOR_TYPES] = {0};
- struct hwmon_channel_info **channels_info;
- u32 num_sensors_for_type, num_active_sensor_types = 0,
- arr_size = 0, *curr_arr;
- enum hwmon_sensor_types type;
- int rc, i, j;
-
- for (i = 0 ; i < ARMCP_MAX_SENSORS ; i++) {
- type = le32_to_cpu(sensors_arr[i].type);
-
- if ((type == 0) && (sensors_arr[i].flags == 0))
- break;
-
- if (type >= HWMON_NR_SENSOR_TYPES) {
- dev_err(hdev->dev,
- "Got wrong sensor type %d from device\n", type);
- return -EINVAL;
- }
-
- counts[type]++;
- arr_size++;
- }
-
- for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) {
- if (counts[i] == 0)
- continue;
-
- num_sensors_for_type = counts[i] + 1;
- curr_arr = kcalloc(num_sensors_for_type, sizeof(*curr_arr),
- GFP_KERNEL);
- if (!curr_arr) {
- rc = -ENOMEM;
- goto sensors_type_err;
- }
-
- num_active_sensor_types++;
- sensors_by_type[i] = curr_arr;
- }
-
- for (i = 0 ; i < arr_size ; i++) {
- type = le32_to_cpu(sensors_arr[i].type);
- curr_arr = sensors_by_type[type];
- curr_arr[sensors_by_type_next_index[type]++] =
- le32_to_cpu(sensors_arr[i].flags);
- }
-
- channels_info = kcalloc(num_active_sensor_types + 1,
- sizeof(*channels_info), GFP_KERNEL);
- if (!channels_info) {
- rc = -ENOMEM;
- goto channels_info_array_err;
- }
-
- for (i = 0 ; i < num_active_sensor_types ; i++) {
- channels_info[i] = kzalloc(sizeof(*channels_info[i]),
- GFP_KERNEL);
- if (!channels_info[i]) {
- rc = -ENOMEM;
- goto channel_info_err;
- }
- }
-
- for (i = 0, j = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) {
- if (!sensors_by_type[i])
- continue;
-
- channels_info[j]->type = i;
- channels_info[j]->config = sensors_by_type[i];
- j++;
- }
-
- hdev->hl_chip_info->info =
- (const struct hwmon_channel_info **)channels_info;
-
- return 0;
-
-channel_info_err:
- for (i = 0 ; i < num_active_sensor_types ; i++)
- if (channels_info[i]) {
- kfree(channels_info[i]->config);
- kfree(channels_info[i]);
- }
- kfree(channels_info);
-channels_info_array_err:
-sensors_type_err:
- for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++)
- kfree(sensors_by_type[i]);
-
- return rc;
-}
-
-static int hl_read(struct device *dev, enum hwmon_sensor_types type,
- u32 attr, int channel, long *val)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
- int rc;
-
- if (hl_device_disabled_or_in_reset(hdev))
- return -ENODEV;
-
- switch (type) {
- case hwmon_temp:
- switch (attr) {
- case hwmon_temp_input:
- case hwmon_temp_max:
- case hwmon_temp_crit:
- case hwmon_temp_max_hyst:
- case hwmon_temp_crit_hyst:
- case hwmon_temp_offset:
- case hwmon_temp_highest:
- break;
- default:
- return -EINVAL;
- }
-
- rc = hl_get_temperature(hdev, channel, attr, val);
- break;
- case hwmon_in:
- switch (attr) {
- case hwmon_in_input:
- case hwmon_in_min:
- case hwmon_in_max:
- case hwmon_in_highest:
- break;
- default:
- return -EINVAL;
- }
-
- rc = hl_get_voltage(hdev, channel, attr, val);
- break;
- case hwmon_curr:
- switch (attr) {
- case hwmon_curr_input:
- case hwmon_curr_min:
- case hwmon_curr_max:
- case hwmon_curr_highest:
- break;
- default:
- return -EINVAL;
- }
-
- rc = hl_get_current(hdev, channel, attr, val);
- break;
- case hwmon_fan:
- switch (attr) {
- case hwmon_fan_input:
- case hwmon_fan_min:
- case hwmon_fan_max:
- break;
- default:
- return -EINVAL;
- }
- rc = hl_get_fan_speed(hdev, channel, attr, val);
- break;
- case hwmon_pwm:
- switch (attr) {
- case hwmon_pwm_input:
- case hwmon_pwm_enable:
- break;
- default:
- return -EINVAL;
- }
- rc = hl_get_pwm_info(hdev, channel, attr, val);
- break;
- default:
- return -EINVAL;
- }
- return rc;
-}
-
-static int hl_write(struct device *dev, enum hwmon_sensor_types type,
- u32 attr, int channel, long val)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
-
- if (hl_device_disabled_or_in_reset(hdev))
- return -ENODEV;
-
- switch (type) {
- case hwmon_temp:
- switch (attr) {
- case hwmon_temp_offset:
- case hwmon_temp_reset_history:
- break;
- default:
- return -EINVAL;
- }
- hl_set_temperature(hdev, channel, attr, val);
- break;
- case hwmon_pwm:
- switch (attr) {
- case hwmon_pwm_input:
- case hwmon_pwm_enable:
- break;
- default:
- return -EINVAL;
- }
- hl_set_pwm_info(hdev, channel, attr, val);
- break;
- case hwmon_in:
- switch (attr) {
- case hwmon_in_reset_history:
- break;
- default:
- return -EINVAL;
- }
- hl_set_voltage(hdev, channel, attr, val);
- break;
- case hwmon_curr:
- switch (attr) {
- case hwmon_curr_reset_history:
- break;
- default:
- return -EINVAL;
- }
- hl_set_current(hdev, channel, attr, val);
- break;
- default:
- return -EINVAL;
- }
- return 0;
-}
-
-static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
- u32 attr, int channel)
-{
- switch (type) {
- case hwmon_temp:
- switch (attr) {
- case hwmon_temp_input:
- case hwmon_temp_max:
- case hwmon_temp_max_hyst:
- case hwmon_temp_crit:
- case hwmon_temp_crit_hyst:
- case hwmon_temp_highest:
- return 0444;
- case hwmon_temp_offset:
- return 0644;
- case hwmon_temp_reset_history:
- return 0200;
- }
- break;
- case hwmon_in:
- switch (attr) {
- case hwmon_in_input:
- case hwmon_in_min:
- case hwmon_in_max:
- case hwmon_in_highest:
- return 0444;
- case hwmon_in_reset_history:
- return 0200;
- }
- break;
- case hwmon_curr:
- switch (attr) {
- case hwmon_curr_input:
- case hwmon_curr_min:
- case hwmon_curr_max:
- case hwmon_curr_highest:
- return 0444;
- case hwmon_curr_reset_history:
- return 0200;
- }
- break;
- case hwmon_fan:
- switch (attr) {
- case hwmon_fan_input:
- case hwmon_fan_min:
- case hwmon_fan_max:
- return 0444;
- }
- break;
- case hwmon_pwm:
- switch (attr) {
- case hwmon_pwm_input:
- case hwmon_pwm_enable:
- return 0644;
- }
- break;
- default:
- break;
- }
- return 0;
-}
-
-static const struct hwmon_ops hl_hwmon_ops = {
- .is_visible = hl_is_visible,
- .read = hl_read,
- .write = hl_write
-};
-
-int hl_get_temperature(struct hl_device *hdev,
- int sensor_index, u32 attr, long *value)
-{
- struct armcp_packet pkt;
- int rc;
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.sensor_index = __cpu_to_le16(sensor_index);
- pkt.type = __cpu_to_le16(attr);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, value);
-
- if (rc) {
- dev_err(hdev->dev,
- "Failed to get temperature from sensor %d, error %d\n",
- sensor_index, rc);
- *value = 0;
- }
-
- return rc;
-}
-
-int hl_set_temperature(struct hl_device *hdev,
- int sensor_index, u32 attr, long value)
-{
- struct armcp_packet pkt;
- int rc;
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.sensor_index = __cpu_to_le16(sensor_index);
- pkt.type = __cpu_to_le16(attr);
- pkt.value = __cpu_to_le64(value);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, NULL);
-
- if (rc)
- dev_err(hdev->dev,
- "Failed to set temperature of sensor %d, error %d\n",
- sensor_index, rc);
-
- return rc;
-}
-
-int hl_get_voltage(struct hl_device *hdev,
- int sensor_index, u32 attr, long *value)
-{
- struct armcp_packet pkt;
- int rc;
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_GET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.sensor_index = __cpu_to_le16(sensor_index);
- pkt.type = __cpu_to_le16(attr);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, value);
-
- if (rc) {
- dev_err(hdev->dev,
- "Failed to get voltage from sensor %d, error %d\n",
- sensor_index, rc);
- *value = 0;
- }
-
- return rc;
-}
-
-int hl_get_current(struct hl_device *hdev,
- int sensor_index, u32 attr, long *value)
-{
- struct armcp_packet pkt;
- int rc;
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_GET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.sensor_index = __cpu_to_le16(sensor_index);
- pkt.type = __cpu_to_le16(attr);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, value);
-
- if (rc) {
- dev_err(hdev->dev,
- "Failed to get current from sensor %d, error %d\n",
- sensor_index, rc);
- *value = 0;
- }
-
- return rc;
-}
-
-int hl_get_fan_speed(struct hl_device *hdev,
- int sensor_index, u32 attr, long *value)
-{
- struct armcp_packet pkt;
- int rc;
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_FAN_SPEED_GET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.sensor_index = __cpu_to_le16(sensor_index);
- pkt.type = __cpu_to_le16(attr);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, value);
-
- if (rc) {
- dev_err(hdev->dev,
- "Failed to get fan speed from sensor %d, error %d\n",
- sensor_index, rc);
- *value = 0;
- }
-
- return rc;
-}
-
-int hl_get_pwm_info(struct hl_device *hdev,
- int sensor_index, u32 attr, long *value)
-{
- struct armcp_packet pkt;
- int rc;
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_GET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.sensor_index = __cpu_to_le16(sensor_index);
- pkt.type = __cpu_to_le16(attr);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, value);
-
- if (rc) {
- dev_err(hdev->dev,
- "Failed to get pwm info from sensor %d, error %d\n",
- sensor_index, rc);
- *value = 0;
- }
-
- return rc;
-}
-
-void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
- long value)
-{
- struct armcp_packet pkt;
- int rc;
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_SET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.sensor_index = __cpu_to_le16(sensor_index);
- pkt.type = __cpu_to_le16(attr);
- pkt.value = cpu_to_le64(value);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, NULL);
-
- if (rc)
- dev_err(hdev->dev,
- "Failed to set pwm info to sensor %d, error %d\n",
- sensor_index, rc);
-}
-
-int hl_set_voltage(struct hl_device *hdev,
- int sensor_index, u32 attr, long value)
-{
- struct armcp_packet pkt;
- int rc;
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_SET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.sensor_index = __cpu_to_le16(sensor_index);
- pkt.type = __cpu_to_le16(attr);
- pkt.value = __cpu_to_le64(value);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, NULL);
-
- if (rc)
- dev_err(hdev->dev,
- "Failed to set voltage of sensor %d, error %d\n",
- sensor_index, rc);
-
- return rc;
-}
-
-int hl_set_current(struct hl_device *hdev,
- int sensor_index, u32 attr, long value)
-{
- struct armcp_packet pkt;
- int rc;
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_SET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.sensor_index = __cpu_to_le16(sensor_index);
- pkt.type = __cpu_to_le16(attr);
- pkt.value = __cpu_to_le64(value);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SENSORS_PKT_TIMEOUT, NULL);
-
- if (rc)
- dev_err(hdev->dev,
- "Failed to set current of sensor %d, error %d\n",
- sensor_index, rc);
-
- return rc;
-}
-
-int hl_hwmon_init(struct hl_device *hdev)
-{
- struct device *dev = hdev->pdev ? &hdev->pdev->dev : hdev->dev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- int rc;
-
- if ((hdev->hwmon_initialized) || !(hdev->fw_loading))
- return 0;
-
- if (hdev->hl_chip_info->info) {
- hdev->hl_chip_info->ops = &hl_hwmon_ops;
-
- hdev->hwmon_dev = hwmon_device_register_with_info(dev,
- prop->armcp_info.card_name, hdev,
- hdev->hl_chip_info, NULL);
- if (IS_ERR(hdev->hwmon_dev)) {
- rc = PTR_ERR(hdev->hwmon_dev);
- dev_err(hdev->dev,
- "Unable to register hwmon device: %d\n", rc);
- return rc;
- }
-
- dev_info(hdev->dev, "%s: add sensors information\n",
- dev_name(hdev->hwmon_dev));
-
- hdev->hwmon_initialized = true;
- } else {
- dev_info(hdev->dev, "no available sensors\n");
- }
-
- return 0;
-}
-
-void hl_hwmon_fini(struct hl_device *hdev)
-{
- if (!hdev->hwmon_initialized)
- return;
-
- hwmon_device_unregister(hdev->hwmon_dev);
-}
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright 2016-2020 HabanaLabs, Ltd.
- * All Rights Reserved.
- *
- */
-
-#ifndef ARMCP_IF_H
-#define ARMCP_IF_H
-
-#include <linux/types.h>
-
-/*
- * EVENT QUEUE
- */
-
-struct hl_eq_header {
- __le32 reserved;
- __le32 ctl;
-};
-
-struct hl_eq_ecc_data {
- __le64 ecc_address;
- __le64 ecc_syndrom;
- __u8 memory_wrapper_idx;
- __u8 pad[7];
-};
-
-struct hl_eq_entry {
- struct hl_eq_header hdr;
- union {
- struct hl_eq_ecc_data ecc_data;
- __le64 data[7];
- };
-};
-
-#define HL_EQ_ENTRY_SIZE sizeof(struct hl_eq_entry)
-
-#define EQ_CTL_READY_SHIFT 31
-#define EQ_CTL_READY_MASK 0x80000000
-
-#define EQ_CTL_EVENT_TYPE_SHIFT 16
-#define EQ_CTL_EVENT_TYPE_MASK 0x03FF0000
-
-enum pq_init_status {
- PQ_INIT_STATUS_NA = 0,
- PQ_INIT_STATUS_READY_FOR_CP,
- PQ_INIT_STATUS_READY_FOR_HOST,
- PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI
-};
-
-/*
- * ArmCP Primary Queue Packets
- *
- * During normal operation, the host's kernel driver needs to send various
- * messages to ArmCP, usually either to SET some value into a H/W periphery or
- * to GET the current value of some H/W periphery. For example, SET the
- * frequency of MME/TPC and GET the value of the thermal sensor.
- *
- * These messages can be initiated either by the User application or by the
- * host's driver itself, e.g. power management code. In either case, the
- * communication from the host's driver to ArmCP will *always* be in
- * synchronous mode, meaning that the host will send a single message and poll
- * until the message was acknowledged and the results are ready (if results are
- * needed).
- *
- * This means that only a single message can be sent at a time and the host's
- * driver must wait for its result before sending the next message. Having said
- * that, because these are control messages which are sent in a relatively low
- * frequency, this limitation seems acceptable. It's important to note that
- * in case of multiple devices, messages to different devices *can* be sent
- * at the same time.
- *
- * The message, inputs/outputs (if relevant) and fence object will be located
- * on the device DDR at an address that will be determined by the host's driver.
- * During device initialization phase, the host will pass to ArmCP that address.
- * Most of the message types will contain inputs/outputs inside the message
- * itself. The common part of each message will contain the opcode of the
- * message (its type) and a field representing a fence object.
- *
- * When the host's driver wishes to send a message to ArmCP, it will write the
- * message contents to the device DDR, clear the fence object and then write the
- * value 484 to the mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR register to issue
- * the 484 interrupt-id to the ARM core.
- *
- * Upon receiving the 484 interrupt-id, ArmCP will read the message from the
- * DDR. In case the message is a SET operation, ArmCP will first perform the
- * operation and then write to the fence object on the device DDR. In case the
- * message is a GET operation, ArmCP will first fill the results section on the
- * device DDR and then write to the fence object. If an error occurred, ArmCP
- * will fill the rc field with the right error code.
- *
- * In the meantime, the host's driver will poll on the fence object. Once the
- * host sees that the fence object is signaled, it will read the results from
- * the device DDR (if relevant) and resume the code execution in the host's
- * driver.
- *
- * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8
- * so the value being put by the host's driver matches the value read by ArmCP
- *
- * Non-QMAN packets should be limited to values 1 through (2^8 - 1)
- *
- * Detailed description:
- *
- * ARMCP_PACKET_DISABLE_PCI_ACCESS -
- * After receiving this packet the embedded CPU must NOT issue PCI
- * transactions (read/write) towards the Host CPU. This also include
- * sending MSI-X interrupts.
- * This packet is usually sent before the device is moved to D3Hot state.
- *
- * ARMCP_PACKET_ENABLE_PCI_ACCESS -
- * After receiving this packet the embedded CPU is allowed to issue PCI
- * transactions towards the Host CPU, including sending MSI-X interrupts.
- * This packet is usually send after the device is moved to D0 state.
- *
- * ARMCP_PACKET_TEMPERATURE_GET -
- * Fetch the current temperature / Max / Max Hyst / Critical /
- * Critical Hyst of a specified thermal sensor. The packet's
- * arguments specify the desired sensor and the field to get.
- *
- * ARMCP_PACKET_VOLTAGE_GET -
- * Fetch the voltage / Max / Min of a specified sensor. The packet's
- * arguments specify the sensor and type.
- *
- * ARMCP_PACKET_CURRENT_GET -
- * Fetch the current / Max / Min of a specified sensor. The packet's
- * arguments specify the sensor and type.
- *
- * ARMCP_PACKET_FAN_SPEED_GET -
- * Fetch the speed / Max / Min of a specified fan. The packet's
- * arguments specify the sensor and type.
- *
- * ARMCP_PACKET_PWM_GET -
- * Fetch the pwm value / mode of a specified pwm. The packet's
- * arguments specify the sensor and type.
- *
- * ARMCP_PACKET_PWM_SET -
- * Set the pwm value / mode of a specified pwm. The packet's
- * arguments specify the sensor, type and value.
- *
- * ARMCP_PACKET_FREQUENCY_SET -
- * Set the frequency of a specified PLL. The packet's arguments specify
- * the PLL and the desired frequency. The actual frequency in the device
- * might differ from the requested frequency.
- *
- * ARMCP_PACKET_FREQUENCY_GET -
- * Fetch the frequency of a specified PLL. The packet's arguments specify
- * the PLL.
- *
- * ARMCP_PACKET_LED_SET -
- * Set the state of a specified led. The packet's arguments
- * specify the led and the desired state.
- *
- * ARMCP_PACKET_I2C_WR -
- * Write 32-bit value to I2C device. The packet's arguments specify the
- * I2C bus, address and value.
- *
- * ARMCP_PACKET_I2C_RD -
- * Read 32-bit value from I2C device. The packet's arguments specify the
- * I2C bus and address.
- *
- * ARMCP_PACKET_INFO_GET -
- * Fetch information from the device as specified in the packet's
- * structure. The host's driver passes the max size it allows the ArmCP to
- * write to the structure, to prevent data corruption in case of
- * mismatched driver/FW versions.
- *
- * ARMCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed
- *
- * ARMCP_PACKET_UNMASK_RAZWI_IRQ -
- * Unmask the given IRQ. The IRQ number is specified in the value field.
- * The packet is sent after receiving an interrupt and printing its
- * relevant information.
- *
- * ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY -
- * Unmask the given IRQs. The IRQs numbers are specified in an array right
- * after the armcp_packet structure, where its first element is the array
- * length. The packet is sent after a soft reset was done in order to
- * handle any interrupts that were sent during the reset process.
- *
- * ARMCP_PACKET_TEST -
- * Test packet for ArmCP connectivity. The CPU will put the fence value
- * in the result field.
- *
- * ARMCP_PACKET_FREQUENCY_CURR_GET -
- * Fetch the current frequency of a specified PLL. The packet's arguments
- * specify the PLL.
- *
- * ARMCP_PACKET_MAX_POWER_GET -
- * Fetch the maximal power of the device.
- *
- * ARMCP_PACKET_MAX_POWER_SET -
- * Set the maximal power of the device. The packet's arguments specify
- * the power.
- *
- * ARMCP_PACKET_EEPROM_DATA_GET -
- * Get EEPROM data from the ArmCP kernel. The buffer is specified in the
- * addr field. The CPU will put the returned data size in the result
- * field. In addition, the host's driver passes the max size it allows the
- * ArmCP to write to the structure, to prevent data corruption in case of
- * mismatched driver/FW versions.
- *
- * ARMCP_PACKET_TEMPERATURE_SET -
- * Set the value of the offset property of a specified thermal sensor.
- * The packet's arguments specify the desired sensor and the field to
- * set.
- *
- * ARMCP_PACKET_VOLTAGE_SET -
- * Trigger the reset_history property of a specified voltage sensor.
- * The packet's arguments specify the desired sensor and the field to
- * set.
- *
- * ARMCP_PACKET_CURRENT_SET -
- * Trigger the reset_history property of a specified current sensor.
- * The packet's arguments specify the desired sensor and the field to
- * set.
- */
-
-enum armcp_packet_id {
- ARMCP_PACKET_DISABLE_PCI_ACCESS = 1, /* internal */
- ARMCP_PACKET_ENABLE_PCI_ACCESS, /* internal */
- ARMCP_PACKET_TEMPERATURE_GET, /* sysfs */
- ARMCP_PACKET_VOLTAGE_GET, /* sysfs */
- ARMCP_PACKET_CURRENT_GET, /* sysfs */
- ARMCP_PACKET_FAN_SPEED_GET, /* sysfs */
- ARMCP_PACKET_PWM_GET, /* sysfs */
- ARMCP_PACKET_PWM_SET, /* sysfs */
- ARMCP_PACKET_FREQUENCY_SET, /* sysfs */
- ARMCP_PACKET_FREQUENCY_GET, /* sysfs */
- ARMCP_PACKET_LED_SET, /* debugfs */
- ARMCP_PACKET_I2C_WR, /* debugfs */
- ARMCP_PACKET_I2C_RD, /* debugfs */
- ARMCP_PACKET_INFO_GET, /* IOCTL */
- ARMCP_PACKET_FLASH_PROGRAM_REMOVED,
- ARMCP_PACKET_UNMASK_RAZWI_IRQ, /* internal */
- ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY, /* internal */
- ARMCP_PACKET_TEST, /* internal */
- ARMCP_PACKET_FREQUENCY_CURR_GET, /* sysfs */
- ARMCP_PACKET_MAX_POWER_GET, /* sysfs */
- ARMCP_PACKET_MAX_POWER_SET, /* sysfs */
- ARMCP_PACKET_EEPROM_DATA_GET, /* sysfs */
- ARMCP_RESERVED,
- ARMCP_PACKET_TEMPERATURE_SET, /* sysfs */
- ARMCP_PACKET_VOLTAGE_SET, /* sysfs */
- ARMCP_PACKET_CURRENT_SET, /* sysfs */
-};
-
-#define ARMCP_PACKET_FENCE_VAL 0xFE8CE7A5
-
-#define ARMCP_PKT_CTL_RC_SHIFT 12
-#define ARMCP_PKT_CTL_RC_MASK 0x0000F000
-
-#define ARMCP_PKT_CTL_OPCODE_SHIFT 16
-#define ARMCP_PKT_CTL_OPCODE_MASK 0x1FFF0000
-
-struct armcp_packet {
- union {
- __le64 value; /* For SET packets */
- __le64 result; /* For GET packets */
- __le64 addr; /* For PQ */
- };
-
- __le32 ctl;
-
- __le32 fence; /* Signal to host that message is completed */
-
- union {
- struct {/* For temperature/current/voltage/fan/pwm get/set */
- __le16 sensor_index;
- __le16 type;
- };
-
- struct { /* For I2C read/write */
- __u8 i2c_bus;
- __u8 i2c_addr;
- __u8 i2c_reg;
- __u8 pad; /* unused */
- };
-
- /* For frequency get/set */
- __le32 pll_index;
-
- /* For led set */
- __le32 led_index;
-
- /* For get Armcp info/EEPROM data */
- __le32 data_max_size;
- };
-
- __le32 reserved;
-};
-
-struct armcp_unmask_irq_arr_packet {
- struct armcp_packet armcp_pkt;
- __le32 length;
- __le32 irqs[0];
-};
-
-enum armcp_packet_rc {
- armcp_packet_success,
- armcp_packet_invalid,
- armcp_packet_fault
-};
-
-/*
- * armcp_temp_type should adhere to hwmon_temp_attributes
- * defined in Linux kernel hwmon.h file
- */
-enum armcp_temp_type {
- armcp_temp_input,
- armcp_temp_max = 6,
- armcp_temp_max_hyst,
- armcp_temp_crit,
- armcp_temp_crit_hyst,
- armcp_temp_offset = 19,
- armcp_temp_highest = 22,
- armcp_temp_reset_history = 23
-};
-
-enum armcp_in_attributes {
- armcp_in_input,
- armcp_in_min,
- armcp_in_max,
- armcp_in_highest = 7,
- armcp_in_reset_history
-};
-
-enum armcp_curr_attributes {
- armcp_curr_input,
- armcp_curr_min,
- armcp_curr_max,
- armcp_curr_highest = 7,
- armcp_curr_reset_history
-};
-
-enum armcp_fan_attributes {
- armcp_fan_input,
- armcp_fan_min = 2,
- armcp_fan_max
-};
-
-enum armcp_pwm_attributes {
- armcp_pwm_input,
- armcp_pwm_enable
-};
-
-/* Event Queue Packets */
-
-struct eq_generic_event {
- __le64 data[7];
-};
-
-/*
- * ArmCP info
- */
-
-#define CARD_NAME_MAX_LEN 16
-#define VERSION_MAX_LEN 128
-#define ARMCP_MAX_SENSORS 128
-
-struct armcp_sensor {
- __le32 type;
- __le32 flags;
-};
-
-/**
- * struct armcp_card_types - ASIC card type.
- * @armcp_card_type_pci: PCI card.
- * @armcp_card_type_pmc: PCI Mezzanine Card.
- */
-enum armcp_card_types {
- armcp_card_type_pci,
- armcp_card_type_pmc
-};
-
-/**
- * struct armcp_info - Info from ArmCP that is necessary to the host's driver
- * @sensors: available sensors description.
- * @kernel_version: ArmCP linux kernel version.
- * @reserved: reserved field.
- * @card_type: card configuration type.
- * @card_location: in a server, each card has different connections topology
- * depending on its location (relevant for PMC card type)
- * @cpld_version: CPLD programmed F/W version.
- * @infineon_version: Infineon main DC-DC version.
- * @fuse_version: silicon production FUSE information.
- * @thermal_version: thermald S/W version.
- * @armcp_version: ArmCP S/W version.
- * @dram_size: available DRAM size.
- * @card_name: card name that will be displayed in HWMON subsystem on the host
- */
-struct armcp_info {
- struct armcp_sensor sensors[ARMCP_MAX_SENSORS];
- __u8 kernel_version[VERSION_MAX_LEN];
- __le32 reserved;
- __le32 card_type;
- __le32 card_location;
- __le32 cpld_version;
- __le32 infineon_version;
- __u8 fuse_version[VERSION_MAX_LEN];
- __u8 thermal_version[VERSION_MAX_LEN];
- __u8 armcp_version[VERSION_MAX_LEN];
- __le64 dram_size;
- char card_name[CARD_NAME_MAX_LEN];
-};
-
-#endif /* ARMCP_IF_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2020 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef ARMCP_IF_H
+#define ARMCP_IF_H
+
+#include <linux/types.h>
+
+/*
+ * EVENT QUEUE
+ */
+
+struct hl_eq_header {
+ __le32 reserved;
+ __le32 ctl;
+};
+
+struct hl_eq_ecc_data {
+ __le64 ecc_address;
+ __le64 ecc_syndrom;
+ __u8 memory_wrapper_idx;
+ __u8 pad[7];
+};
+
+struct hl_eq_entry {
+ struct hl_eq_header hdr;
+ union {
+ struct hl_eq_ecc_data ecc_data;
+ __le64 data[7];
+ };
+};
+
+#define HL_EQ_ENTRY_SIZE sizeof(struct hl_eq_entry)
+
+#define EQ_CTL_READY_SHIFT 31
+#define EQ_CTL_READY_MASK 0x80000000
+
+#define EQ_CTL_EVENT_TYPE_SHIFT 16
+#define EQ_CTL_EVENT_TYPE_MASK 0x03FF0000
+
+enum pq_init_status {
+ PQ_INIT_STATUS_NA = 0,
+ PQ_INIT_STATUS_READY_FOR_CP,
+ PQ_INIT_STATUS_READY_FOR_HOST,
+ PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI
+};
+
+/*
+ * ArmCP Primary Queue Packets
+ *
+ * During normal operation, the host's kernel driver needs to send various
+ * messages to ArmCP, usually either to SET some value into a H/W periphery or
+ * to GET the current value of some H/W periphery. For example, SET the
+ * frequency of MME/TPC and GET the value of the thermal sensor.
+ *
+ * These messages can be initiated either by the User application or by the
+ * host's driver itself, e.g. power management code. In either case, the
+ * communication from the host's driver to ArmCP will *always* be in
+ * synchronous mode, meaning that the host will send a single message and poll
+ * until the message was acknowledged and the results are ready (if results are
+ * needed).
+ *
+ * This means that only a single message can be sent at a time and the host's
+ * driver must wait for its result before sending the next message. Having said
+ * that, because these are control messages which are sent in a relatively low
+ * frequency, this limitation seems acceptable. It's important to note that
+ * in case of multiple devices, messages to different devices *can* be sent
+ * at the same time.
+ *
+ * The message, inputs/outputs (if relevant) and fence object will be located
+ * on the device DDR at an address that will be determined by the host's driver.
+ * During device initialization phase, the host will pass to ArmCP that address.
+ * Most of the message types will contain inputs/outputs inside the message
+ * itself. The common part of each message will contain the opcode of the
+ * message (its type) and a field representing a fence object.
+ *
+ * When the host's driver wishes to send a message to ArmCP, it will write the
+ * message contents to the device DDR, clear the fence object and then write the
+ * value 484 to the mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR register to issue
+ * the 484 interrupt-id to the ARM core.
+ *
+ * Upon receiving the 484 interrupt-id, ArmCP will read the message from the
+ * DDR. In case the message is a SET operation, ArmCP will first perform the
+ * operation and then write to the fence object on the device DDR. In case the
+ * message is a GET operation, ArmCP will first fill the results section on the
+ * device DDR and then write to the fence object. If an error occurred, ArmCP
+ * will fill the rc field with the right error code.
+ *
+ * In the meantime, the host's driver will poll on the fence object. Once the
+ * host sees that the fence object is signaled, it will read the results from
+ * the device DDR (if relevant) and resume the code execution in the host's
+ * driver.
+ *
+ * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8
+ * so the value being put by the host's driver matches the value read by ArmCP
+ *
+ * Non-QMAN packets should be limited to values 1 through (2^8 - 1)
+ *
+ * Detailed description:
+ *
+ * ARMCP_PACKET_DISABLE_PCI_ACCESS -
+ * After receiving this packet the embedded CPU must NOT issue PCI
+ * transactions (read/write) towards the Host CPU. This also include
+ * sending MSI-X interrupts.
+ * This packet is usually sent before the device is moved to D3Hot state.
+ *
+ * ARMCP_PACKET_ENABLE_PCI_ACCESS -
+ * After receiving this packet the embedded CPU is allowed to issue PCI
+ * transactions towards the Host CPU, including sending MSI-X interrupts.
+ * This packet is usually send after the device is moved to D0 state.
+ *
+ * ARMCP_PACKET_TEMPERATURE_GET -
+ * Fetch the current temperature / Max / Max Hyst / Critical /
+ * Critical Hyst of a specified thermal sensor. The packet's
+ * arguments specify the desired sensor and the field to get.
+ *
+ * ARMCP_PACKET_VOLTAGE_GET -
+ * Fetch the voltage / Max / Min of a specified sensor. The packet's
+ * arguments specify the sensor and type.
+ *
+ * ARMCP_PACKET_CURRENT_GET -
+ * Fetch the current / Max / Min of a specified sensor. The packet's
+ * arguments specify the sensor and type.
+ *
+ * ARMCP_PACKET_FAN_SPEED_GET -
+ * Fetch the speed / Max / Min of a specified fan. The packet's
+ * arguments specify the sensor and type.
+ *
+ * ARMCP_PACKET_PWM_GET -
+ * Fetch the pwm value / mode of a specified pwm. The packet's
+ * arguments specify the sensor and type.
+ *
+ * ARMCP_PACKET_PWM_SET -
+ * Set the pwm value / mode of a specified pwm. The packet's
+ * arguments specify the sensor, type and value.
+ *
+ * ARMCP_PACKET_FREQUENCY_SET -
+ * Set the frequency of a specified PLL. The packet's arguments specify
+ * the PLL and the desired frequency. The actual frequency in the device
+ * might differ from the requested frequency.
+ *
+ * ARMCP_PACKET_FREQUENCY_GET -
+ * Fetch the frequency of a specified PLL. The packet's arguments specify
+ * the PLL.
+ *
+ * ARMCP_PACKET_LED_SET -
+ * Set the state of a specified led. The packet's arguments
+ * specify the led and the desired state.
+ *
+ * ARMCP_PACKET_I2C_WR -
+ * Write 32-bit value to I2C device. The packet's arguments specify the
+ * I2C bus, address and value.
+ *
+ * ARMCP_PACKET_I2C_RD -
+ * Read 32-bit value from I2C device. The packet's arguments specify the
+ * I2C bus and address.
+ *
+ * ARMCP_PACKET_INFO_GET -
+ * Fetch information from the device as specified in the packet's
+ * structure. The host's driver passes the max size it allows the ArmCP to
+ * write to the structure, to prevent data corruption in case of
+ * mismatched driver/FW versions.
+ *
+ * ARMCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed
+ *
+ * ARMCP_PACKET_UNMASK_RAZWI_IRQ -
+ * Unmask the given IRQ. The IRQ number is specified in the value field.
+ * The packet is sent after receiving an interrupt and printing its
+ * relevant information.
+ *
+ * ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY -
+ * Unmask the given IRQs. The IRQs numbers are specified in an array right
+ * after the armcp_packet structure, where its first element is the array
+ * length. The packet is sent after a soft reset was done in order to
+ * handle any interrupts that were sent during the reset process.
+ *
+ * ARMCP_PACKET_TEST -
+ * Test packet for ArmCP connectivity. The CPU will put the fence value
+ * in the result field.
+ *
+ * ARMCP_PACKET_FREQUENCY_CURR_GET -
+ * Fetch the current frequency of a specified PLL. The packet's arguments
+ * specify the PLL.
+ *
+ * ARMCP_PACKET_MAX_POWER_GET -
+ * Fetch the maximal power of the device.
+ *
+ * ARMCP_PACKET_MAX_POWER_SET -
+ * Set the maximal power of the device. The packet's arguments specify
+ * the power.
+ *
+ * ARMCP_PACKET_EEPROM_DATA_GET -
+ * Get EEPROM data from the ArmCP kernel. The buffer is specified in the
+ * addr field. The CPU will put the returned data size in the result
+ * field. In addition, the host's driver passes the max size it allows the
+ * ArmCP to write to the structure, to prevent data corruption in case of
+ * mismatched driver/FW versions.
+ *
+ * ARMCP_PACKET_TEMPERATURE_SET -
+ * Set the value of the offset property of a specified thermal sensor.
+ * The packet's arguments specify the desired sensor and the field to
+ * set.
+ *
+ * ARMCP_PACKET_VOLTAGE_SET -
+ * Trigger the reset_history property of a specified voltage sensor.
+ * The packet's arguments specify the desired sensor and the field to
+ * set.
+ *
+ * ARMCP_PACKET_CURRENT_SET -
+ * Trigger the reset_history property of a specified current sensor.
+ * The packet's arguments specify the desired sensor and the field to
+ * set.
+ */
+
+enum armcp_packet_id {
+ ARMCP_PACKET_DISABLE_PCI_ACCESS = 1, /* internal */
+ ARMCP_PACKET_ENABLE_PCI_ACCESS, /* internal */
+ ARMCP_PACKET_TEMPERATURE_GET, /* sysfs */
+ ARMCP_PACKET_VOLTAGE_GET, /* sysfs */
+ ARMCP_PACKET_CURRENT_GET, /* sysfs */
+ ARMCP_PACKET_FAN_SPEED_GET, /* sysfs */
+ ARMCP_PACKET_PWM_GET, /* sysfs */
+ ARMCP_PACKET_PWM_SET, /* sysfs */
+ ARMCP_PACKET_FREQUENCY_SET, /* sysfs */
+ ARMCP_PACKET_FREQUENCY_GET, /* sysfs */
+ ARMCP_PACKET_LED_SET, /* debugfs */
+ ARMCP_PACKET_I2C_WR, /* debugfs */
+ ARMCP_PACKET_I2C_RD, /* debugfs */
+ ARMCP_PACKET_INFO_GET, /* IOCTL */
+ ARMCP_PACKET_FLASH_PROGRAM_REMOVED,
+ ARMCP_PACKET_UNMASK_RAZWI_IRQ, /* internal */
+ ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY, /* internal */
+ ARMCP_PACKET_TEST, /* internal */
+ ARMCP_PACKET_FREQUENCY_CURR_GET, /* sysfs */
+ ARMCP_PACKET_MAX_POWER_GET, /* sysfs */
+ ARMCP_PACKET_MAX_POWER_SET, /* sysfs */
+ ARMCP_PACKET_EEPROM_DATA_GET, /* sysfs */
+ ARMCP_RESERVED,
+ ARMCP_PACKET_TEMPERATURE_SET, /* sysfs */
+ ARMCP_PACKET_VOLTAGE_SET, /* sysfs */
+ ARMCP_PACKET_CURRENT_SET, /* sysfs */
+};
+
+#define ARMCP_PACKET_FENCE_VAL 0xFE8CE7A5
+
+#define ARMCP_PKT_CTL_RC_SHIFT 12
+#define ARMCP_PKT_CTL_RC_MASK 0x0000F000
+
+#define ARMCP_PKT_CTL_OPCODE_SHIFT 16
+#define ARMCP_PKT_CTL_OPCODE_MASK 0x1FFF0000
+
+struct armcp_packet {
+ union {
+ __le64 value; /* For SET packets */
+ __le64 result; /* For GET packets */
+ __le64 addr; /* For PQ */
+ };
+
+ __le32 ctl;
+
+ __le32 fence; /* Signal to host that message is completed */
+
+ union {
+ struct {/* For temperature/current/voltage/fan/pwm get/set */
+ __le16 sensor_index;
+ __le16 type;
+ };
+
+ struct { /* For I2C read/write */
+ __u8 i2c_bus;
+ __u8 i2c_addr;
+ __u8 i2c_reg;
+ __u8 pad; /* unused */
+ };
+
+ /* For frequency get/set */
+ __le32 pll_index;
+
+ /* For led set */
+ __le32 led_index;
+
+ /* For get Armcp info/EEPROM data */
+ __le32 data_max_size;
+ };
+
+ __le32 reserved;
+};
+
+struct armcp_unmask_irq_arr_packet {
+ struct armcp_packet armcp_pkt;
+ __le32 length;
+ __le32 irqs[0];
+};
+
+enum armcp_packet_rc {
+ armcp_packet_success,
+ armcp_packet_invalid,
+ armcp_packet_fault
+};
+
+/*
+ * armcp_temp_type should adhere to hwmon_temp_attributes
+ * defined in Linux kernel hwmon.h file
+ */
+enum armcp_temp_type {
+ armcp_temp_input,
+ armcp_temp_max = 6,
+ armcp_temp_max_hyst,
+ armcp_temp_crit,
+ armcp_temp_crit_hyst,
+ armcp_temp_offset = 19,
+ armcp_temp_highest = 22,
+ armcp_temp_reset_history = 23
+};
+
+enum armcp_in_attributes {
+ armcp_in_input,
+ armcp_in_min,
+ armcp_in_max,
+ armcp_in_highest = 7,
+ armcp_in_reset_history
+};
+
+enum armcp_curr_attributes {
+ armcp_curr_input,
+ armcp_curr_min,
+ armcp_curr_max,
+ armcp_curr_highest = 7,
+ armcp_curr_reset_history
+};
+
+enum armcp_fan_attributes {
+ armcp_fan_input,
+ armcp_fan_min = 2,
+ armcp_fan_max
+};
+
+enum armcp_pwm_attributes {
+ armcp_pwm_input,
+ armcp_pwm_enable
+};
+
+/* Event Queue Packets */
+
+struct eq_generic_event {
+ __le64 data[7];
+};
+
+/*
+ * ArmCP info
+ */
+
+#define CARD_NAME_MAX_LEN 16
+#define VERSION_MAX_LEN 128
+#define ARMCP_MAX_SENSORS 128
+
+struct armcp_sensor {
+ __le32 type;
+ __le32 flags;
+};
+
+/**
+ * struct armcp_card_types - ASIC card type.
+ * @armcp_card_type_pci: PCI card.
+ * @armcp_card_type_pmc: PCI Mezzanine Card.
+ */
+enum armcp_card_types {
+ armcp_card_type_pci,
+ armcp_card_type_pmc
+};
+
+/**
+ * struct armcp_info - Info from ArmCP that is necessary to the host's driver
+ * @sensors: available sensors description.
+ * @kernel_version: ArmCP linux kernel version.
+ * @reserved: reserved field.
+ * @card_type: card configuration type.
+ * @card_location: in a server, each card has different connections topology
+ * depending on its location (relevant for PMC card type)
+ * @cpld_version: CPLD programmed F/W version.
+ * @infineon_version: Infineon main DC-DC version.
+ * @fuse_version: silicon production FUSE information.
+ * @thermal_version: thermald S/W version.
+ * @armcp_version: ArmCP S/W version.
+ * @dram_size: available DRAM size.
+ * @card_name: card name that will be displayed in HWMON subsystem on the host
+ */
+struct armcp_info {
+ struct armcp_sensor sensors[ARMCP_MAX_SENSORS];
+ __u8 kernel_version[VERSION_MAX_LEN];
+ __le32 reserved;
+ __le32 card_type;
+ __le32 card_location;
+ __le32 cpld_version;
+ __le32 infineon_version;
+ __u8 fuse_version[VERSION_MAX_LEN];
+ __u8 thermal_version[VERSION_MAX_LEN];
+ __u8 armcp_version[VERSION_MAX_LEN];
+ __le64 dram_size;
+ char card_name[CARD_NAME_MAX_LEN];
+};
+
+#endif /* ARMCP_IF_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2018-2020 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef HL_BOOT_IF_H
+#define HL_BOOT_IF_H
+
+#define LKD_HARD_RESET_MAGIC 0xED7BD694
+#define HL_POWER9_HOST_MAGIC 0x1DA30009
+
+#define BOOT_FIT_SRAM_OFFSET 0x200000
+
+/*
+ * CPU error bits in BOOT_ERROR registers
+ *
+ * CPU_BOOT_ERR0_DRAM_INIT_FAIL DRAM initialization failed.
+ * DRAM is not reliable to use.
+ *
+ * CPU_BOOT_ERR0_FIT_CORRUPTED FIT data integrity verification of the
+ * image provided by the host has failed.
+ *
+ * CPU_BOOT_ERR0_TS_INIT_FAIL Thermal Sensor initialization failed.
+ * Boot continues as usual, but keep in
+ * mind this is a warning.
+ *
+ * CPU_BOOT_ERR0_DRAM_SKIPPED DRAM initialization has been skipped.
+ * Skipping DRAM initialization has been
+ * requested (e.g. strap, command, etc.)
+ * and FW skipped the DRAM initialization.
+ * Host can initialize the DRAM.
+ *
+ * CPU_BOOT_ERR0_BMC_WAIT_SKIPPED Waiting for BMC data will be skipped.
+ * Meaning the BMC data might not be
+ * available until reset.
+ *
+ * CPU_BOOT_ERR0_NIC_DATA_NOT_RDY NIC data from BMC is not ready.
+ * BMC has not provided the NIC data yet.
+ * Once provided this bit will be cleared.
+ *
+ * CPU_BOOT_ERR0_NIC_FW_FAIL NIC FW loading failed.
+ * The NIC FW loading and initialization
+ * failed. This means NICs are not usable.
+ *
+ * CPU_BOOT_ERR0_ENABLED Error registers enabled.
+ * This is a main indication that the
+ * running FW populates the error
+ * registers. Meaning the error bits are
+ * not garbage, but actual error statuses.
+ */
+#define CPU_BOOT_ERR0_DRAM_INIT_FAIL (1 << 0)
+#define CPU_BOOT_ERR0_FIT_CORRUPTED (1 << 1)
+#define CPU_BOOT_ERR0_TS_INIT_FAIL (1 << 2)
+#define CPU_BOOT_ERR0_DRAM_SKIPPED (1 << 3)
+#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED (1 << 4)
+#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY (1 << 5)
+#define CPU_BOOT_ERR0_NIC_FW_FAIL (1 << 6)
+#define CPU_BOOT_ERR0_ENABLED (1 << 31)
+
+enum cpu_boot_status {
+ CPU_BOOT_STATUS_NA = 0, /* Default value after reset of chip */
+ CPU_BOOT_STATUS_IN_WFE = 1,
+ CPU_BOOT_STATUS_DRAM_RDY = 2,
+ CPU_BOOT_STATUS_SRAM_AVAIL = 3,
+ CPU_BOOT_STATUS_IN_BTL = 4, /* BTL is H/W FSM */
+ CPU_BOOT_STATUS_IN_PREBOOT = 5,
+ CPU_BOOT_STATUS_IN_SPL, /* deprecated - not reported */
+ CPU_BOOT_STATUS_IN_UBOOT = 7,
+ CPU_BOOT_STATUS_DRAM_INIT_FAIL, /* deprecated - will be removed */
+ CPU_BOOT_STATUS_FIT_CORRUPTED, /* deprecated - will be removed */
+ /* U-Boot console prompt activated, commands are not processed */
+ CPU_BOOT_STATUS_UBOOT_NOT_READY = 10,
+ /* Finished NICs init, reported after DRAM and NICs */
+ CPU_BOOT_STATUS_NIC_FW_RDY = 11,
+ CPU_BOOT_STATUS_TS_INIT_FAIL, /* deprecated - will be removed */
+ CPU_BOOT_STATUS_DRAM_SKIPPED, /* deprecated - will be removed */
+ CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */
+ /* Last boot loader progress status, ready to receive commands */
+ CPU_BOOT_STATUS_READY_TO_BOOT = 15,
+ CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16,
+};
+
+enum kmd_msg {
+ KMD_MSG_NA = 0,
+ KMD_MSG_GOTO_WFE,
+ KMD_MSG_FIT_RDY,
+ KMD_MSG_SKIP_BMC,
+};
+
+enum cpu_msg_status {
+ CPU_MSG_CLR = 0,
+ CPU_MSG_OK,
+ CPU_MSG_ERR,
+};
+
+#endif /* HL_BOOT_IF_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef QMAN_IF_H
+#define QMAN_IF_H
+
+#include <linux/types.h>
+
+/*
+ * PRIMARY QUEUE
+ */
+
+struct hl_bd {
+ __le64 ptr;
+ __le32 len;
+ __le32 ctl;
+};
+
+#define HL_BD_SIZE sizeof(struct hl_bd)
+
+/*
+ * S/W CTL FIELDS.
+ *
+ * BD_CTL_REPEAT_VALID tells the CP whether the repeat field in the BD CTL is
+ * valid. 1 means the repeat field is valid, 0 means not-valid,
+ * i.e. repeat == 1
+ */
+#define BD_CTL_REPEAT_VALID_SHIFT 24
+#define BD_CTL_REPEAT_VALID_MASK 0x01000000
+
+#define BD_CTL_SHADOW_INDEX_SHIFT 0
+#define BD_CTL_SHADOW_INDEX_MASK 0x00000FFF
+
+/*
+ * H/W CTL FIELDS
+ */
+
+#define BD_CTL_COMP_OFFSET_SHIFT 16
+#define BD_CTL_COMP_OFFSET_MASK 0x00FF0000
+
+#define BD_CTL_COMP_DATA_SHIFT 0
+#define BD_CTL_COMP_DATA_MASK 0x0000FFFF
+
+/*
+ * COMPLETION QUEUE
+ */
+
+struct hl_cq_entry {
+ __le32 data;
+};
+
+#define HL_CQ_ENTRY_SIZE sizeof(struct hl_cq_entry)
+
+#define CQ_ENTRY_READY_SHIFT 31
+#define CQ_ENTRY_READY_MASK 0x80000000
+
+#define CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT 30
+#define CQ_ENTRY_SHADOW_INDEX_VALID_MASK 0x40000000
+
+#define CQ_ENTRY_SHADOW_INDEX_SHIFT BD_CTL_SHADOW_INDEX_SHIFT
+#define CQ_ENTRY_SHADOW_INDEX_MASK BD_CTL_SHADOW_INDEX_MASK
+
+
+#endif /* QMAN_IF_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright 2018-2020 HabanaLabs, Ltd.
- * All Rights Reserved.
- *
- */
-
-#ifndef HL_BOOT_IF_H
-#define HL_BOOT_IF_H
-
-#define LKD_HARD_RESET_MAGIC 0xED7BD694
-#define HL_POWER9_HOST_MAGIC 0x1DA30009
-
-#define BOOT_FIT_SRAM_OFFSET 0x200000
-
-/*
- * CPU error bits in BOOT_ERROR registers
- *
- * CPU_BOOT_ERR0_DRAM_INIT_FAIL DRAM initialization failed.
- * DRAM is not reliable to use.
- *
- * CPU_BOOT_ERR0_FIT_CORRUPTED FIT data integrity verification of the
- * image provided by the host has failed.
- *
- * CPU_BOOT_ERR0_TS_INIT_FAIL Thermal Sensor initialization failed.
- * Boot continues as usual, but keep in
- * mind this is a warning.
- *
- * CPU_BOOT_ERR0_DRAM_SKIPPED DRAM initialization has been skipped.
- * Skipping DRAM initialization has been
- * requested (e.g. strap, command, etc.)
- * and FW skipped the DRAM initialization.
- * Host can initialize the DRAM.
- *
- * CPU_BOOT_ERR0_BMC_WAIT_SKIPPED Waiting for BMC data will be skipped.
- * Meaning the BMC data might not be
- * available until reset.
- *
- * CPU_BOOT_ERR0_NIC_DATA_NOT_RDY NIC data from BMC is not ready.
- * BMC has not provided the NIC data yet.
- * Once provided this bit will be cleared.
- *
- * CPU_BOOT_ERR0_NIC_FW_FAIL NIC FW loading failed.
- * The NIC FW loading and initialization
- * failed. This means NICs are not usable.
- *
- * CPU_BOOT_ERR0_ENABLED Error registers enabled.
- * This is a main indication that the
- * running FW populates the error
- * registers. Meaning the error bits are
- * not garbage, but actual error statuses.
- */
-#define CPU_BOOT_ERR0_DRAM_INIT_FAIL (1 << 0)
-#define CPU_BOOT_ERR0_FIT_CORRUPTED (1 << 1)
-#define CPU_BOOT_ERR0_TS_INIT_FAIL (1 << 2)
-#define CPU_BOOT_ERR0_DRAM_SKIPPED (1 << 3)
-#define CPU_BOOT_ERR0_BMC_WAIT_SKIPPED (1 << 4)
-#define CPU_BOOT_ERR0_NIC_DATA_NOT_RDY (1 << 5)
-#define CPU_BOOT_ERR0_NIC_FW_FAIL (1 << 6)
-#define CPU_BOOT_ERR0_ENABLED (1 << 31)
-
-enum cpu_boot_status {
- CPU_BOOT_STATUS_NA = 0, /* Default value after reset of chip */
- CPU_BOOT_STATUS_IN_WFE = 1,
- CPU_BOOT_STATUS_DRAM_RDY = 2,
- CPU_BOOT_STATUS_SRAM_AVAIL = 3,
- CPU_BOOT_STATUS_IN_BTL = 4, /* BTL is H/W FSM */
- CPU_BOOT_STATUS_IN_PREBOOT = 5,
- CPU_BOOT_STATUS_IN_SPL, /* deprecated - not reported */
- CPU_BOOT_STATUS_IN_UBOOT = 7,
- CPU_BOOT_STATUS_DRAM_INIT_FAIL, /* deprecated - will be removed */
- CPU_BOOT_STATUS_FIT_CORRUPTED, /* deprecated - will be removed */
- /* U-Boot console prompt activated, commands are not processed */
- CPU_BOOT_STATUS_UBOOT_NOT_READY = 10,
- /* Finished NICs init, reported after DRAM and NICs */
- CPU_BOOT_STATUS_NIC_FW_RDY = 11,
- CPU_BOOT_STATUS_TS_INIT_FAIL, /* deprecated - will be removed */
- CPU_BOOT_STATUS_DRAM_SKIPPED, /* deprecated - will be removed */
- CPU_BOOT_STATUS_BMC_WAITING_SKIPPED, /* deprecated - will be removed */
- /* Last boot loader progress status, ready to receive commands */
- CPU_BOOT_STATUS_READY_TO_BOOT = 15,
- CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT = 16,
-};
-
-enum kmd_msg {
- KMD_MSG_NA = 0,
- KMD_MSG_GOTO_WFE,
- KMD_MSG_FIT_RDY,
- KMD_MSG_SKIP_BMC,
-};
-
-enum cpu_msg_status {
- CPU_MSG_CLR = 0,
- CPU_MSG_OK,
- CPU_MSG_ERR,
-};
-
-#endif /* HL_BOOT_IF_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright 2016-2018 HabanaLabs, Ltd.
- * All Rights Reserved.
- *
- */
-
-#ifndef QMAN_IF_H
-#define QMAN_IF_H
-
-#include <linux/types.h>
-
-/*
- * PRIMARY QUEUE
- */
-
-struct hl_bd {
- __le64 ptr;
- __le32 len;
- __le32 ctl;
-};
-
-#define HL_BD_SIZE sizeof(struct hl_bd)
-
-/*
- * S/W CTL FIELDS.
- *
- * BD_CTL_REPEAT_VALID tells the CP whether the repeat field in the BD CTL is
- * valid. 1 means the repeat field is valid, 0 means not-valid,
- * i.e. repeat == 1
- */
-#define BD_CTL_REPEAT_VALID_SHIFT 24
-#define BD_CTL_REPEAT_VALID_MASK 0x01000000
-
-#define BD_CTL_SHADOW_INDEX_SHIFT 0
-#define BD_CTL_SHADOW_INDEX_MASK 0x00000FFF
-
-/*
- * H/W CTL FIELDS
- */
-
-#define BD_CTL_COMP_OFFSET_SHIFT 16
-#define BD_CTL_COMP_OFFSET_MASK 0x00FF0000
-
-#define BD_CTL_COMP_DATA_SHIFT 0
-#define BD_CTL_COMP_DATA_MASK 0x0000FFFF
-
-/*
- * COMPLETION QUEUE
- */
-
-struct hl_cq_entry {
- __le32 data;
-};
-
-#define HL_CQ_ENTRY_SIZE sizeof(struct hl_cq_entry)
-
-#define CQ_ENTRY_READY_SHIFT 31
-#define CQ_ENTRY_READY_MASK 0x80000000
-
-#define CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT 30
-#define CQ_ENTRY_SHADOW_INDEX_VALID_MASK 0x40000000
-
-#define CQ_ENTRY_SHADOW_INDEX_SHIFT BD_CTL_SHADOW_INDEX_SHIFT
-#define CQ_ENTRY_SHADOW_INDEX_MASK BD_CTL_SHADOW_INDEX_MASK
-
-
-#endif /* QMAN_IF_H */
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-
-#include <linux/slab.h>
-
-/**
- * struct hl_eqe_work - This structure is used to schedule work of EQ
- * entry and armcp_reset event
- *
- * @eq_work: workqueue object to run when EQ entry is received
- * @hdev: pointer to device structure
- * @eq_entry: copy of the EQ entry
- */
-struct hl_eqe_work {
- struct work_struct eq_work;
- struct hl_device *hdev;
- struct hl_eq_entry eq_entry;
-};
-
-/**
- * hl_cq_inc_ptr - increment ci or pi of cq
- *
- * @ptr: the current ci or pi value of the completion queue
- *
- * Increment ptr by 1. If it reaches the number of completion queue
- * entries, set it to 0
- */
-inline u32 hl_cq_inc_ptr(u32 ptr)
-{
- ptr++;
- if (unlikely(ptr == HL_CQ_LENGTH))
- ptr = 0;
- return ptr;
-}
-
-/**
- * hl_eq_inc_ptr - increment ci of eq
- *
- * @ptr: the current ci value of the event queue
- *
- * Increment ptr by 1. If it reaches the number of event queue
- * entries, set it to 0
- */
-inline u32 hl_eq_inc_ptr(u32 ptr)
-{
- ptr++;
- if (unlikely(ptr == HL_EQ_LENGTH))
- ptr = 0;
- return ptr;
-}
-
-static void irq_handle_eqe(struct work_struct *work)
-{
- struct hl_eqe_work *eqe_work = container_of(work, struct hl_eqe_work,
- eq_work);
- struct hl_device *hdev = eqe_work->hdev;
-
- hdev->asic_funcs->handle_eqe(hdev, &eqe_work->eq_entry);
-
- kfree(eqe_work);
-}
-
-/**
- * hl_irq_handler_cq - irq handler for completion queue
- *
- * @irq: irq number
- * @arg: pointer to completion queue structure
- *
- */
-irqreturn_t hl_irq_handler_cq(int irq, void *arg)
-{
- struct hl_cq *cq = arg;
- struct hl_device *hdev = cq->hdev;
- struct hl_hw_queue *queue;
- struct hl_cs_job *job;
- bool shadow_index_valid;
- u16 shadow_index;
- struct hl_cq_entry *cq_entry, *cq_base;
-
- if (hdev->disabled) {
- dev_dbg(hdev->dev,
- "Device disabled but received IRQ %d for CQ %d\n",
- irq, cq->hw_queue_id);
- return IRQ_HANDLED;
- }
-
- cq_base = (struct hl_cq_entry *) (uintptr_t) cq->kernel_address;
-
- while (1) {
- bool entry_ready = ((le32_to_cpu(cq_base[cq->ci].data) &
- CQ_ENTRY_READY_MASK)
- >> CQ_ENTRY_READY_SHIFT);
-
- if (!entry_ready)
- break;
-
- cq_entry = (struct hl_cq_entry *) &cq_base[cq->ci];
-
- /* Make sure we read CQ entry contents after we've
- * checked the ownership bit.
- */
- dma_rmb();
-
- shadow_index_valid = ((le32_to_cpu(cq_entry->data) &
- CQ_ENTRY_SHADOW_INDEX_VALID_MASK)
- >> CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT);
-
- shadow_index = (u16) ((le32_to_cpu(cq_entry->data) &
- CQ_ENTRY_SHADOW_INDEX_MASK)
- >> CQ_ENTRY_SHADOW_INDEX_SHIFT);
-
- queue = &hdev->kernel_queues[cq->hw_queue_id];
-
- if ((shadow_index_valid) && (!hdev->disabled)) {
- job = queue->shadow_queue[hl_pi_2_offset(shadow_index)];
- queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work);
- }
-
- atomic_inc(&queue->ci);
-
- /* Clear CQ entry ready bit */
- cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) &
- ~CQ_ENTRY_READY_MASK);
-
- cq->ci = hl_cq_inc_ptr(cq->ci);
-
- /* Increment free slots */
- atomic_inc(&cq->free_slots_cnt);
- }
-
- return IRQ_HANDLED;
-}
-
-/**
- * hl_irq_handler_eq - irq handler for event queue
- *
- * @irq: irq number
- * @arg: pointer to event queue structure
- *
- */
-irqreturn_t hl_irq_handler_eq(int irq, void *arg)
-{
- struct hl_eq *eq = arg;
- struct hl_device *hdev = eq->hdev;
- struct hl_eq_entry *eq_entry;
- struct hl_eq_entry *eq_base;
- struct hl_eqe_work *handle_eqe_work;
-
- eq_base = (struct hl_eq_entry *) (uintptr_t) eq->kernel_address;
-
- while (1) {
- bool entry_ready =
- ((le32_to_cpu(eq_base[eq->ci].hdr.ctl) &
- EQ_CTL_READY_MASK) >> EQ_CTL_READY_SHIFT);
-
- if (!entry_ready)
- break;
-
- eq_entry = &eq_base[eq->ci];
-
- /*
- * Make sure we read EQ entry contents after we've
- * checked the ownership bit.
- */
- dma_rmb();
-
- if (hdev->disabled) {
- dev_warn(hdev->dev,
- "Device disabled but received IRQ %d for EQ\n",
- irq);
- goto skip_irq;
- }
-
- handle_eqe_work = kmalloc(sizeof(*handle_eqe_work), GFP_ATOMIC);
- if (handle_eqe_work) {
- INIT_WORK(&handle_eqe_work->eq_work, irq_handle_eqe);
- handle_eqe_work->hdev = hdev;
-
- memcpy(&handle_eqe_work->eq_entry, eq_entry,
- sizeof(*eq_entry));
-
- queue_work(hdev->eq_wq, &handle_eqe_work->eq_work);
- }
-skip_irq:
- /* Clear EQ entry ready bit */
- eq_entry->hdr.ctl =
- cpu_to_le32(le32_to_cpu(eq_entry->hdr.ctl) &
- ~EQ_CTL_READY_MASK);
-
- eq->ci = hl_eq_inc_ptr(eq->ci);
-
- hdev->asic_funcs->update_eq_ci(hdev, eq->ci);
- }
-
- return IRQ_HANDLED;
-}
-
-/**
- * hl_cq_init - main initialization function for an cq object
- *
- * @hdev: pointer to device structure
- * @q: pointer to cq structure
- * @hw_queue_id: The H/W queue ID this completion queue belongs to
- *
- * Allocate dma-able memory for the completion queue and initialize fields
- * Returns 0 on success
- */
-int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
-{
- void *p;
-
- p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
- &q->bus_address, GFP_KERNEL | __GFP_ZERO);
- if (!p)
- return -ENOMEM;
-
- q->hdev = hdev;
- q->kernel_address = (u64) (uintptr_t) p;
- q->hw_queue_id = hw_queue_id;
- q->ci = 0;
- q->pi = 0;
-
- atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);
-
- return 0;
-}
-
-/**
- * hl_cq_fini - destroy completion queue
- *
- * @hdev: pointer to device structure
- * @q: pointer to cq structure
- *
- * Free the completion queue memory
- */
-void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q)
-{
- hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
- (void *) (uintptr_t) q->kernel_address, q->bus_address);
-}
-
-void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q)
-{
- q->ci = 0;
- q->pi = 0;
-
- atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);
-
- /*
- * It's not enough to just reset the PI/CI because the H/W may have
- * written valid completion entries before it was halted and therefore
- * we need to clean the actual queues so we won't process old entries
- * when the device is operational again
- */
-
- memset((void *) (uintptr_t) q->kernel_address, 0, HL_CQ_SIZE_IN_BYTES);
-}
-
-/**
- * hl_eq_init - main initialization function for an event queue object
- *
- * @hdev: pointer to device structure
- * @q: pointer to eq structure
- *
- * Allocate dma-able memory for the event queue and initialize fields
- * Returns 0 on success
- */
-int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
-{
- void *p;
-
- p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
- HL_EQ_SIZE_IN_BYTES,
- &q->bus_address);
- if (!p)
- return -ENOMEM;
-
- q->hdev = hdev;
- q->kernel_address = (u64) (uintptr_t) p;
- q->ci = 0;
-
- return 0;
-}
-
-/**
- * hl_eq_fini - destroy event queue
- *
- * @hdev: pointer to device structure
- * @q: pointer to eq structure
- *
- * Free the event queue memory
- */
-void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q)
-{
- flush_workqueue(hdev->eq_wq);
-
- hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
- HL_EQ_SIZE_IN_BYTES,
- (void *) (uintptr_t) q->kernel_address);
-}
-
-void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q)
-{
- q->ci = 0;
-
- /*
- * It's not enough to just reset the PI/CI because the H/W may have
- * written valid completion entries before it was halted and therefore
- * we need to clean the actual queues so we won't process old entries
- * when the device is operational again
- */
-
- memset((void *) (uintptr_t) q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include <uapi/misc/habanalabs.h>
-#include "habanalabs.h"
-#include "include/hw_ip/mmu/mmu_general.h"
-
-#include <linux/uaccess.h>
-#include <linux/slab.h>
-#include <linux/genalloc.h>
-
-#define HL_MMU_DEBUG 0
-
-/*
- * The va ranges in context object contain a list with the available chunks of
- * device virtual memory.
- * There is one range for host allocations and one for DRAM allocations.
- *
- * On initialization each range contains one chunk of all of its available
- * virtual range which is a half of the total device virtual range.
- *
- * On each mapping of physical pages, a suitable virtual range chunk (with a
- * minimum size) is selected from the list. If the chunk size equals the
- * requested size, the chunk is returned. Otherwise, the chunk is split into
- * two chunks - one to return as result and a remainder to stay in the list.
- *
- * On each Unmapping of a virtual address, the relevant virtual chunk is
- * returned to the list. The chunk is added to the list and if its edges match
- * the edges of the adjacent chunks (means a contiguous chunk can be created),
- * the chunks are merged.
- *
- * On finish, the list is checked to have only one chunk of all the relevant
- * virtual range (which is a half of the device total virtual range).
- * If not (means not all mappings were unmapped), a warning is printed.
- */
-
-/*
- * alloc_device_memory - allocate device memory
- *
- * @ctx : current context
- * @args : host parameters containing the requested size
- * @ret_handle : result handle
- *
- * This function does the following:
- * - Allocate the requested size rounded up to 2MB pages
- * - Return unique handle
- */
-static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
- u32 *ret_handle)
-{
- struct hl_device *hdev = ctx->hdev;
- struct hl_vm *vm = &hdev->vm;
- struct hl_vm_phys_pg_pack *phys_pg_pack;
- u64 paddr = 0, total_size, num_pgs, i;
- u32 num_curr_pgs, page_size, page_shift;
- int handle, rc;
- bool contiguous;
-
- num_curr_pgs = 0;
- page_size = hdev->asic_prop.dram_page_size;
- page_shift = __ffs(page_size);
- num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift;
- total_size = num_pgs << page_shift;
-
- contiguous = args->flags & HL_MEM_CONTIGUOUS;
-
- if (contiguous) {
- paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
- if (!paddr) {
- dev_err(hdev->dev,
- "failed to allocate %llu huge contiguous pages\n",
- num_pgs);
- return -ENOMEM;
- }
- }
-
- phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
- if (!phys_pg_pack) {
- rc = -ENOMEM;
- goto pages_pack_err;
- }
-
- phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK;
- phys_pg_pack->asid = ctx->asid;
- phys_pg_pack->npages = num_pgs;
- phys_pg_pack->page_size = page_size;
- phys_pg_pack->total_size = total_size;
- phys_pg_pack->flags = args->flags;
- phys_pg_pack->contiguous = contiguous;
-
- phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL);
- if (!phys_pg_pack->pages) {
- rc = -ENOMEM;
- goto pages_arr_err;
- }
-
- if (phys_pg_pack->contiguous) {
- for (i = 0 ; i < num_pgs ; i++)
- phys_pg_pack->pages[i] = paddr + i * page_size;
- } else {
- for (i = 0 ; i < num_pgs ; i++) {
- phys_pg_pack->pages[i] = (u64) gen_pool_alloc(
- vm->dram_pg_pool,
- page_size);
- if (!phys_pg_pack->pages[i]) {
- dev_err(hdev->dev,
- "Failed to allocate device memory (out of memory)\n");
- rc = -ENOMEM;
- goto page_err;
- }
-
- num_curr_pgs++;
- }
- }
-
- spin_lock(&vm->idr_lock);
- handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
- GFP_ATOMIC);
- spin_unlock(&vm->idr_lock);
-
- if (handle < 0) {
- dev_err(hdev->dev, "Failed to get handle for page\n");
- rc = -EFAULT;
- goto idr_err;
- }
-
- for (i = 0 ; i < num_pgs ; i++)
- kref_get(&vm->dram_pg_pool_refcount);
-
- phys_pg_pack->handle = handle;
-
- atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem);
- atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem);
-
- *ret_handle = handle;
-
- return 0;
-
-idr_err:
-page_err:
- if (!phys_pg_pack->contiguous)
- for (i = 0 ; i < num_curr_pgs ; i++)
- gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
- page_size);
-
- kvfree(phys_pg_pack->pages);
-pages_arr_err:
- kfree(phys_pg_pack);
-pages_pack_err:
- if (contiguous)
- gen_pool_free(vm->dram_pg_pool, paddr, total_size);
-
- return rc;
-}
-
-/*
- * dma_map_host_va - DMA mapping of the given host virtual address.
- * @hdev: habanalabs device structure
- * @addr: the host virtual address of the memory area
- * @size: the size of the memory area
- * @p_userptr: pointer to result userptr structure
- *
- * This function does the following:
- * - Allocate userptr structure
- * - Pin the given host memory using the userptr structure
- * - Perform DMA mapping to have the DMA addresses of the pages
- */
-static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
- struct hl_userptr **p_userptr)
-{
- struct hl_userptr *userptr;
- int rc;
-
- userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
- if (!userptr) {
- rc = -ENOMEM;
- goto userptr_err;
- }
-
- rc = hl_pin_host_memory(hdev, addr, size, userptr);
- if (rc) {
- dev_err(hdev->dev, "Failed to pin host memory\n");
- goto pin_err;
- }
-
- rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
- userptr->sgt->nents, DMA_BIDIRECTIONAL);
- if (rc) {
- dev_err(hdev->dev, "failed to map sgt with DMA region\n");
- goto dma_map_err;
- }
-
- userptr->dma_mapped = true;
- userptr->dir = DMA_BIDIRECTIONAL;
- userptr->vm_type = VM_TYPE_USERPTR;
-
- *p_userptr = userptr;
-
- return 0;
-
-dma_map_err:
- hl_unpin_host_memory(hdev, userptr);
-pin_err:
- kfree(userptr);
-userptr_err:
-
- return rc;
-}
-
-/*
- * dma_unmap_host_va - DMA unmapping of the given host virtual address.
- * @hdev: habanalabs device structure
- * @userptr: userptr to free
- *
- * This function does the following:
- * - Unpins the physical pages
- * - Frees the userptr structure
- */
-static void dma_unmap_host_va(struct hl_device *hdev,
- struct hl_userptr *userptr)
-{
- hl_unpin_host_memory(hdev, userptr);
- kfree(userptr);
-}
-
-/*
- * dram_pg_pool_do_release - free DRAM pages pool
- *
- * @ref : pointer to reference object
- *
- * This function does the following:
- * - Frees the idr structure of physical pages handles
- * - Frees the generic pool of DRAM physical pages
- */
-static void dram_pg_pool_do_release(struct kref *ref)
-{
- struct hl_vm *vm = container_of(ref, struct hl_vm,
- dram_pg_pool_refcount);
-
- /*
- * free the idr here as only here we know for sure that there are no
- * allocated physical pages and hence there are no handles in use
- */
- idr_destroy(&vm->phys_pg_pack_handles);
- gen_pool_destroy(vm->dram_pg_pool);
-}
-
-/*
- * free_phys_pg_pack - free physical page pack
- * @hdev: habanalabs device structure
- * @phys_pg_pack: physical page pack to free
- *
- * This function does the following:
- * - For DRAM memory only, iterate over the pack and free each physical block
- * structure by returning it to the general pool
- * - Free the hl_vm_phys_pg_pack structure
- */
-static void free_phys_pg_pack(struct hl_device *hdev,
- struct hl_vm_phys_pg_pack *phys_pg_pack)
-{
- struct hl_vm *vm = &hdev->vm;
- u64 i;
-
- if (!phys_pg_pack->created_from_userptr) {
- if (phys_pg_pack->contiguous) {
- gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
- phys_pg_pack->total_size);
-
- for (i = 0; i < phys_pg_pack->npages ; i++)
- kref_put(&vm->dram_pg_pool_refcount,
- dram_pg_pool_do_release);
- } else {
- for (i = 0 ; i < phys_pg_pack->npages ; i++) {
- gen_pool_free(vm->dram_pg_pool,
- phys_pg_pack->pages[i],
- phys_pg_pack->page_size);
- kref_put(&vm->dram_pg_pool_refcount,
- dram_pg_pool_do_release);
- }
- }
- }
-
- kvfree(phys_pg_pack->pages);
- kfree(phys_pg_pack);
-}
-
-/*
- * free_device_memory - free device memory
- *
- * @ctx : current context
- * @handle : handle of the memory chunk to free
- *
- * This function does the following:
- * - Free the device memory related to the given handle
- */
-static int free_device_memory(struct hl_ctx *ctx, u32 handle)
-{
- struct hl_device *hdev = ctx->hdev;
- struct hl_vm *vm = &hdev->vm;
- struct hl_vm_phys_pg_pack *phys_pg_pack;
-
- spin_lock(&vm->idr_lock);
- phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
- if (phys_pg_pack) {
- if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) {
- dev_err(hdev->dev, "handle %u is mapped, cannot free\n",
- handle);
- spin_unlock(&vm->idr_lock);
- return -EINVAL;
- }
-
- /*
- * must remove from idr before the freeing of the physical
- * pages as the refcount of the pool is also the trigger of the
- * idr destroy
- */
- idr_remove(&vm->phys_pg_pack_handles, handle);
- spin_unlock(&vm->idr_lock);
-
- atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
- atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);
-
- free_phys_pg_pack(hdev, phys_pg_pack);
- } else {
- spin_unlock(&vm->idr_lock);
- dev_err(hdev->dev,
- "free device memory failed, no match for handle %u\n",
- handle);
- return -EINVAL;
- }
-
- return 0;
-}
-
-/*
- * clear_va_list_locked - free virtual addresses list
- *
- * @hdev : habanalabs device structure
- * @va_list : list of virtual addresses to free
- *
- * This function does the following:
- * - Iterate over the list and free each virtual addresses block
- *
- * This function should be called only when va_list lock is taken
- */
-static void clear_va_list_locked(struct hl_device *hdev,
- struct list_head *va_list)
-{
- struct hl_vm_va_block *va_block, *tmp;
-
- list_for_each_entry_safe(va_block, tmp, va_list, node) {
- list_del(&va_block->node);
- kfree(va_block);
- }
-}
-
-/*
- * print_va_list_locked - print virtual addresses list
- *
- * @hdev : habanalabs device structure
- * @va_list : list of virtual addresses to print
- *
- * This function does the following:
- * - Iterate over the list and print each virtual addresses block
- *
- * This function should be called only when va_list lock is taken
- */
-static void print_va_list_locked(struct hl_device *hdev,
- struct list_head *va_list)
-{
-#if HL_MMU_DEBUG
- struct hl_vm_va_block *va_block;
-
- dev_dbg(hdev->dev, "print va list:\n");
-
- list_for_each_entry(va_block, va_list, node)
- dev_dbg(hdev->dev,
- "va block, start: 0x%llx, end: 0x%llx, size: %llu\n",
- va_block->start, va_block->end, va_block->size);
-#endif
-}
-
-/*
- * merge_va_blocks_locked - merge a virtual block if possible
- *
- * @hdev : pointer to the habanalabs device structure
- * @va_list : pointer to the virtual addresses block list
- * @va_block : virtual block to merge with adjacent blocks
- *
- * This function does the following:
- * - Merge the given blocks with the adjacent blocks if their virtual ranges
- * create a contiguous virtual range
- *
- * This Function should be called only when va_list lock is taken
- */
-static void merge_va_blocks_locked(struct hl_device *hdev,
- struct list_head *va_list, struct hl_vm_va_block *va_block)
-{
- struct hl_vm_va_block *prev, *next;
-
- prev = list_prev_entry(va_block, node);
- if (&prev->node != va_list && prev->end + 1 == va_block->start) {
- prev->end = va_block->end;
- prev->size = prev->end - prev->start;
- list_del(&va_block->node);
- kfree(va_block);
- va_block = prev;
- }
-
- next = list_next_entry(va_block, node);
- if (&next->node != va_list && va_block->end + 1 == next->start) {
- next->start = va_block->start;
- next->size = next->end - next->start;
- list_del(&va_block->node);
- kfree(va_block);
- }
-}
-
-/*
- * add_va_block_locked - add a virtual block to the virtual addresses list
- *
- * @hdev : pointer to the habanalabs device structure
- * @va_list : pointer to the virtual addresses block list
- * @start : start virtual address
- * @end : end virtual address
- *
- * This function does the following:
- * - Add the given block to the virtual blocks list and merge with other
- * blocks if a contiguous virtual block can be created
- *
- * This Function should be called only when va_list lock is taken
- */
-static int add_va_block_locked(struct hl_device *hdev,
- struct list_head *va_list, u64 start, u64 end)
-{
- struct hl_vm_va_block *va_block, *res = NULL;
- u64 size = end - start;
-
- print_va_list_locked(hdev, va_list);
-
- list_for_each_entry(va_block, va_list, node) {
- /* TODO: remove upon matureness */
- if (hl_mem_area_crosses_range(start, size, va_block->start,
- va_block->end)) {
- dev_err(hdev->dev,
- "block crossing ranges at start 0x%llx, end 0x%llx\n",
- va_block->start, va_block->end);
- return -EINVAL;
- }
-
- if (va_block->end < start)
- res = va_block;
- }
-
- va_block = kmalloc(sizeof(*va_block), GFP_KERNEL);
- if (!va_block)
- return -ENOMEM;
-
- va_block->start = start;
- va_block->end = end;
- va_block->size = size;
-
- if (!res)
- list_add(&va_block->node, va_list);
- else
- list_add(&va_block->node, &res->node);
-
- merge_va_blocks_locked(hdev, va_list, va_block);
-
- print_va_list_locked(hdev, va_list);
-
- return 0;
-}
-
-/*
- * add_va_block - wrapper for add_va_block_locked
- *
- * @hdev : pointer to the habanalabs device structure
- * @va_list : pointer to the virtual addresses block list
- * @start : start virtual address
- * @end : end virtual address
- *
- * This function does the following:
- * - Takes the list lock and calls add_va_block_locked
- */
-static inline int add_va_block(struct hl_device *hdev,
- struct hl_va_range *va_range, u64 start, u64 end)
-{
- int rc;
-
- mutex_lock(&va_range->lock);
- rc = add_va_block_locked(hdev, &va_range->list, start, end);
- mutex_unlock(&va_range->lock);
-
- return rc;
-}
-
-/*
- * get_va_block - get a virtual block with the requested size
- *
- * @hdev : pointer to the habanalabs device structure
- * @va_range : pointer to the virtual addresses range
- * @size : requested block size
- * @hint_addr : hint for request address by the user
- * @is_userptr : is host or DRAM memory
- *
- * This function does the following:
- * - Iterate on the virtual block list to find a suitable virtual block for the
- * requested size
- * - Reserve the requested block and update the list
- * - Return the start address of the virtual block
- */
-static u64 get_va_block(struct hl_device *hdev,
- struct hl_va_range *va_range, u64 size, u64 hint_addr,
- bool is_userptr)
-{
- struct hl_vm_va_block *va_block, *new_va_block = NULL;
- u64 valid_start, valid_size, prev_start, prev_end, page_mask,
- res_valid_start = 0, res_valid_size = 0;
- u32 page_size;
- bool add_prev = false;
-
- if (is_userptr)
- /*
- * We cannot know if the user allocated memory with huge pages
- * or not, hence we continue with the biggest possible
- * granularity.
- */
- page_size = hdev->asic_prop.pmmu_huge.page_size;
- else
- page_size = hdev->asic_prop.dmmu.page_size;
-
- page_mask = ~((u64)page_size - 1);
-
- mutex_lock(&va_range->lock);
-
- print_va_list_locked(hdev, &va_range->list);
-
- list_for_each_entry(va_block, &va_range->list, node) {
- /* calc the first possible aligned addr */
- valid_start = va_block->start;
-
- if (valid_start & (page_size - 1)) {
- valid_start &= page_mask;
- valid_start += page_size;
- if (valid_start > va_block->end)
- continue;
- }
-
- valid_size = va_block->end - valid_start;
-
- if (valid_size >= size &&
- (!new_va_block || valid_size < res_valid_size)) {
- new_va_block = va_block;
- res_valid_start = valid_start;
- res_valid_size = valid_size;
- }
-
- if (hint_addr && hint_addr >= valid_start &&
- ((hint_addr + size) <= va_block->end)) {
- new_va_block = va_block;
- res_valid_start = hint_addr;
- res_valid_size = valid_size;
- break;
- }
- }
-
- if (!new_va_block) {
- dev_err(hdev->dev, "no available va block for size %llu\n",
- size);
- goto out;
- }
-
- if (res_valid_start > new_va_block->start) {
- prev_start = new_va_block->start;
- prev_end = res_valid_start - 1;
-
- new_va_block->start = res_valid_start;
- new_va_block->size = res_valid_size;
-
- add_prev = true;
- }
-
- if (new_va_block->size > size) {
- new_va_block->start += size;
- new_va_block->size = new_va_block->end - new_va_block->start;
- } else {
- list_del(&new_va_block->node);
- kfree(new_va_block);
- }
-
- if (add_prev)
- add_va_block_locked(hdev, &va_range->list, prev_start,
- prev_end);
-
- print_va_list_locked(hdev, &va_range->list);
-out:
- mutex_unlock(&va_range->lock);
-
- return res_valid_start;
-}
-
-/*
- * get_sg_info - get number of pages and the DMA address from SG list
- *
- * @sg : the SG list
- * @dma_addr : pointer to DMA address to return
- *
- * Calculate the number of consecutive pages described by the SG list. Take the
- * offset of the address in the first page, add to it the length and round it up
- * to the number of needed pages.
- */
-static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
-{
- *dma_addr = sg_dma_address(sg);
-
- return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
- (PAGE_SIZE - 1)) >> PAGE_SHIFT;
-}
-
-/*
- * init_phys_pg_pack_from_userptr - initialize physical page pack from host
- * memory
- * @ctx: current context
- * @userptr: userptr to initialize from
- * @pphys_pg_pack: result pointer
- *
- * This function does the following:
- * - Pin the physical pages related to the given virtual block
- * - Create a physical page pack from the physical pages related to the given
- * virtual block
- */
-static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
- struct hl_userptr *userptr,
- struct hl_vm_phys_pg_pack **pphys_pg_pack)
-{
- struct hl_vm_phys_pg_pack *phys_pg_pack;
- struct scatterlist *sg;
- dma_addr_t dma_addr;
- u64 page_mask, total_npages;
- u32 npages, page_size = PAGE_SIZE,
- huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
- bool first = true, is_huge_page_opt = true;
- int rc, i, j;
- u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
-
- phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
- if (!phys_pg_pack)
- return -ENOMEM;
-
- phys_pg_pack->vm_type = userptr->vm_type;
- phys_pg_pack->created_from_userptr = true;
- phys_pg_pack->asid = ctx->asid;
- atomic_set(&phys_pg_pack->mapping_cnt, 1);
-
- /* Only if all dma_addrs are aligned to 2MB and their
- * sizes is at least 2MB, we can use huge page mapping.
- * We limit the 2MB optimization to this condition,
- * since later on we acquire the related VA range as one
- * consecutive block.
- */
- total_npages = 0;
- for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
- npages = get_sg_info(sg, &dma_addr);
-
- total_npages += npages;
-
- if ((npages % pgs_in_huge_page) ||
- (dma_addr & (huge_page_size - 1)))
- is_huge_page_opt = false;
- }
-
- if (is_huge_page_opt) {
- page_size = huge_page_size;
- do_div(total_npages, pgs_in_huge_page);
- }
-
- page_mask = ~(((u64) page_size) - 1);
-
- phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64),
- GFP_KERNEL);
- if (!phys_pg_pack->pages) {
- rc = -ENOMEM;
- goto page_pack_arr_mem_err;
- }
-
- phys_pg_pack->npages = total_npages;
- phys_pg_pack->page_size = page_size;
- phys_pg_pack->total_size = total_npages * page_size;
-
- j = 0;
- for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
- npages = get_sg_info(sg, &dma_addr);
-
- /* align down to physical page size and save the offset */
- if (first) {
- first = false;
- phys_pg_pack->offset = dma_addr & (page_size - 1);
- dma_addr &= page_mask;
- }
-
- while (npages) {
- phys_pg_pack->pages[j++] = dma_addr;
- dma_addr += page_size;
-
- if (is_huge_page_opt)
- npages -= pgs_in_huge_page;
- else
- npages--;
- }
- }
-
- *pphys_pg_pack = phys_pg_pack;
-
- return 0;
-
-page_pack_arr_mem_err:
- kfree(phys_pg_pack);
-
- return rc;
-}
-
-/*
- * map_phys_pg_pack - maps the physical page pack.
- * @ctx: current context
- * @vaddr: start address of the virtual area to map from
- * @phys_pg_pack: the pack of physical pages to map to
- *
- * This function does the following:
- * - Maps each chunk of virtual memory to matching physical chunk
- * - Stores number of successful mappings in the given argument
- * - Returns 0 on success, error code otherwise
- */
-static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
- struct hl_vm_phys_pg_pack *phys_pg_pack)
-{
- struct hl_device *hdev = ctx->hdev;
- u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
- u32 page_size = phys_pg_pack->page_size;
- int rc = 0;
-
- for (i = 0 ; i < phys_pg_pack->npages ; i++) {
- paddr = phys_pg_pack->pages[i];
-
- rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
- (i + 1) == phys_pg_pack->npages);
- if (rc) {
- dev_err(hdev->dev,
- "map failed for handle %u, npages: %llu, mapped: %llu",
- phys_pg_pack->handle, phys_pg_pack->npages,
- mapped_pg_cnt);
- goto err;
- }
-
- mapped_pg_cnt++;
- next_vaddr += page_size;
- }
-
- return 0;
-
-err:
- next_vaddr = vaddr;
- for (i = 0 ; i < mapped_pg_cnt ; i++) {
- if (hl_mmu_unmap(ctx, next_vaddr, page_size,
- (i + 1) == mapped_pg_cnt))
- dev_warn_ratelimited(hdev->dev,
- "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
- phys_pg_pack->handle, next_vaddr,
- phys_pg_pack->pages[i], page_size);
-
- next_vaddr += page_size;
- }
-
- return rc;
-}
-
-/*
- * unmap_phys_pg_pack - unmaps the physical page pack
- * @ctx: current context
- * @vaddr: start address of the virtual area to unmap
- * @phys_pg_pack: the pack of physical pages to unmap
- */
-static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
- struct hl_vm_phys_pg_pack *phys_pg_pack)
-{
- struct hl_device *hdev = ctx->hdev;
- u64 next_vaddr, i;
- u32 page_size;
-
- page_size = phys_pg_pack->page_size;
- next_vaddr = vaddr;
-
- for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
- if (hl_mmu_unmap(ctx, next_vaddr, page_size,
- (i + 1) == phys_pg_pack->npages))
- dev_warn_ratelimited(hdev->dev,
- "unmap failed for vaddr: 0x%llx\n", next_vaddr);
-
- /*
- * unmapping on Palladium can be really long, so avoid a CPU
- * soft lockup bug by sleeping a little between unmapping pages
- */
- if (hdev->pldm)
- usleep_range(500, 1000);
- }
-}
-
-static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
- u64 *paddr)
-{
- struct hl_device *hdev = ctx->hdev;
- struct hl_vm *vm = &hdev->vm;
- struct hl_vm_phys_pg_pack *phys_pg_pack;
- u32 handle;
-
- handle = lower_32_bits(args->map_device.handle);
- spin_lock(&vm->idr_lock);
- phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
- if (!phys_pg_pack) {
- spin_unlock(&vm->idr_lock);
- dev_err(hdev->dev, "no match for handle %u\n", handle);
- return -EINVAL;
- }
-
- *paddr = phys_pg_pack->pages[0];
-
- spin_unlock(&vm->idr_lock);
-
- return 0;
-}
-
-/*
- * map_device_va - map the given memory
- *
- * @ctx : current context
- * @args : host parameters with handle/host virtual address
- * @device_addr : pointer to result device virtual address
- *
- * This function does the following:
- * - If given a physical device memory handle, map to a device virtual block
- * and return the start address of this block
- * - If given a host virtual address and size, find the related physical pages,
- * map a device virtual block to this pages and return the start address of
- * this block
- */
-static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
- u64 *device_addr)
-{
- struct hl_device *hdev = ctx->hdev;
- struct hl_vm *vm = &hdev->vm;
- struct hl_vm_phys_pg_pack *phys_pg_pack;
- struct hl_userptr *userptr = NULL;
- struct hl_vm_hash_node *hnode;
- struct hl_va_range *va_range;
- enum vm_type_t *vm_type;
- u64 ret_vaddr, hint_addr;
- u32 handle = 0;
- int rc;
- bool is_userptr = args->flags & HL_MEM_USERPTR;
-
- /* Assume failure */
- *device_addr = 0;
-
- if (is_userptr) {
- u64 addr = args->map_host.host_virt_addr,
- size = args->map_host.mem_size;
-
- rc = dma_map_host_va(hdev, addr, size, &userptr);
- if (rc) {
- dev_err(hdev->dev, "failed to get userptr from va\n");
- return rc;
- }
-
- rc = init_phys_pg_pack_from_userptr(ctx, userptr,
- &phys_pg_pack);
- if (rc) {
- dev_err(hdev->dev,
- "unable to init page pack for vaddr 0x%llx\n",
- addr);
- goto init_page_pack_err;
- }
-
- vm_type = (enum vm_type_t *) userptr;
- hint_addr = args->map_host.hint_addr;
- handle = phys_pg_pack->handle;
- } else {
- handle = lower_32_bits(args->map_device.handle);
-
- spin_lock(&vm->idr_lock);
- phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
- if (!phys_pg_pack) {
- spin_unlock(&vm->idr_lock);
- dev_err(hdev->dev,
- "no match for handle %u\n", handle);
- return -EINVAL;
- }
-
- /* increment now to avoid freeing device memory while mapping */
- atomic_inc(&phys_pg_pack->mapping_cnt);
-
- spin_unlock(&vm->idr_lock);
-
- vm_type = (enum vm_type_t *) phys_pg_pack;
-
- hint_addr = args->map_device.hint_addr;
- }
-
- /*
- * relevant for mapping device physical memory only, as host memory is
- * implicitly shared
- */
- if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) &&
- phys_pg_pack->asid != ctx->asid) {
- dev_err(hdev->dev,
- "Failed to map memory, handle %u is not shared\n",
- handle);
- rc = -EPERM;
- goto shared_err;
- }
-
- hnode = kzalloc(sizeof(*hnode), GFP_KERNEL);
- if (!hnode) {
- rc = -ENOMEM;
- goto hnode_err;
- }
-
- if (is_userptr)
- if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size)
- va_range = ctx->host_va_range;
- else
- va_range = ctx->host_huge_va_range;
- else
- va_range = ctx->dram_va_range;
-
- ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
- hint_addr, is_userptr);
- if (!ret_vaddr) {
- dev_err(hdev->dev, "no available va block for handle %u\n",
- handle);
- rc = -ENOMEM;
- goto va_block_err;
- }
-
- mutex_lock(&ctx->mmu_lock);
-
- rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
- if (rc) {
- mutex_unlock(&ctx->mmu_lock);
- dev_err(hdev->dev, "mapping page pack failed for handle %u\n",
- handle);
- goto map_err;
- }
-
- rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type);
-
- mutex_unlock(&ctx->mmu_lock);
-
- if (rc) {
- dev_err(hdev->dev,
- "mapping handle %u failed due to MMU cache invalidation\n",
- handle);
- goto map_err;
- }
-
- ret_vaddr += phys_pg_pack->offset;
-
- hnode->ptr = vm_type;
- hnode->vaddr = ret_vaddr;
-
- mutex_lock(&ctx->mem_hash_lock);
- hash_add(ctx->mem_hash, &hnode->node, ret_vaddr);
- mutex_unlock(&ctx->mem_hash_lock);
-
- *device_addr = ret_vaddr;
-
- if (is_userptr)
- free_phys_pg_pack(hdev, phys_pg_pack);
-
- return 0;
-
-map_err:
- if (add_va_block(hdev, va_range, ret_vaddr,
- ret_vaddr + phys_pg_pack->total_size - 1))
- dev_warn(hdev->dev,
- "release va block failed for handle 0x%x, vaddr: 0x%llx\n",
- handle, ret_vaddr);
-
-va_block_err:
- kfree(hnode);
-hnode_err:
-shared_err:
- atomic_dec(&phys_pg_pack->mapping_cnt);
- if (is_userptr)
- free_phys_pg_pack(hdev, phys_pg_pack);
-init_page_pack_err:
- if (is_userptr)
- dma_unmap_host_va(hdev, userptr);
-
- return rc;
-}
-
-/*
- * unmap_device_va - unmap the given device virtual address
- *
- * @ctx : current context
- * @vaddr : device virtual address to unmap
- * @ctx_free : true if in context free flow, false otherwise.
- *
- * This function does the following:
- * - Unmap the physical pages related to the given virtual address
- * - return the device virtual block to the virtual block list
- */
-static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
-{
- struct hl_device *hdev = ctx->hdev;
- struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
- struct hl_vm_hash_node *hnode = NULL;
- struct hl_userptr *userptr = NULL;
- struct hl_va_range *va_range;
- enum vm_type_t *vm_type;
- bool is_userptr;
- int rc = 0;
-
- /* protect from double entrance */
- mutex_lock(&ctx->mem_hash_lock);
- hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
- if (vaddr == hnode->vaddr)
- break;
-
- if (!hnode) {
- mutex_unlock(&ctx->mem_hash_lock);
- dev_err(hdev->dev,
- "unmap failed, no mem hnode for vaddr 0x%llx\n",
- vaddr);
- return -EINVAL;
- }
-
- hash_del(&hnode->node);
- mutex_unlock(&ctx->mem_hash_lock);
-
- vm_type = hnode->ptr;
-
- if (*vm_type == VM_TYPE_USERPTR) {
- is_userptr = true;
- userptr = hnode->ptr;
- rc = init_phys_pg_pack_from_userptr(ctx, userptr,
- &phys_pg_pack);
- if (rc) {
- dev_err(hdev->dev,
- "unable to init page pack for vaddr 0x%llx\n",
- vaddr);
- goto vm_type_err;
- }
-
- if (phys_pg_pack->page_size ==
- hdev->asic_prop.pmmu.page_size)
- va_range = ctx->host_va_range;
- else
- va_range = ctx->host_huge_va_range;
- } else if (*vm_type == VM_TYPE_PHYS_PACK) {
- is_userptr = false;
- va_range = ctx->dram_va_range;
- phys_pg_pack = hnode->ptr;
- } else {
- dev_warn(hdev->dev,
- "unmap failed, unknown vm desc for vaddr 0x%llx\n",
- vaddr);
- rc = -EFAULT;
- goto vm_type_err;
- }
-
- if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) {
- dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr);
- rc = -EINVAL;
- goto mapping_cnt_err;
- }
-
- vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
-
- mutex_lock(&ctx->mmu_lock);
-
- unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack);
-
- /*
- * During context free this function is called in a loop to clean all
- * the context mappings. Hence the cache invalidation can be called once
- * at the loop end rather than for each iteration
- */
- if (!ctx_free)
- rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
- *vm_type);
-
- mutex_unlock(&ctx->mmu_lock);
-
- /*
- * If the context is closing we don't need to check for the MMU cache
- * invalidation return code and update the VA free list as in this flow
- * we invalidate the MMU cache outside of this unmap function and the VA
- * free list will be freed anyway.
- */
- if (!ctx_free) {
- int tmp_rc;
-
- if (rc)
- dev_err(hdev->dev,
- "unmapping vaddr 0x%llx failed due to MMU cache invalidation\n",
- vaddr);
-
- tmp_rc = add_va_block(hdev, va_range, vaddr,
- vaddr + phys_pg_pack->total_size - 1);
- if (tmp_rc) {
- dev_warn(hdev->dev,
- "add va block failed for vaddr: 0x%llx\n",
- vaddr);
- if (!rc)
- rc = tmp_rc;
- }
- }
-
- atomic_dec(&phys_pg_pack->mapping_cnt);
- kfree(hnode);
-
- if (is_userptr) {
- free_phys_pg_pack(hdev, phys_pg_pack);
- dma_unmap_host_va(hdev, userptr);
- }
-
- return rc;
-
-mapping_cnt_err:
- if (is_userptr)
- free_phys_pg_pack(hdev, phys_pg_pack);
-vm_type_err:
- mutex_lock(&ctx->mem_hash_lock);
- hash_add(ctx->mem_hash, &hnode->node, vaddr);
- mutex_unlock(&ctx->mem_hash_lock);
-
- return rc;
-}
-
-static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
-{
- struct hl_device *hdev = hpriv->hdev;
- struct hl_ctx *ctx = hpriv->ctx;
- u64 device_addr = 0;
- u32 handle = 0;
- int rc;
-
- switch (args->in.op) {
- case HL_MEM_OP_ALLOC:
- if (args->in.alloc.mem_size == 0) {
- dev_err(hdev->dev,
- "alloc size must be larger than 0\n");
- rc = -EINVAL;
- goto out;
- }
-
- /* Force contiguous as there are no real MMU
- * translations to overcome physical memory gaps
- */
- args->in.flags |= HL_MEM_CONTIGUOUS;
- rc = alloc_device_memory(ctx, &args->in, &handle);
-
- memset(args, 0, sizeof(*args));
- args->out.handle = (__u64) handle;
- break;
-
- case HL_MEM_OP_FREE:
- rc = free_device_memory(ctx, args->in.free.handle);
- break;
-
- case HL_MEM_OP_MAP:
- if (args->in.flags & HL_MEM_USERPTR) {
- device_addr = args->in.map_host.host_virt_addr;
- rc = 0;
- } else {
- rc = get_paddr_from_handle(ctx, &args->in,
- &device_addr);
- }
-
- memset(args, 0, sizeof(*args));
- args->out.device_virt_addr = device_addr;
- break;
-
- case HL_MEM_OP_UNMAP:
- rc = 0;
- break;
-
- default:
- dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
- rc = -ENOTTY;
- break;
- }
-
-out:
- return rc;
-}
-
-int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
-{
- union hl_mem_args *args = data;
- struct hl_device *hdev = hpriv->hdev;
- struct hl_ctx *ctx = hpriv->ctx;
- u64 device_addr = 0;
- u32 handle = 0;
- int rc;
-
- if (hl_device_disabled_or_in_reset(hdev)) {
- dev_warn_ratelimited(hdev->dev,
- "Device is %s. Can't execute MEMORY IOCTL\n",
- atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
- return -EBUSY;
- }
-
- if (!hdev->mmu_enable)
- return mem_ioctl_no_mmu(hpriv, args);
-
- switch (args->in.op) {
- case HL_MEM_OP_ALLOC:
- if (!hdev->dram_supports_virtual_memory) {
- dev_err(hdev->dev, "DRAM alloc is not supported\n");
- rc = -EINVAL;
- goto out;
- }
-
- if (args->in.alloc.mem_size == 0) {
- dev_err(hdev->dev,
- "alloc size must be larger than 0\n");
- rc = -EINVAL;
- goto out;
- }
- rc = alloc_device_memory(ctx, &args->in, &handle);
-
- memset(args, 0, sizeof(*args));
- args->out.handle = (__u64) handle;
- break;
-
- case HL_MEM_OP_FREE:
- rc = free_device_memory(ctx, args->in.free.handle);
- break;
-
- case HL_MEM_OP_MAP:
- rc = map_device_va(ctx, &args->in, &device_addr);
-
- memset(args, 0, sizeof(*args));
- args->out.device_virt_addr = device_addr;
- break;
-
- case HL_MEM_OP_UNMAP:
- rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr,
- false);
- break;
-
- default:
- dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
- rc = -ENOTTY;
- break;
- }
-
-out:
- return rc;
-}
-
-static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
- u32 npages, u64 start, u32 offset,
- struct hl_userptr *userptr)
-{
- int rc;
-
- if (!access_ok((void __user *) (uintptr_t) addr, size)) {
- dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr);
- return -EFAULT;
- }
-
- userptr->vec = frame_vector_create(npages);
- if (!userptr->vec) {
- dev_err(hdev->dev, "Failed to create frame vector\n");
- return -ENOMEM;
- }
-
- rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE,
- userptr->vec);
-
- if (rc != npages) {
- dev_err(hdev->dev,
- "Failed to map host memory, user ptr probably wrong\n");
- if (rc < 0)
- goto destroy_framevec;
- rc = -EFAULT;
- goto put_framevec;
- }
-
- if (frame_vector_to_pages(userptr->vec) < 0) {
- dev_err(hdev->dev,
- "Failed to translate frame vector to pages\n");
- rc = -EFAULT;
- goto put_framevec;
- }
-
- rc = sg_alloc_table_from_pages(userptr->sgt,
- frame_vector_pages(userptr->vec),
- npages, offset, size, GFP_ATOMIC);
- if (rc < 0) {
- dev_err(hdev->dev, "failed to create SG table from pages\n");
- goto put_framevec;
- }
-
- return 0;
-
-put_framevec:
- put_vaddr_frames(userptr->vec);
-destroy_framevec:
- frame_vector_destroy(userptr->vec);
- return rc;
-}
-
-/*
- * hl_pin_host_memory - pins a chunk of host memory.
- * @hdev: pointer to the habanalabs device structure
- * @addr: the host virtual address of the memory area
- * @size: the size of the memory area
- * @userptr: pointer to hl_userptr structure
- *
- * This function does the following:
- * - Pins the physical pages
- * - Create an SG list from those pages
- */
-int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
- struct hl_userptr *userptr)
-{
- u64 start, end;
- u32 npages, offset;
- int rc;
-
- if (!size) {
- dev_err(hdev->dev, "size to pin is invalid - %llu\n", size);
- return -EINVAL;
- }
-
- /*
- * If the combination of the address and size requested for this memory
- * region causes an integer overflow, return error.
- */
- if (((addr + size) < addr) ||
- PAGE_ALIGN(addr + size) < (addr + size)) {
- dev_err(hdev->dev,
- "user pointer 0x%llx + %llu causes integer overflow\n",
- addr, size);
- return -EINVAL;
- }
-
- /*
- * This function can be called also from data path, hence use atomic
- * always as it is not a big allocation.
- */
- userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC);
- if (!userptr->sgt)
- return -ENOMEM;
-
- start = addr & PAGE_MASK;
- offset = addr & ~PAGE_MASK;
- end = PAGE_ALIGN(addr + size);
- npages = (end - start) >> PAGE_SHIFT;
-
- userptr->size = size;
- userptr->addr = addr;
- userptr->dma_mapped = false;
- INIT_LIST_HEAD(&userptr->job_node);
-
- rc = get_user_memory(hdev, addr, size, npages, start, offset,
- userptr);
- if (rc) {
- dev_err(hdev->dev,
- "failed to get user memory for address 0x%llx\n",
- addr);
- goto free_sgt;
- }
-
- hl_debugfs_add_userptr(hdev, userptr);
-
- return 0;
-
-free_sgt:
- kfree(userptr->sgt);
- return rc;
-}
-
-/*
- * hl_unpin_host_memory - unpins a chunk of host memory.
- * @hdev: pointer to the habanalabs device structure
- * @userptr: pointer to hl_userptr structure
- *
- * This function does the following:
- * - Unpins the physical pages related to the host memory
- * - Free the SG list
- */
-void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
-{
- struct page **pages;
-
- hl_debugfs_remove_userptr(hdev, userptr);
-
- if (userptr->dma_mapped)
- hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl,
- userptr->sgt->nents,
- userptr->dir);
-
- pages = frame_vector_pages(userptr->vec);
- if (!IS_ERR(pages)) {
- int i;
-
- for (i = 0; i < frame_vector_count(userptr->vec); i++)
- set_page_dirty_lock(pages[i]);
- }
- put_vaddr_frames(userptr->vec);
- frame_vector_destroy(userptr->vec);
-
- list_del(&userptr->job_node);
-
- sg_free_table(userptr->sgt);
- kfree(userptr->sgt);
-}
-
-/*
- * hl_userptr_delete_list - clear userptr list
- *
- * @hdev : pointer to the habanalabs device structure
- * @userptr_list : pointer to the list to clear
- *
- * This function does the following:
- * - Iterates over the list and unpins the host memory and frees the userptr
- * structure.
- */
-void hl_userptr_delete_list(struct hl_device *hdev,
- struct list_head *userptr_list)
-{
- struct hl_userptr *userptr, *tmp;
-
- list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
- hl_unpin_host_memory(hdev, userptr);
- kfree(userptr);
- }
-
- INIT_LIST_HEAD(userptr_list);
-}
-
-/*
- * hl_userptr_is_pinned - returns whether the given userptr is pinned
- *
- * @hdev : pointer to the habanalabs device structure
- * @userptr_list : pointer to the list to clear
- * @userptr : pointer to userptr to check
- *
- * This function does the following:
- * - Iterates over the list and checks if the given userptr is in it, means is
- * pinned. If so, returns true, otherwise returns false.
- */
-bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
- u32 size, struct list_head *userptr_list,
- struct hl_userptr **userptr)
-{
- list_for_each_entry((*userptr), userptr_list, job_node) {
- if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
- return true;
- }
-
- return false;
-}
-
-/*
- * va_range_init - initialize virtual addresses range
- * @hdev: pointer to the habanalabs device structure
- * @va_range: pointer to the range to initialize
- * @start: range start address
- * @end: range end address
- *
- * This function does the following:
- * - Initializes the virtual addresses list of the given range with the given
- * addresses.
- */
-static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
- u64 start, u64 end)
-{
- int rc;
-
- INIT_LIST_HEAD(&va_range->list);
-
- /* PAGE_SIZE alignment */
-
- if (start & (PAGE_SIZE - 1)) {
- start &= PAGE_MASK;
- start += PAGE_SIZE;
- }
-
- if (end & (PAGE_SIZE - 1))
- end &= PAGE_MASK;
-
- if (start >= end) {
- dev_err(hdev->dev, "too small vm range for va list\n");
- return -EFAULT;
- }
-
- rc = add_va_block(hdev, va_range, start, end);
-
- if (rc) {
- dev_err(hdev->dev, "Failed to init host va list\n");
- return rc;
- }
-
- va_range->start_addr = start;
- va_range->end_addr = end;
-
- return 0;
-}
-
-/*
- * va_range_fini() - clear a virtual addresses range
- * @hdev: pointer to the habanalabs structure
- * va_range: pointer to virtual addresses range
- *
- * This function does the following:
- * - Frees the virtual addresses block list and its lock
- */
-static void va_range_fini(struct hl_device *hdev,
- struct hl_va_range *va_range)
-{
- mutex_lock(&va_range->lock);
- clear_va_list_locked(hdev, &va_range->list);
- mutex_unlock(&va_range->lock);
-
- mutex_destroy(&va_range->lock);
- kfree(va_range);
-}
-
-/*
- * vm_ctx_init_with_ranges() - initialize virtual memory for context
- * @ctx: pointer to the habanalabs context structure
- * @host_range_start: host virtual addresses range start.
- * @host_range_end: host virtual addresses range end.
- * @host_huge_range_start: host virtual addresses range start for memory
- * allocated with huge pages.
- * @host_huge_range_end: host virtual addresses range end for memory allocated
- * with huge pages.
- * @dram_range_start: dram virtual addresses range start.
- * @dram_range_end: dram virtual addresses range end.
- *
- * This function initializes the following:
- * - MMU for context
- * - Virtual address to area descriptor hashtable
- * - Virtual block list of available virtual memory
- */
-static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
- u64 host_range_start,
- u64 host_range_end,
- u64 host_huge_range_start,
- u64 host_huge_range_end,
- u64 dram_range_start,
- u64 dram_range_end)
-{
- struct hl_device *hdev = ctx->hdev;
- int rc;
-
- ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL);
- if (!ctx->host_va_range)
- return -ENOMEM;
-
- ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range),
- GFP_KERNEL);
- if (!ctx->host_huge_va_range) {
- rc = -ENOMEM;
- goto host_huge_va_range_err;
- }
-
- ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL);
- if (!ctx->dram_va_range) {
- rc = -ENOMEM;
- goto dram_va_range_err;
- }
-
- rc = hl_mmu_ctx_init(ctx);
- if (rc) {
- dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
- goto mmu_ctx_err;
- }
-
- mutex_init(&ctx->mem_hash_lock);
- hash_init(ctx->mem_hash);
-
- mutex_init(&ctx->host_va_range->lock);
-
- rc = va_range_init(hdev, ctx->host_va_range, host_range_start,
- host_range_end);
- if (rc) {
- dev_err(hdev->dev, "failed to init host vm range\n");
- goto host_page_range_err;
- }
-
- if (hdev->pmmu_huge_range) {
- mutex_init(&ctx->host_huge_va_range->lock);
-
- rc = va_range_init(hdev, ctx->host_huge_va_range,
- host_huge_range_start,
- host_huge_range_end);
- if (rc) {
- dev_err(hdev->dev,
- "failed to init host huge vm range\n");
- goto host_hpage_range_err;
- }
- } else {
- ctx->host_huge_va_range = ctx->host_va_range;
- }
-
- mutex_init(&ctx->dram_va_range->lock);
-
- rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start,
- dram_range_end);
- if (rc) {
- dev_err(hdev->dev, "failed to init dram vm range\n");
- goto dram_vm_err;
- }
-
- hl_debugfs_add_ctx_mem_hash(hdev, ctx);
-
- return 0;
-
-dram_vm_err:
- mutex_destroy(&ctx->dram_va_range->lock);
-
- if (hdev->pmmu_huge_range) {
- mutex_lock(&ctx->host_huge_va_range->lock);
- clear_va_list_locked(hdev, &ctx->host_huge_va_range->list);
- mutex_unlock(&ctx->host_huge_va_range->lock);
- }
-host_hpage_range_err:
- if (hdev->pmmu_huge_range)
- mutex_destroy(&ctx->host_huge_va_range->lock);
- mutex_lock(&ctx->host_va_range->lock);
- clear_va_list_locked(hdev, &ctx->host_va_range->list);
- mutex_unlock(&ctx->host_va_range->lock);
-host_page_range_err:
- mutex_destroy(&ctx->host_va_range->lock);
- mutex_destroy(&ctx->mem_hash_lock);
- hl_mmu_ctx_fini(ctx);
-mmu_ctx_err:
- kfree(ctx->dram_va_range);
-dram_va_range_err:
- kfree(ctx->host_huge_va_range);
-host_huge_va_range_err:
- kfree(ctx->host_va_range);
-
- return rc;
-}
-
-int hl_vm_ctx_init(struct hl_ctx *ctx)
-{
- struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
- u64 host_range_start, host_range_end, host_huge_range_start,
- host_huge_range_end, dram_range_start, dram_range_end;
-
- atomic64_set(&ctx->dram_phys_mem, 0);
-
- /*
- * - If MMU is enabled, init the ranges as usual.
- * - If MMU is disabled, in case of host mapping, the returned address
- * is the given one.
- * In case of DRAM mapping, the returned address is the physical
- * address of the memory related to the given handle.
- */
- if (ctx->hdev->mmu_enable) {
- dram_range_start = prop->dmmu.start_addr;
- dram_range_end = prop->dmmu.end_addr;
- host_range_start = prop->pmmu.start_addr;
- host_range_end = prop->pmmu.end_addr;
- host_huge_range_start = prop->pmmu_huge.start_addr;
- host_huge_range_end = prop->pmmu_huge.end_addr;
- } else {
- dram_range_start = prop->dram_user_base_address;
- dram_range_end = prop->dram_end_address;
- host_range_start = prop->dram_user_base_address;
- host_range_end = prop->dram_end_address;
- host_huge_range_start = prop->dram_user_base_address;
- host_huge_range_end = prop->dram_end_address;
- }
-
- return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
- host_huge_range_start,
- host_huge_range_end,
- dram_range_start,
- dram_range_end);
-}
-
-/*
- * hl_vm_ctx_fini - virtual memory teardown of context
- *
- * @ctx : pointer to the habanalabs context structure
- *
- * This function perform teardown the following:
- * - Virtual block list of available virtual memory
- * - Virtual address to area descriptor hashtable
- * - MMU for context
- *
- * In addition this function does the following:
- * - Unmaps the existing hashtable nodes if the hashtable is not empty. The
- * hashtable should be empty as no valid mappings should exist at this
- * point.
- * - Frees any existing physical page list from the idr which relates to the
- * current context asid.
- * - This function checks the virtual block list for correctness. At this point
- * the list should contain one element which describes the whole virtual
- * memory range of the context. Otherwise, a warning is printed.
- */
-void hl_vm_ctx_fini(struct hl_ctx *ctx)
-{
- struct hl_device *hdev = ctx->hdev;
- struct hl_vm *vm = &hdev->vm;
- struct hl_vm_phys_pg_pack *phys_pg_list;
- struct hl_vm_hash_node *hnode;
- struct hlist_node *tmp_node;
- int i;
-
- hl_debugfs_remove_ctx_mem_hash(hdev, ctx);
-
- /*
- * Clearly something went wrong on hard reset so no point in printing
- * another side effect error
- */
- if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
- dev_notice(hdev->dev,
- "user released device without removing its memory mappings\n");
-
- hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
- dev_dbg(hdev->dev,
- "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
- hnode->vaddr, ctx->asid);
- unmap_device_va(ctx, hnode->vaddr, true);
- }
-
- /* invalidate the cache once after the unmapping loop */
- hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
- hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK);
-
- spin_lock(&vm->idr_lock);
- idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
- if (phys_pg_list->asid == ctx->asid) {
- dev_dbg(hdev->dev,
- "page list 0x%px of asid %d is still alive\n",
- phys_pg_list, ctx->asid);
- atomic64_sub(phys_pg_list->total_size,
- &hdev->dram_used_mem);
- free_phys_pg_pack(hdev, phys_pg_list);
- idr_remove(&vm->phys_pg_pack_handles, i);
- }
- spin_unlock(&vm->idr_lock);
-
- va_range_fini(hdev, ctx->dram_va_range);
- if (hdev->pmmu_huge_range)
- va_range_fini(hdev, ctx->host_huge_va_range);
- va_range_fini(hdev, ctx->host_va_range);
-
- mutex_destroy(&ctx->mem_hash_lock);
- hl_mmu_ctx_fini(ctx);
-}
-
-/*
- * hl_vm_init - initialize virtual memory module
- *
- * @hdev : pointer to the habanalabs device structure
- *
- * This function initializes the following:
- * - MMU module
- * - DRAM physical pages pool of 2MB
- * - Idr for device memory allocation handles
- */
-int hl_vm_init(struct hl_device *hdev)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- struct hl_vm *vm = &hdev->vm;
- int rc;
-
- vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
- if (!vm->dram_pg_pool) {
- dev_err(hdev->dev, "Failed to create dram page pool\n");
- return -ENOMEM;
- }
-
- kref_init(&vm->dram_pg_pool_refcount);
-
- rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address,
- prop->dram_end_address - prop->dram_user_base_address,
- -1);
-
- if (rc) {
- dev_err(hdev->dev,
- "Failed to add memory to dram page pool %d\n", rc);
- goto pool_add_err;
- }
-
- spin_lock_init(&vm->idr_lock);
- idr_init(&vm->phys_pg_pack_handles);
-
- atomic64_set(&hdev->dram_used_mem, 0);
-
- vm->init_done = true;
-
- return 0;
-
-pool_add_err:
- gen_pool_destroy(vm->dram_pg_pool);
-
- return rc;
-}
-
-/*
- * hl_vm_fini - virtual memory module teardown
- *
- * @hdev : pointer to the habanalabs device structure
- *
- * This function perform teardown to the following:
- * - Idr for device memory allocation handles
- * - DRAM physical pages pool of 2MB
- * - MMU module
- */
-void hl_vm_fini(struct hl_device *hdev)
-{
- struct hl_vm *vm = &hdev->vm;
-
- if (!vm->init_done)
- return;
-
- /*
- * At this point all the contexts should be freed and hence no DRAM
- * memory should be in use. Hence the DRAM pool should be freed here.
- */
- if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1)
- dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
- __func__);
-
- vm->init_done = false;
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-#include "include/hw_ip/mmu/mmu_general.h"
-
-#include <linux/genalloc.h>
-#include <linux/slab.h>
-
-static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
-
-static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
-{
- struct pgt_info *pgt_info = NULL;
-
- hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
- (unsigned long) hop_addr)
- if (hop_addr == pgt_info->shadow_addr)
- break;
-
- return pgt_info;
-}
-
-static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
-{
- struct hl_device *hdev = ctx->hdev;
-
- gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr,
- hdev->asic_prop.mmu_hop_table_size);
- hash_del(&pgt_info->node);
- kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
- kfree(pgt_info);
-}
-
-static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
-{
- struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
-
- _free_hop(ctx, pgt_info);
-}
-
-static u64 alloc_hop(struct hl_ctx *ctx)
-{
- struct hl_device *hdev = ctx->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- struct pgt_info *pgt_info;
- u64 phys_addr, shadow_addr;
-
- pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
- if (!pgt_info)
- return ULLONG_MAX;
-
- phys_addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool,
- prop->mmu_hop_table_size);
- if (!phys_addr) {
- dev_err(hdev->dev, "failed to allocate page\n");
- goto pool_add_err;
- }
-
- shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
- GFP_KERNEL);
- if (!shadow_addr)
- goto shadow_err;
-
- pgt_info->phys_addr = phys_addr;
- pgt_info->shadow_addr = shadow_addr;
- pgt_info->ctx = ctx;
- pgt_info->num_of_ptes = 0;
- hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);
-
- return shadow_addr;
-
-shadow_err:
- gen_pool_free(hdev->mmu_pgt_pool, phys_addr, prop->mmu_hop_table_size);
-pool_add_err:
- kfree(pgt_info);
-
- return ULLONG_MAX;
-}
-
-static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
-{
- return ctx->hdev->asic_prop.mmu_pgt_addr +
- (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
-}
-
-static inline u64 get_hop0_addr(struct hl_ctx *ctx)
-{
- return (u64) (uintptr_t) ctx->hdev->mmu_shadow_hop0 +
- (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
-}
-
-static inline void flush(struct hl_ctx *ctx)
-{
- /* flush all writes from all cores to reach PCI */
- mb();
- ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
-}
-
-/* transform the value to physical address when writing to H/W */
-static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
-{
- /*
- * The value to write is actually the address of the next shadow hop +
- * flags at the 12 LSBs.
- * Hence in order to get the value to write to the physical PTE, we
- * clear the 12 LSBs and translate the shadow hop to its associated
- * physical hop, and add back the original 12 LSBs.
- */
- u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
- (val & FLAGS_MASK);
-
- ctx->hdev->asic_funcs->write_pte(ctx->hdev,
- get_phys_addr(ctx, shadow_pte_addr),
- phys_val);
-
- *(u64 *) (uintptr_t) shadow_pte_addr = val;
-}
-
-/* do not transform the value to physical address when writing to H/W */
-static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
- u64 val)
-{
- ctx->hdev->asic_funcs->write_pte(ctx->hdev,
- get_phys_addr(ctx, shadow_pte_addr),
- val);
- *(u64 *) (uintptr_t) shadow_pte_addr = val;
-}
-
-/* clear the last and present bits */
-static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
-{
- /* no need to transform the value to physical address */
- write_final_pte(ctx, pte_addr, 0);
-}
-
-static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
-{
- get_pgt_info(ctx, hop_addr)->num_of_ptes++;
-}
-
-/*
- * put_pte - decrement the num of ptes and free the hop if possible
- *
- * @ctx: pointer to the context structure
- * @hop_addr: addr of the hop
- *
- * This function returns the number of ptes left on this hop. If the number is
- * 0, it means the pte was freed.
- */
-static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
-{
- struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
- int num_of_ptes_left;
-
- pgt_info->num_of_ptes--;
-
- /*
- * Need to save the number of ptes left because free_hop might free
- * the pgt_info
- */
- num_of_ptes_left = pgt_info->num_of_ptes;
- if (!num_of_ptes_left)
- _free_hop(ctx, pgt_info);
-
- return num_of_ptes_left;
-}
-
-static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
- u64 virt_addr, u64 mask, u64 shift)
-{
- return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
- ((virt_addr & mask) >> shift);
-}
-
-static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_prop,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
- mmu_prop->hop0_shift);
-}
-
-static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_prop,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
- mmu_prop->hop1_shift);
-}
-
-static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_prop,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
- mmu_prop->hop2_shift);
-}
-
-static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_prop,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
- mmu_prop->hop3_shift);
-}
-
-static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
- struct hl_mmu_properties *mmu_prop,
- u64 hop_addr, u64 vaddr)
-{
- return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
- mmu_prop->hop4_shift);
-}
-
-static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
-{
- if (curr_pte & PAGE_PRESENT_MASK)
- return curr_pte & HOP_PHYS_ADDR_MASK;
- else
- return ULLONG_MAX;
-}
-
-static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
- bool *is_new_hop)
-{
- u64 hop_addr = get_next_hop_addr(ctx, curr_pte);
-
- if (hop_addr == ULLONG_MAX) {
- hop_addr = alloc_hop(ctx);
- *is_new_hop = (hop_addr != ULLONG_MAX);
- }
-
- return hop_addr;
-}
-
-/* translates shadow address inside hop to a physical address */
-static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
-{
- u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
- u64 shadow_hop_addr = shadow_addr & ~page_mask;
- u64 pte_offset = shadow_addr & page_mask;
- u64 phys_hop_addr;
-
- if (shadow_hop_addr != get_hop0_addr(ctx))
- phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
- else
- phys_hop_addr = get_phys_hop0_addr(ctx);
-
- return phys_hop_addr + pte_offset;
-}
-
-static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
-
- return hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
- prop->dmmu.start_addr,
- prop->dmmu.end_addr);
-}
-
-static int dram_default_mapping_init(struct hl_ctx *ctx)
-{
- struct hl_device *hdev = ctx->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
- hop2_pte_addr, hop3_pte_addr, pte_val;
- int rc, i, j, hop3_allocated = 0;
-
- if ((!hdev->dram_supports_virtual_memory) ||
- (!hdev->dram_default_page_mapping) ||
- (ctx->asid == HL_KERNEL_ASID_ID))
- return 0;
-
- num_of_hop3 = prop->dram_size_for_default_page_mapping;
- do_div(num_of_hop3, prop->dram_page_size);
- do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
-
- /* add hop1 and hop2 */
- total_hops = num_of_hop3 + 2;
-
- ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
- if (!ctx->dram_default_hops)
- return -ENOMEM;
-
- hop0_addr = get_hop0_addr(ctx);
-
- hop1_addr = alloc_hop(ctx);
- if (hop1_addr == ULLONG_MAX) {
- dev_err(hdev->dev, "failed to alloc hop 1\n");
- rc = -ENOMEM;
- goto hop1_err;
- }
-
- ctx->dram_default_hops[total_hops - 1] = hop1_addr;
-
- hop2_addr = alloc_hop(ctx);
- if (hop2_addr == ULLONG_MAX) {
- dev_err(hdev->dev, "failed to alloc hop 2\n");
- rc = -ENOMEM;
- goto hop2_err;
- }
-
- ctx->dram_default_hops[total_hops - 2] = hop2_addr;
-
- for (i = 0 ; i < num_of_hop3 ; i++) {
- ctx->dram_default_hops[i] = alloc_hop(ctx);
- if (ctx->dram_default_hops[i] == ULLONG_MAX) {
- dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
- rc = -ENOMEM;
- goto hop3_err;
- }
- hop3_allocated++;
- }
-
- /* need only pte 0 in hops 0 and 1 */
- pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
- write_pte(ctx, hop0_addr, pte_val);
-
- pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
- write_pte(ctx, hop1_addr, pte_val);
- get_pte(ctx, hop1_addr);
-
- hop2_pte_addr = hop2_addr;
- for (i = 0 ; i < num_of_hop3 ; i++) {
- pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
- PAGE_PRESENT_MASK;
- write_pte(ctx, hop2_pte_addr, pte_val);
- get_pte(ctx, hop2_addr);
- hop2_pte_addr += HL_PTE_SIZE;
- }
-
- pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
- LAST_MASK | PAGE_PRESENT_MASK;
-
- for (i = 0 ; i < num_of_hop3 ; i++) {
- hop3_pte_addr = ctx->dram_default_hops[i];
- for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
- write_final_pte(ctx, hop3_pte_addr, pte_val);
- get_pte(ctx, ctx->dram_default_hops[i]);
- hop3_pte_addr += HL_PTE_SIZE;
- }
- }
-
- flush(ctx);
-
- return 0;
-
-hop3_err:
- for (i = 0 ; i < hop3_allocated ; i++)
- free_hop(ctx, ctx->dram_default_hops[i]);
-
- free_hop(ctx, hop2_addr);
-hop2_err:
- free_hop(ctx, hop1_addr);
-hop1_err:
- kfree(ctx->dram_default_hops);
-
- return rc;
-}
-
-static void dram_default_mapping_fini(struct hl_ctx *ctx)
-{
- struct hl_device *hdev = ctx->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
- hop2_pte_addr, hop3_pte_addr;
- int i, j;
-
- if ((!hdev->dram_supports_virtual_memory) ||
- (!hdev->dram_default_page_mapping) ||
- (ctx->asid == HL_KERNEL_ASID_ID))
- return;
-
- num_of_hop3 = prop->dram_size_for_default_page_mapping;
- do_div(num_of_hop3, prop->dram_page_size);
- do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
-
- hop0_addr = get_hop0_addr(ctx);
- /* add hop1 and hop2 */
- total_hops = num_of_hop3 + 2;
- hop1_addr = ctx->dram_default_hops[total_hops - 1];
- hop2_addr = ctx->dram_default_hops[total_hops - 2];
-
- for (i = 0 ; i < num_of_hop3 ; i++) {
- hop3_pte_addr = ctx->dram_default_hops[i];
- for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
- clear_pte(ctx, hop3_pte_addr);
- put_pte(ctx, ctx->dram_default_hops[i]);
- hop3_pte_addr += HL_PTE_SIZE;
- }
- }
-
- hop2_pte_addr = hop2_addr;
- hop2_pte_addr = hop2_addr;
- for (i = 0 ; i < num_of_hop3 ; i++) {
- clear_pte(ctx, hop2_pte_addr);
- put_pte(ctx, hop2_addr);
- hop2_pte_addr += HL_PTE_SIZE;
- }
-
- clear_pte(ctx, hop1_addr);
- put_pte(ctx, hop1_addr);
- clear_pte(ctx, hop0_addr);
-
- kfree(ctx->dram_default_hops);
-
- flush(ctx);
-}
-
-/**
- * hl_mmu_init() - initialize the MMU module.
- * @hdev: habanalabs device structure.
- *
- * This function does the following:
- * - Create a pool of pages for pgt_infos.
- * - Create a shadow table for pgt
- *
- * Return: 0 for success, non-zero for failure.
- */
-int hl_mmu_init(struct hl_device *hdev)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- int rc;
-
- if (!hdev->mmu_enable)
- return 0;
-
- hdev->mmu_pgt_pool =
- gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
-
- if (!hdev->mmu_pgt_pool) {
- dev_err(hdev->dev, "Failed to create page gen pool\n");
- return -ENOMEM;
- }
-
- rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr +
- prop->mmu_hop0_tables_total_size,
- prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
- -1);
- if (rc) {
- dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
- goto err_pool_add;
- }
-
- hdev->mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
- prop->mmu_hop_table_size,
- GFP_KERNEL | __GFP_ZERO);
- if (!hdev->mmu_shadow_hop0) {
- rc = -ENOMEM;
- goto err_pool_add;
- }
-
- /* MMU H/W init will be done in device hw_init() */
-
- return 0;
-
-err_pool_add:
- gen_pool_destroy(hdev->mmu_pgt_pool);
-
- return rc;
-}
-
-/**
- * hl_mmu_fini() - release the MMU module.
- * @hdev: habanalabs device structure.
- *
- * This function does the following:
- * - Disable MMU in H/W.
- * - Free the pgt_infos pool.
- *
- * All contexts should be freed before calling this function.
- */
-void hl_mmu_fini(struct hl_device *hdev)
-{
- if (!hdev->mmu_enable)
- return;
-
- /* MMU H/W fini was already done in device hw_fini() */
-
- kvfree(hdev->mmu_shadow_hop0);
- gen_pool_destroy(hdev->mmu_pgt_pool);
-}
-
-/**
- * hl_mmu_ctx_init() - initialize a context for using the MMU module.
- * @ctx: pointer to the context structure to initialize.
- *
- * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all
- * page tables hops related to this context.
- * Return: 0 on success, non-zero otherwise.
- */
-int hl_mmu_ctx_init(struct hl_ctx *ctx)
-{
- struct hl_device *hdev = ctx->hdev;
-
- if (!hdev->mmu_enable)
- return 0;
-
- mutex_init(&ctx->mmu_lock);
- hash_init(ctx->mmu_shadow_hash);
-
- return dram_default_mapping_init(ctx);
-}
-
-/*
- * hl_mmu_ctx_fini - disable a ctx from using the mmu module
- *
- * @ctx: pointer to the context structure
- *
- * This function does the following:
- * - Free any pgts which were not freed yet
- * - Free the mutex
- * - Free DRAM default page mapping hops
- */
-void hl_mmu_ctx_fini(struct hl_ctx *ctx)
-{
- struct hl_device *hdev = ctx->hdev;
- struct pgt_info *pgt_info;
- struct hlist_node *tmp;
- int i;
-
- if (!hdev->mmu_enable)
- return;
-
- dram_default_mapping_fini(ctx);
-
- if (!hash_empty(ctx->mmu_shadow_hash))
- dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
- ctx->asid);
-
- hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
- dev_err_ratelimited(hdev->dev,
- "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
- pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
- _free_hop(ctx, pgt_info);
- }
-
- mutex_destroy(&ctx->mmu_lock);
-}
-
-static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
-{
- struct hl_device *hdev = ctx->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- struct hl_mmu_properties *mmu_prop;
- u64 hop0_addr = 0, hop0_pte_addr = 0,
- hop1_addr = 0, hop1_pte_addr = 0,
- hop2_addr = 0, hop2_pte_addr = 0,
- hop3_addr = 0, hop3_pte_addr = 0,
- hop4_addr = 0, hop4_pte_addr = 0,
- curr_pte;
- bool is_huge, clear_hop3 = true;
-
- /* shifts and masks are the same in PMMU and HPMMU, use one of them */
- mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
-
- hop0_addr = get_hop0_addr(ctx);
- hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
-
- curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
-
- hop1_addr = get_next_hop_addr(ctx, curr_pte);
-
- if (hop1_addr == ULLONG_MAX)
- goto not_mapped;
-
- hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
-
- curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
-
- hop2_addr = get_next_hop_addr(ctx, curr_pte);
-
- if (hop2_addr == ULLONG_MAX)
- goto not_mapped;
-
- hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
-
- curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
-
- hop3_addr = get_next_hop_addr(ctx, curr_pte);
-
- if (hop3_addr == ULLONG_MAX)
- goto not_mapped;
-
- hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
-
- curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
-
- is_huge = curr_pte & LAST_MASK;
-
- if (is_dram_addr && !is_huge) {
- dev_err(hdev->dev,
- "DRAM unmapping should use huge pages only\n");
- return -EFAULT;
- }
-
- if (!is_huge) {
- hop4_addr = get_next_hop_addr(ctx, curr_pte);
-
- if (hop4_addr == ULLONG_MAX)
- goto not_mapped;
-
- hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
- virt_addr);
-
- curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
-
- clear_hop3 = false;
- }
-
- if (hdev->dram_default_page_mapping && is_dram_addr) {
- u64 default_pte = (prop->mmu_dram_default_page_addr &
- HOP_PHYS_ADDR_MASK) | LAST_MASK |
- PAGE_PRESENT_MASK;
- if (curr_pte == default_pte) {
- dev_err(hdev->dev,
- "DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
- virt_addr);
- goto not_mapped;
- }
-
- if (!(curr_pte & PAGE_PRESENT_MASK)) {
- dev_err(hdev->dev,
- "DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
- virt_addr);
- goto not_mapped;
- }
-
- write_final_pte(ctx, hop3_pte_addr, default_pte);
- put_pte(ctx, hop3_addr);
- } else {
- if (!(curr_pte & PAGE_PRESENT_MASK))
- goto not_mapped;
-
- if (hop4_addr)
- clear_pte(ctx, hop4_pte_addr);
- else
- clear_pte(ctx, hop3_pte_addr);
-
- if (hop4_addr && !put_pte(ctx, hop4_addr))
- clear_hop3 = true;
-
- if (!clear_hop3)
- goto mapped;
-
- clear_pte(ctx, hop3_pte_addr);
-
- if (put_pte(ctx, hop3_addr))
- goto mapped;
-
- clear_pte(ctx, hop2_pte_addr);
-
- if (put_pte(ctx, hop2_addr))
- goto mapped;
-
- clear_pte(ctx, hop1_pte_addr);
-
- if (put_pte(ctx, hop1_addr))
- goto mapped;
-
- clear_pte(ctx, hop0_pte_addr);
- }
-
-mapped:
- return 0;
-
-not_mapped:
- dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
- virt_addr);
-
- return -EINVAL;
-}
-
-/*
- * hl_mmu_unmap - unmaps a virtual addr
- *
- * @ctx: pointer to the context structure
- * @virt_addr: virt addr to map from
- * @page_size: size of the page to unmap
- * @flush_pte: whether to do a PCI flush
- *
- * This function does the following:
- * - Check that the virt addr is mapped
- * - Unmap the virt addr and frees pgts if possible
- * - Returns 0 on success, -EINVAL if the given addr is not mapped
- *
- * Because this function changes the page tables in the device and because it
- * changes the MMU hash, it must be protected by a lock.
- * However, because it maps only a single page, the lock should be implemented
- * in a higher level in order to protect the entire mapping of the memory area
- *
- * For optimization reasons PCI flush may be requested once after unmapping of
- * large area.
- */
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
- bool flush_pte)
-{
- struct hl_device *hdev = ctx->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- struct hl_mmu_properties *mmu_prop;
- u64 real_virt_addr;
- u32 real_page_size, npages;
- int i, rc = 0;
- bool is_dram_addr;
-
- if (!hdev->mmu_enable)
- return 0;
-
- is_dram_addr = is_dram_va(hdev, virt_addr);
-
- if (is_dram_addr)
- mmu_prop = &prop->dmmu;
- else if ((page_size % prop->pmmu_huge.page_size) == 0)
- mmu_prop = &prop->pmmu_huge;
- else
- mmu_prop = &prop->pmmu;
-
- /*
- * The H/W handles mapping of specific page sizes. Hence if the page
- * size is bigger, we break it to sub-pages and unmap them separately.
- */
- if ((page_size % mmu_prop->page_size) == 0) {
- real_page_size = mmu_prop->page_size;
- } else {
- dev_err(hdev->dev,
- "page size of %u is not %uKB aligned, can't unmap\n",
- page_size, mmu_prop->page_size >> 10);
-
- return -EFAULT;
- }
-
- npages = page_size / real_page_size;
- real_virt_addr = virt_addr;
-
- for (i = 0 ; i < npages ; i++) {
- rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
- if (rc)
- break;
-
- real_virt_addr += real_page_size;
- }
-
- if (flush_pte)
- flush(ctx);
-
- return rc;
-}
-
-static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
- u32 page_size, bool is_dram_addr)
-{
- struct hl_device *hdev = ctx->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- struct hl_mmu_properties *mmu_prop;
- u64 hop0_addr = 0, hop0_pte_addr = 0,
- hop1_addr = 0, hop1_pte_addr = 0,
- hop2_addr = 0, hop2_pte_addr = 0,
- hop3_addr = 0, hop3_pte_addr = 0,
- hop4_addr = 0, hop4_pte_addr = 0,
- curr_pte = 0;
- bool hop1_new = false, hop2_new = false, hop3_new = false,
- hop4_new = false, is_huge;
- int rc = -ENOMEM;
-
- /*
- * This mapping function can map a page or a huge page. For huge page
- * there are only 3 hops rather than 4. Currently the DRAM allocation
- * uses huge pages only but user memory could have been allocated with
- * one of the two page sizes. Since this is a common code for all the
- * three cases, we need this hugs page check.
- */
- if (is_dram_addr) {
- mmu_prop = &prop->dmmu;
- is_huge = true;
- } else if (page_size == prop->pmmu_huge.page_size) {
- mmu_prop = &prop->pmmu_huge;
- is_huge = true;
- } else {
- mmu_prop = &prop->pmmu;
- is_huge = false;
- }
-
- hop0_addr = get_hop0_addr(ctx);
- hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
- curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
-
- hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
- if (hop1_addr == ULLONG_MAX)
- goto err;
-
- hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
- curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
-
- hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
- if (hop2_addr == ULLONG_MAX)
- goto err;
-
- hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
- curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
-
- hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
- if (hop3_addr == ULLONG_MAX)
- goto err;
-
- hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
- curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
-
- if (!is_huge) {
- hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
- if (hop4_addr == ULLONG_MAX)
- goto err;
-
- hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
- virt_addr);
- curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
- }
-
- if (hdev->dram_default_page_mapping && is_dram_addr) {
- u64 default_pte = (prop->mmu_dram_default_page_addr &
- HOP_PHYS_ADDR_MASK) | LAST_MASK |
- PAGE_PRESENT_MASK;
-
- if (curr_pte != default_pte) {
- dev_err(hdev->dev,
- "DRAM: mapping already exists for virt_addr 0x%llx\n",
- virt_addr);
- rc = -EINVAL;
- goto err;
- }
-
- if (hop1_new || hop2_new || hop3_new || hop4_new) {
- dev_err(hdev->dev,
- "DRAM mapping should not allocate more hops\n");
- rc = -EFAULT;
- goto err;
- }
- } else if (curr_pte & PAGE_PRESENT_MASK) {
- dev_err(hdev->dev,
- "mapping already exists for virt_addr 0x%llx\n",
- virt_addr);
-
- dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
- *(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
- dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
- *(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
- dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
- *(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
- dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
- *(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);
-
- if (!is_huge)
- dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
- *(u64 *) (uintptr_t) hop4_pte_addr,
- hop4_pte_addr);
-
- rc = -EINVAL;
- goto err;
- }
-
- curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK
- | PAGE_PRESENT_MASK;
-
- if (is_huge)
- write_final_pte(ctx, hop3_pte_addr, curr_pte);
- else
- write_final_pte(ctx, hop4_pte_addr, curr_pte);
-
- if (hop1_new) {
- curr_pte =
- (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
- write_pte(ctx, hop0_pte_addr, curr_pte);
- }
- if (hop2_new) {
- curr_pte =
- (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
- write_pte(ctx, hop1_pte_addr, curr_pte);
- get_pte(ctx, hop1_addr);
- }
- if (hop3_new) {
- curr_pte =
- (hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
- write_pte(ctx, hop2_pte_addr, curr_pte);
- get_pte(ctx, hop2_addr);
- }
-
- if (!is_huge) {
- if (hop4_new) {
- curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
- PAGE_PRESENT_MASK;
- write_pte(ctx, hop3_pte_addr, curr_pte);
- get_pte(ctx, hop3_addr);
- }
-
- get_pte(ctx, hop4_addr);
- } else {
- get_pte(ctx, hop3_addr);
- }
-
- return 0;
-
-err:
- if (hop4_new)
- free_hop(ctx, hop4_addr);
- if (hop3_new)
- free_hop(ctx, hop3_addr);
- if (hop2_new)
- free_hop(ctx, hop2_addr);
- if (hop1_new)
- free_hop(ctx, hop1_addr);
-
- return rc;
-}
-
-/*
- * hl_mmu_map - maps a virtual addr to physical addr
- *
- * @ctx: pointer to the context structure
- * @virt_addr: virt addr to map from
- * @phys_addr: phys addr to map to
- * @page_size: physical page size
- * @flush_pte: whether to do a PCI flush
- *
- * This function does the following:
- * - Check that the virt addr is not mapped
- * - Allocate pgts as necessary in order to map the virt addr to the phys
- * - Returns 0 on success, -EINVAL if addr is already mapped, or -ENOMEM.
- *
- * Because this function changes the page tables in the device and because it
- * changes the MMU hash, it must be protected by a lock.
- * However, because it maps only a single page, the lock should be implemented
- * in a higher level in order to protect the entire mapping of the memory area
- *
- * For optimization reasons PCI flush may be requested once after mapping of
- * large area.
- */
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
- bool flush_pte)
-{
- struct hl_device *hdev = ctx->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- struct hl_mmu_properties *mmu_prop;
- u64 real_virt_addr, real_phys_addr;
- u32 real_page_size, npages;
- int i, rc, mapped_cnt = 0;
- bool is_dram_addr;
-
- if (!hdev->mmu_enable)
- return 0;
-
- is_dram_addr = is_dram_va(hdev, virt_addr);
-
- if (is_dram_addr)
- mmu_prop = &prop->dmmu;
- else if ((page_size % prop->pmmu_huge.page_size) == 0)
- mmu_prop = &prop->pmmu_huge;
- else
- mmu_prop = &prop->pmmu;
-
- /*
- * The H/W handles mapping of specific page sizes. Hence if the page
- * size is bigger, we break it to sub-pages and map them separately.
- */
- if ((page_size % mmu_prop->page_size) == 0) {
- real_page_size = mmu_prop->page_size;
- } else {
- dev_err(hdev->dev,
- "page size of %u is not %uKB aligned, can't unmap\n",
- page_size, mmu_prop->page_size >> 10);
-
- return -EFAULT;
- }
-
- WARN_ONCE((phys_addr & (real_page_size - 1)),
- "Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size",
- phys_addr, real_page_size);
-
- npages = page_size / real_page_size;
- real_virt_addr = virt_addr;
- real_phys_addr = phys_addr;
-
- for (i = 0 ; i < npages ; i++) {
- rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr,
- real_page_size, is_dram_addr);
- if (rc)
- goto err;
-
- real_virt_addr += real_page_size;
- real_phys_addr += real_page_size;
- mapped_cnt++;
- }
-
- if (flush_pte)
- flush(ctx);
-
- return 0;
-
-err:
- real_virt_addr = virt_addr;
- for (i = 0 ; i < mapped_cnt ; i++) {
- if (_hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr))
- dev_warn_ratelimited(hdev->dev,
- "failed to unmap va: 0x%llx\n", real_virt_addr);
-
- real_virt_addr += real_page_size;
- }
-
- flush(ctx);
-
- return rc;
-}
-
-/*
- * hl_mmu_swap_out - marks all mapping of the given ctx as swapped out
- *
- * @ctx: pointer to the context structure
- *
- */
-void hl_mmu_swap_out(struct hl_ctx *ctx)
-{
-
-}
-
-/*
- * hl_mmu_swap_in - marks all mapping of the given ctx as swapped in
- *
- * @ctx: pointer to the context structure
- *
- */
-void hl_mmu_swap_in(struct hl_ctx *ctx)
-{
-
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-#include "include/hw_ip/pci/pci_general.h"
-
-#include <linux/pci.h>
-#include <linux/bitfield.h>
-
-#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC (HL_PCI_ELBI_TIMEOUT_MSEC * 10)
-
-#define IATU_REGION_CTRL_REGION_EN_MASK BIT(31)
-#define IATU_REGION_CTRL_MATCH_MODE_MASK BIT(30)
-#define IATU_REGION_CTRL_NUM_MATCH_EN_MASK BIT(19)
-#define IATU_REGION_CTRL_BAR_NUM_MASK GENMASK(10, 8)
-
-/**
- * hl_pci_bars_map() - Map PCI BARs.
- * @hdev: Pointer to hl_device structure.
- * @name: Array of BAR names.
- * @is_wc: Array with flag per BAR whether a write-combined mapping is needed.
- *
- * Request PCI regions and map them to kernel virtual addresses.
- *
- * Return: 0 on success, non-zero for failure.
- */
-int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
- bool is_wc[3])
-{
- struct pci_dev *pdev = hdev->pdev;
- int rc, i, bar;
-
- rc = pci_request_regions(pdev, HL_NAME);
- if (rc) {
- dev_err(hdev->dev, "Cannot obtain PCI resources\n");
- return rc;
- }
-
- for (i = 0 ; i < 3 ; i++) {
- bar = i * 2; /* 64-bit BARs */
- hdev->pcie_bar[bar] = is_wc[i] ?
- pci_ioremap_wc_bar(pdev, bar) :
- pci_ioremap_bar(pdev, bar);
- if (!hdev->pcie_bar[bar]) {
- dev_err(hdev->dev, "pci_ioremap%s_bar failed for %s\n",
- is_wc[i] ? "_wc" : "", name[i]);
- rc = -ENODEV;
- goto err;
- }
- }
-
- return 0;
-
-err:
- for (i = 2 ; i >= 0 ; i--) {
- bar = i * 2; /* 64-bit BARs */
- if (hdev->pcie_bar[bar])
- iounmap(hdev->pcie_bar[bar]);
- }
-
- pci_release_regions(pdev);
-
- return rc;
-}
-
-/**
- * hl_pci_bars_unmap() - Unmap PCI BARS.
- * @hdev: Pointer to hl_device structure.
- *
- * Release all PCI BARs and unmap their virtual addresses.
- */
-static void hl_pci_bars_unmap(struct hl_device *hdev)
-{
- struct pci_dev *pdev = hdev->pdev;
- int i, bar;
-
- for (i = 2 ; i >= 0 ; i--) {
- bar = i * 2; /* 64-bit BARs */
- iounmap(hdev->pcie_bar[bar]);
- }
-
- pci_release_regions(pdev);
-}
-
-/**
- * hl_pci_elbi_write() - Write through the ELBI interface.
- * @hdev: Pointer to hl_device structure.
- * @addr: Address to write to
- * @data: Data to write
- *
- * Return: 0 on success, negative value for failure.
- */
-static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
-{
- struct pci_dev *pdev = hdev->pdev;
- ktime_t timeout;
- u64 msec;
- u32 val;
-
- if (hdev->pldm)
- msec = HL_PLDM_PCI_ELBI_TIMEOUT_MSEC;
- else
- msec = HL_PCI_ELBI_TIMEOUT_MSEC;
-
- /* Clear previous status */
- pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0);
-
- pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr);
- pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data);
- pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL,
- PCI_CONFIG_ELBI_CTRL_WRITE);
-
- timeout = ktime_add_ms(ktime_get(), msec);
- for (;;) {
- pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val);
- if (val & PCI_CONFIG_ELBI_STS_MASK)
- break;
- if (ktime_compare(ktime_get(), timeout) > 0) {
- pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS,
- &val);
- break;
- }
-
- usleep_range(300, 500);
- }
-
- if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE)
- return 0;
-
- if (val & PCI_CONFIG_ELBI_STS_ERR) {
- dev_err(hdev->dev, "Error writing to ELBI\n");
- return -EIO;
- }
-
- if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
- dev_err(hdev->dev, "ELBI write didn't finish in time\n");
- return -EIO;
- }
-
- dev_err(hdev->dev, "ELBI write has undefined bits in status\n");
- return -EIO;
-}
-
-/**
- * hl_pci_iatu_write() - iatu write routine.
- * @hdev: Pointer to hl_device structure.
- * @addr: Address to write to
- * @data: Data to write
- *
- * Return: 0 on success, negative value for failure.
- */
-int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u32 dbi_offset;
- int rc;
-
- dbi_offset = addr & 0xFFF;
-
- rc = hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000);
- rc |= hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset,
- data);
-
- if (rc)
- return -EIO;
-
- return 0;
-}
-
-/**
- * hl_pci_reset_link_through_bridge() - Reset PCI link.
- * @hdev: Pointer to hl_device structure.
- */
-static void hl_pci_reset_link_through_bridge(struct hl_device *hdev)
-{
- struct pci_dev *pdev = hdev->pdev;
- struct pci_dev *parent_port;
- u16 val;
-
- parent_port = pdev->bus->self;
- pci_read_config_word(parent_port, PCI_BRIDGE_CONTROL, &val);
- val |= PCI_BRIDGE_CTL_BUS_RESET;
- pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
- ssleep(1);
-
- val &= ~(PCI_BRIDGE_CTL_BUS_RESET);
- pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
- ssleep(3);
-}
-
-/**
- * hl_pci_set_inbound_region() - Configure inbound region
- * @hdev: Pointer to hl_device structure.
- * @region: Inbound region number.
- * @pci_region: Inbound region parameters.
- *
- * Configure the iATU inbound region.
- *
- * Return: 0 on success, negative value for failure.
- */
-int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
- struct hl_inbound_pci_region *pci_region)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 bar_phys_base, region_base, region_end_address;
- u32 offset, ctrl_reg_val;
- int rc = 0;
-
- /* region offset */
- offset = (0x200 * region) + 0x100;
-
- if (pci_region->mode == PCI_ADDRESS_MATCH_MODE) {
- bar_phys_base = hdev->pcie_bar_phys[pci_region->bar];
- region_base = bar_phys_base + pci_region->offset_in_bar;
- region_end_address = region_base + pci_region->size - 1;
-
- rc |= hl_pci_iatu_write(hdev, offset + 0x8,
- lower_32_bits(region_base));
- rc |= hl_pci_iatu_write(hdev, offset + 0xC,
- upper_32_bits(region_base));
- rc |= hl_pci_iatu_write(hdev, offset + 0x10,
- lower_32_bits(region_end_address));
- }
-
- /* Point to the specified address */
- rc = hl_pci_iatu_write(hdev, offset + 0x14,
- lower_32_bits(pci_region->addr));
- rc |= hl_pci_iatu_write(hdev, offset + 0x18,
- upper_32_bits(pci_region->addr));
- rc |= hl_pci_iatu_write(hdev, offset + 0x0, 0);
-
- /* Enable + bar/address match + match enable + bar number */
- ctrl_reg_val = FIELD_PREP(IATU_REGION_CTRL_REGION_EN_MASK, 1);
- ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_MATCH_MODE_MASK,
- pci_region->mode);
- ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_NUM_MATCH_EN_MASK, 1);
-
- if (pci_region->mode == PCI_BAR_MATCH_MODE)
- ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_BAR_NUM_MASK,
- pci_region->bar);
-
- rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val);
-
- /* Return the DBI window to the default location */
- rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
- rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
-
- if (rc)
- dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n",
- pci_region->bar, pci_region->addr);
-
- return rc;
-}
-
-/**
- * hl_pci_set_outbound_region() - Configure outbound region 0
- * @hdev: Pointer to hl_device structure.
- * @pci_region: Outbound region parameters.
- *
- * Configure the iATU outbound region 0.
- *
- * Return: 0 on success, negative value for failure.
- */
-int hl_pci_set_outbound_region(struct hl_device *hdev,
- struct hl_outbound_pci_region *pci_region)
-{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 outbound_region_end_address;
- int rc = 0;
-
- /* Outbound Region 0 */
- outbound_region_end_address =
- pci_region->addr + pci_region->size - 1;
- rc |= hl_pci_iatu_write(hdev, 0x008,
- lower_32_bits(pci_region->addr));
- rc |= hl_pci_iatu_write(hdev, 0x00C,
- upper_32_bits(pci_region->addr));
- rc |= hl_pci_iatu_write(hdev, 0x010,
- lower_32_bits(outbound_region_end_address));
- rc |= hl_pci_iatu_write(hdev, 0x014, 0);
-
- if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64))
- rc |= hl_pci_iatu_write(hdev, 0x018, 0x08000000);
- else
- rc |= hl_pci_iatu_write(hdev, 0x018, 0);
-
- rc |= hl_pci_iatu_write(hdev, 0x020,
- upper_32_bits(outbound_region_end_address));
- /* Increase region size */
- rc |= hl_pci_iatu_write(hdev, 0x000, 0x00002000);
- /* Enable */
- rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000);
-
- /* Return the DBI window to the default location */
- rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
- rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
-
- return rc;
-}
-
-/**
- * hl_pci_set_dma_mask() - Set DMA masks for the device.
- * @hdev: Pointer to hl_device structure.
- *
- * This function sets the DMA masks (regular and consistent) for a specified
- * value. If it doesn't succeed, it tries to set it to a fall-back value
- *
- * Return: 0 on success, non-zero for failure.
- */
-static int hl_pci_set_dma_mask(struct hl_device *hdev)
-{
- struct pci_dev *pdev = hdev->pdev;
- int rc;
-
- /* set DMA mask */
- rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
- if (rc) {
- dev_err(hdev->dev,
- "Failed to set pci dma mask to %d bits, error %d\n",
- hdev->dma_mask, rc);
- return rc;
- }
-
- rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
- if (rc) {
- dev_err(hdev->dev,
- "Failed to set pci consistent dma mask to %d bits, error %d\n",
- hdev->dma_mask, rc);
- return rc;
- }
-
- return 0;
-}
-
-/**
- * hl_pci_init() - PCI initialization code.
- * @hdev: Pointer to hl_device structure.
- *
- * Set DMA masks, initialize the PCI controller and map the PCI BARs.
- *
- * Return: 0 on success, non-zero for failure.
- */
-int hl_pci_init(struct hl_device *hdev)
-{
- struct pci_dev *pdev = hdev->pdev;
- int rc;
-
- if (hdev->reset_pcilink)
- hl_pci_reset_link_through_bridge(hdev);
-
- rc = pci_enable_device_mem(pdev);
- if (rc) {
- dev_err(hdev->dev, "can't enable PCI device\n");
- return rc;
- }
-
- pci_set_master(pdev);
-
- rc = hdev->asic_funcs->pci_bars_map(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed to initialize PCI BARs\n");
- goto disable_device;
- }
-
- rc = hdev->asic_funcs->init_iatu(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed to initialize iATU\n");
- goto disable_device;
- }
-
- rc = hl_pci_set_dma_mask(hdev);
- if (rc)
- goto disable_device;
-
- return 0;
-
-disable_device:
- pci_clear_master(pdev);
- pci_disable_device(pdev);
-
- return rc;
-}
-
-/**
- * hl_fw_fini() - PCI finalization code.
- * @hdev: Pointer to hl_device structure
- *
- * Unmap PCI bars and disable PCI device.
- */
-void hl_pci_fini(struct hl_device *hdev)
-{
- hl_pci_bars_unmap(hdev);
-
- pci_clear_master(hdev->pdev);
- pci_disable_device(hdev->pdev);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
- * All Rights Reserved.
- */
-
-#include "habanalabs.h"
-
-#include <linux/pci.h>
-
-#define SET_CLK_PKT_TIMEOUT 1000000 /* 1s */
-#define SET_PWR_PKT_TIMEOUT 1000000 /* 1s */
-
-long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
-{
- struct armcp_packet pkt;
- long result;
- int rc;
-
- memset(&pkt, 0, sizeof(pkt));
-
- if (curr)
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_CURR_GET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- else
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_GET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.pll_index = cpu_to_le32(pll_index);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SET_CLK_PKT_TIMEOUT, &result);
-
- if (rc) {
- dev_err(hdev->dev,
- "Failed to get frequency of PLL %d, error %d\n",
- pll_index, rc);
- result = rc;
- }
-
- return result;
-}
-
-void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
-{
- struct armcp_packet pkt;
- int rc;
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_SET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.pll_index = cpu_to_le32(pll_index);
- pkt.value = cpu_to_le64(freq);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SET_CLK_PKT_TIMEOUT, NULL);
-
- if (rc)
- dev_err(hdev->dev,
- "Failed to set frequency to PLL %d, error %d\n",
- pll_index, rc);
-}
-
-u64 hl_get_max_power(struct hl_device *hdev)
-{
- struct armcp_packet pkt;
- long result;
- int rc;
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_GET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SET_PWR_PKT_TIMEOUT, &result);
-
- if (rc) {
- dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
- result = rc;
- }
-
- return result;
-}
-
-void hl_set_max_power(struct hl_device *hdev, u64 value)
-{
- struct armcp_packet pkt;
- int rc;
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.value = cpu_to_le64(value);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- SET_PWR_PKT_TIMEOUT, NULL);
-
- if (rc)
- dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
-}
-
-static ssize_t uboot_ver_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
-
- return sprintf(buf, "%s\n", hdev->asic_prop.uboot_ver);
-}
-
-static ssize_t armcp_kernel_ver_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
-
- return sprintf(buf, "%s", hdev->asic_prop.armcp_info.kernel_version);
-}
-
-static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
-
- return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.armcp_version);
-}
-
-static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
-
- return sprintf(buf, "0x%08x\n",
- hdev->asic_prop.armcp_info.cpld_version);
-}
-
-static ssize_t infineon_ver_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
-
- return sprintf(buf, "0x%04x\n",
- hdev->asic_prop.armcp_info.infineon_version);
-}
-
-static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
-
- return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.fuse_version);
-}
-
-static ssize_t thermal_ver_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
-
- return sprintf(buf, "%s", hdev->asic_prop.armcp_info.thermal_version);
-}
-
-static ssize_t preboot_btl_ver_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
-
- return sprintf(buf, "%s\n", hdev->asic_prop.preboot_ver);
-}
-
-static ssize_t soft_reset_store(struct device *dev,
- struct device_attribute *attr, const char *buf,
- size_t count)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
- long value;
- int rc;
-
- rc = kstrtoul(buf, 0, &value);
-
- if (rc) {
- count = -EINVAL;
- goto out;
- }
-
- if (!hdev->supports_soft_reset) {
- dev_err(hdev->dev, "Device does not support soft-reset\n");
- goto out;
- }
-
- dev_warn(hdev->dev, "Soft-Reset requested through sysfs\n");
-
- hl_device_reset(hdev, false, false);
-
-out:
- return count;
-}
-
-static ssize_t hard_reset_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
- long value;
- int rc;
-
- rc = kstrtoul(buf, 0, &value);
-
- if (rc) {
- count = -EINVAL;
- goto out;
- }
-
- dev_warn(hdev->dev, "Hard-Reset requested through sysfs\n");
-
- hl_device_reset(hdev, true, false);
-
-out:
- return count;
-}
-
-static ssize_t device_type_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
- char *str;
-
- switch (hdev->asic_type) {
- case ASIC_GOYA:
- str = "GOYA";
- break;
- case ASIC_GAUDI:
- str = "GAUDI";
- break;
- default:
- dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
- hdev->asic_type);
- return -EINVAL;
- }
-
- return sprintf(buf, "%s\n", str);
-}
-
-static ssize_t pci_addr_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
-
- return sprintf(buf, "%04x:%02x:%02x.%x\n",
- pci_domain_nr(hdev->pdev->bus),
- hdev->pdev->bus->number,
- PCI_SLOT(hdev->pdev->devfn),
- PCI_FUNC(hdev->pdev->devfn));
-}
-
-static ssize_t status_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
- char *str;
-
- if (atomic_read(&hdev->in_reset))
- str = "In reset";
- else if (hdev->disabled)
- str = "Malfunction";
- else
- str = "Operational";
-
- return sprintf(buf, "%s\n", str);
-}
-
-static ssize_t soft_reset_cnt_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
-
- return sprintf(buf, "%d\n", hdev->soft_reset_cnt);
-}
-
-static ssize_t hard_reset_cnt_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
-
- return sprintf(buf, "%d\n", hdev->hard_reset_cnt);
-}
-
-static ssize_t max_power_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
- long val;
-
- if (hl_device_disabled_or_in_reset(hdev))
- return -ENODEV;
-
- val = hl_get_max_power(hdev);
-
- return sprintf(buf, "%lu\n", val);
-}
-
-static ssize_t max_power_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t count)
-{
- struct hl_device *hdev = dev_get_drvdata(dev);
- unsigned long value;
- int rc;
-
- if (hl_device_disabled_or_in_reset(hdev)) {
- count = -ENODEV;
- goto out;
- }
-
- rc = kstrtoul(buf, 0, &value);
-
- if (rc) {
- count = -EINVAL;
- goto out;
- }
-
- hdev->max_power = value;
- hl_set_max_power(hdev, value);
-
-out:
- return count;
-}
-
-static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
- struct bin_attribute *attr, char *buf, loff_t offset,
- size_t max_size)
-{
- struct device *dev = container_of(kobj, struct device, kobj);
- struct hl_device *hdev = dev_get_drvdata(dev);
- char *data;
- int rc;
-
- if (!max_size)
- return -EINVAL;
-
- data = kzalloc(max_size, GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- rc = hdev->asic_funcs->get_eeprom_data(hdev, data, max_size);
- if (rc)
- goto out;
-
- memcpy(buf, data, max_size);
-
-out:
- kfree(data);
-
- return max_size;
-}
-
-static DEVICE_ATTR_RO(armcp_kernel_ver);
-static DEVICE_ATTR_RO(armcp_ver);
-static DEVICE_ATTR_RO(cpld_ver);
-static DEVICE_ATTR_RO(device_type);
-static DEVICE_ATTR_RO(fuse_ver);
-static DEVICE_ATTR_WO(hard_reset);
-static DEVICE_ATTR_RO(hard_reset_cnt);
-static DEVICE_ATTR_RO(infineon_ver);
-static DEVICE_ATTR_RW(max_power);
-static DEVICE_ATTR_RO(pci_addr);
-static DEVICE_ATTR_RO(preboot_btl_ver);
-static DEVICE_ATTR_WO(soft_reset);
-static DEVICE_ATTR_RO(soft_reset_cnt);
-static DEVICE_ATTR_RO(status);
-static DEVICE_ATTR_RO(thermal_ver);
-static DEVICE_ATTR_RO(uboot_ver);
-
-static struct bin_attribute bin_attr_eeprom = {
- .attr = {.name = "eeprom", .mode = (0444)},
- .size = PAGE_SIZE,
- .read = eeprom_read_handler
-};
-
-static struct attribute *hl_dev_attrs[] = {
- &dev_attr_armcp_kernel_ver.attr,
- &dev_attr_armcp_ver.attr,
- &dev_attr_cpld_ver.attr,
- &dev_attr_device_type.attr,
- &dev_attr_fuse_ver.attr,
- &dev_attr_hard_reset.attr,
- &dev_attr_hard_reset_cnt.attr,
- &dev_attr_infineon_ver.attr,
- &dev_attr_max_power.attr,
- &dev_attr_pci_addr.attr,
- &dev_attr_preboot_btl_ver.attr,
- &dev_attr_soft_reset.attr,
- &dev_attr_soft_reset_cnt.attr,
- &dev_attr_status.attr,
- &dev_attr_thermal_ver.attr,
- &dev_attr_uboot_ver.attr,
- NULL,
-};
-
-static struct bin_attribute *hl_dev_bin_attrs[] = {
- &bin_attr_eeprom,
- NULL
-};
-
-static struct attribute_group hl_dev_attr_group = {
- .attrs = hl_dev_attrs,
- .bin_attrs = hl_dev_bin_attrs,
-};
-
-static struct attribute_group hl_dev_clks_attr_group;
-
-static const struct attribute_group *hl_dev_attr_groups[] = {
- &hl_dev_attr_group,
- &hl_dev_clks_attr_group,
- NULL,
-};
-
-int hl_sysfs_init(struct hl_device *hdev)
-{
- int rc;
-
- if (hdev->asic_type == ASIC_GOYA)
- hdev->pm_mng_profile = PM_AUTO;
- else
- hdev->pm_mng_profile = PM_MANUAL;
- hdev->max_power = hdev->asic_prop.max_power_default;
-
- hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group);
-
- rc = device_add_groups(hdev->dev, hl_dev_attr_groups);
- if (rc) {
- dev_err(hdev->dev,
- "Failed to add groups to device, error %d\n", rc);
- return rc;
- }
-
- return 0;
-}
-
-void hl_sysfs_fini(struct hl_device *hdev)
-{
- device_remove_groups(hdev->dev, hl_dev_attr_groups);
-}