habanalabs: remove compute context pointer
author Oded Gabbay <ogabbay@kernel.org>
Tue, 30 Nov 2021 21:08:21 +0000 (23:08 +0200)
committer Oded Gabbay <ogabbay@kernel.org>
Sun, 26 Dec 2021 06:59:08 +0000 (08:59 +0200)
It was an error to save the compute context's pointer in the device
structure, as it allowed the pointer to be used without taking a proper
reference (ref-cnt) on the context.

Change the variable to a flag that only indicates whether there is
an active compute context. Code that needs the pointer will now
be forced to use proper internal APIs to get the pointer.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/misc/habanalabs/common/context.c
drivers/misc/habanalabs/common/device.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/common/habanalabs_drv.c
drivers/misc/habanalabs/goya/goya.c
drivers/misc/habanalabs/goya/goya_hwmgr.c

index b288410..49e6f11 100644 (file)
@@ -165,7 +165,7 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
        hpriv->ctx = ctx;
 
        /* TODO: remove the following line for multiple process support */
-       hdev->compute_ctx = ctx;
+       hdev->is_compute_ctx_active = true;
 
        return 0;
 
index 407f6c5..bea05a5 100644 (file)
@@ -97,12 +97,12 @@ static void hpriv_release(struct kref *ref)
                        || hdev->reset_upon_device_release)
                hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
 
-       /* Now we can mark the compute_ctx as empty. Even if a reset is running in a different
+       /* Now we can mark the compute_ctx as not active. Even if a reset is running in a different
         * thread, we don't care because the in_reset is marked so if a user will try to open
-        * the device it will fail on that, even if compute_ctx is NULL.
+        * the device it will fail on that, even if is_compute_ctx_active is false.
         */
        mutex_lock(&hdev->fpriv_list_lock);
-       hdev->compute_ctx = NULL;
+       hdev->is_compute_ctx_active = false;
        mutex_unlock(&hdev->fpriv_list_lock);
 
        kfree(hpriv);
@@ -1150,7 +1150,7 @@ kill_processes:
                        goto out_err;
                }
 
-               hdev->compute_ctx = NULL;
+               hdev->is_compute_ctx_active = false;
 
                rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
                if (rc) {
@@ -1403,7 +1403,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
                goto mmu_fini;
        }
 
-       hdev->compute_ctx = NULL;
+       hdev->is_compute_ctx_active = false;
 
        hdev->asic_funcs->state_dump_init(hdev);
 
index eec96e5..df19359 100644 (file)
@@ -2503,7 +2503,6 @@ struct last_error_session_info {
  * @fpriv_list: list of file private data structures. Each structure is created
  *              when a user opens the device
  * @fpriv_list_lock: protects the fpriv_list
- * @compute_ctx: current compute context executing.
  * @aggregated_cs_counters: aggregated cs counters among all contexts
  * @mmu_priv: device-specific MMU data.
  * @mmu_func: device-related MMU functions.
@@ -2601,6 +2600,7 @@ struct last_error_session_info {
  *                        cases where Linux was not loaded to device CPU
  * @supports_wait_for_multi_cs: true if wait for multi CS is supported
  * @is_in_soft_reset: Device is currently in soft reset process.
+ * @is_compute_ctx_active: Whether there is an active compute context executing.
  */
 struct hl_device {
        struct pci_dev                  *pdev;
@@ -2656,8 +2656,6 @@ struct hl_device {
        struct list_head                fpriv_list;
        struct mutex                    fpriv_list_lock;
 
-       struct hl_ctx                   *compute_ctx;
-
        struct hl_cs_counters_atomic    aggregated_cs_counters;
 
        struct hl_mmu_priv              mmu_priv;
@@ -2730,6 +2728,7 @@ struct hl_device {
        u8                              supports_wait_for_multi_cs;
        u8                              stream_master_qid_arr_size;
        u8                              is_in_soft_reset;
+       u8                              is_compute_ctx_active;
 
        /* Parameters for bring-up */
        u64                             nic_ports_mask;
index d4ef999..62a02ef 100644 (file)
@@ -161,7 +161,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
                goto out_err;
        }
 
-       if (hdev->compute_ctx) {
+       if (hdev->is_compute_ctx_active) {
                dev_dbg_ratelimited(hdev->dev,
                        "Can't open %s because another user is working on it\n",
                        dev_name(hdev->dev));
index e54d60e..8d0f2cd 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
  * All Rights Reserved.
  */
 
@@ -827,7 +827,7 @@ static void goya_set_freq_to_low_job(struct work_struct *work)
 
        mutex_lock(&hdev->fpriv_list_lock);
 
-       if (!hdev->compute_ctx)
+       if (!hdev->is_compute_ctx_active)
                goya_set_frequency(hdev, PLL_LOW);
 
        mutex_unlock(&hdev->fpriv_list_lock);
index 42985a8..76b4774 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
  * All Rights Reserved.
  */
 
@@ -258,7 +258,7 @@ static ssize_t pm_mng_profile_store(struct device *dev,
 
        mutex_lock(&hdev->fpriv_list_lock);
 
-       if (hdev->compute_ctx) {
+       if (hdev->is_compute_ctx_active) {
                dev_err(hdev->dev,
                        "Can't change PM profile while compute context is opened on the device\n");
                count = -EPERM;