habanalabs: replace WARN/WARN_ON with dev_crit in driver
author Alon Mizrahi <amizrahi@habana.ai>
Thu, 3 Dec 2020 15:32:19 +0000 (17:32 +0200)
committer Oded Gabbay <ogabbay@kernel.org>
Wed, 27 Jan 2021 19:03:49 +0000 (21:03 +0200)
In data centers, WARN is often configured to behave like BUG, and we
would like to avoid hanging the entire server on an internal error of
the driver (important as it might be).

Therefore, use dev_crit instead.

Signed-off-by: Alon Mizrahi <amizrahi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/misc/habanalabs/common/asid.c
drivers/misc/habanalabs/common/command_buffer.c
drivers/misc/habanalabs/common/command_submission.c
drivers/misc/habanalabs/common/device.c
drivers/misc/habanalabs/common/mmu.c
drivers/misc/habanalabs/gaudi/gaudi.c
drivers/misc/habanalabs/goya/goya.c

index a2fdf31..ede04c0 100644 (file)
@@ -50,8 +50,10 @@ unsigned long hl_asid_alloc(struct hl_device *hdev)
 
 void hl_asid_free(struct hl_device *hdev, unsigned long asid)
 {
-       if (WARN((asid == 0 || asid >= hdev->asic_prop.max_asid),
-                                               "Invalid ASID %lu", asid))
+       if (asid == HL_KERNEL_ASID_ID || asid >= hdev->asic_prop.max_asid) {
+               dev_crit(hdev->dev, "Invalid ASID %lu", asid);
                return;
+       }
+
        clear_bit(asid, hdev->asid_bitmap);
 }
index 6f6a904..d9adb9a 100644 (file)
@@ -635,10 +635,12 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
 
        cb_handle >>= PAGE_SHIFT;
        cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, (u32) cb_handle);
-       /* hl_cb_get should never fail here so use kernel WARN */
-       WARN(!cb, "Kernel CB handle invalid 0x%x\n", (u32) cb_handle);
-       if (!cb)
+       /* hl_cb_get should never fail here */
+       if (!cb) {
+               dev_crit(hdev->dev, "Kernel CB handle invalid 0x%x\n",
+                               (u32) cb_handle);
                goto destroy_cb;
+       }
 
        return cb;
 
index 348b0ec..f7fac82 100644 (file)
@@ -48,8 +48,8 @@ void hl_sob_reset_error(struct kref *ref)
        struct hl_device *hdev = hw_sob->hdev;
 
        dev_crit(hdev->dev,
-                       "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
-                       hw_sob->q_idx, hw_sob->sob_id);
+               "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
+               hw_sob->q_idx, hw_sob->sob_id);
 }
 
 /**
index 29358a9..13405d4 100644 (file)
@@ -1485,7 +1485,8 @@ void hl_device_fini(struct hl_device *hdev)
                usleep_range(50, 200);
                rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
                if (ktime_compare(ktime_get(), timeout) > 0) {
-                       WARN(1, "Failed to remove device because reset function did not finish\n");
+                       dev_crit(hdev->dev,
+                               "Failed to remove device because reset function did not finish\n");
                        return;
                }
        }
index 28a4638..62cfa41 100644 (file)
@@ -261,9 +261,10 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
                return -EFAULT;
        }
 
-       WARN_ONCE((phys_addr & (real_page_size - 1)),
-               "Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size",
-               phys_addr, real_page_size);
+       if (phys_addr & (real_page_size - 1))
+               dev_crit(hdev->dev,
+                       "Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size",
+                       phys_addr, real_page_size);
 
        npages = page_size / real_page_size;
        real_virt_addr = virt_addr;
index 5281cad..19c9e38 100644 (file)
@@ -5308,10 +5308,10 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,
        patched_cb_handle >>= PAGE_SHIFT;
        parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
                                (u32) patched_cb_handle);
-       /* hl_cb_get should never fail here so use kernel WARN */
-       WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
-                       (u32) patched_cb_handle);
+       /* hl_cb_get should never fail */
        if (!parser->patched_cb) {
+               dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
+                       (u32) patched_cb_handle);
                rc = -EFAULT;
                goto out;
        }
@@ -5380,10 +5380,10 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
        patched_cb_handle >>= PAGE_SHIFT;
        parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
                                (u32) patched_cb_handle);
-       /* hl_cb_get should never fail here so use kernel WARN */
-       WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
-                       (u32) patched_cb_handle);
+       /* hl_cb_get should never fail here */
        if (!parser->patched_cb) {
+               dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
+                               (u32) patched_cb_handle);
                rc = -EFAULT;
                goto out;
        }
@@ -5928,7 +5928,7 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
                return;
 
        if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
-               WARN(1, "asid %u is too big\n", asid);
+               dev_crit(hdev->dev, "asid %u is too big\n", asid);
                return;
        }
 
index 720484b..12d9e52 100644 (file)
@@ -3876,10 +3876,10 @@ static int goya_parse_cb_mmu(struct hl_device *hdev,
        patched_cb_handle >>= PAGE_SHIFT;
        parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
                                (u32) patched_cb_handle);
-       /* hl_cb_get should never fail here so use kernel WARN */
-       WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
-                       (u32) patched_cb_handle);
+       /* hl_cb_get should never fail here */
        if (!parser->patched_cb) {
+               dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
+                       (u32) patched_cb_handle);
                rc = -EFAULT;
                goto out;
        }
@@ -3948,10 +3948,10 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,
        patched_cb_handle >>= PAGE_SHIFT;
        parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
                                (u32) patched_cb_handle);
-       /* hl_cb_get should never fail here so use kernel WARN */
-       WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
-                       (u32) patched_cb_handle);
+       /* hl_cb_get should never fail here */
        if (!parser->patched_cb) {
+               dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
+                       (u32) patched_cb_handle);
                rc = -EFAULT;
                goto out;
        }
@@ -5042,7 +5042,7 @@ static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
                return;
 
        if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
-               WARN(1, "asid %u is too big\n", asid);
+               dev_crit(hdev->dev, "asid %u is too big\n", asid);
                return;
        }