accel/habanalabs: add event queue extra validation
author: Ofir Bitton <obitton@habana.ai>
Sun, 21 May 2023 07:24:13 +0000 (10:24 +0300)
committer: Oded Gabbay <ogabbay@kernel.org>
Thu, 8 Jun 2023 09:35:56 +0000 (12:35 +0300)
To increase the reliability of the event queue interface, apply to
Gaudi2 the same validation mechanism that Gaudi1 already uses.
The extra validation checks that the index of each received event
matches the expected index (the previous index plus one).

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/accel/habanalabs/common/irq.c
drivers/accel/habanalabs/gaudi2/gaudi2.c

index c67895b..b1010d2 100644 (file)
@@ -430,7 +430,7 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
                cur_eqe_index = FIELD_GET(EQ_CTL_INDEX_MASK, cur_eqe);
                if ((hdev->event_queue.check_eqe_index) &&
                                (((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK) != cur_eqe_index)) {
-                       dev_dbg(hdev->dev,
+                       dev_err(hdev->dev,
                                "EQE %#x in queue is ready but index does not match %d!=%d",
                                cur_eqe,
                                ((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK),
index 0d41adf..20c4583 100644 (file)
@@ -3619,6 +3619,12 @@ static int gaudi2_sw_init(struct hl_device *hdev)
 
        prop->supports_compute_reset = true;
 
+       /* Event queue sanity check added in FW version 1.11 */
+       if (hl_is_fw_sw_ver_below(hdev, 1, 11))
+               hdev->event_queue.check_eqe_index = false;
+       else
+               hdev->event_queue.check_eqe_index = true;
+
        hdev->asic_funcs->set_pci_memory_regions(hdev);
 
        rc = gaudi2_special_blocks_iterator_config(hdev);