Merge tag 'perf-tools-for-v5.15-2021-09-11' of git://git.kernel.org/pub/scm/linux...
[linux-2.6-microblaze.git] / drivers / misc / habanalabs / gaudi / gaudi.c
index 561485d..383865b 100644 (file)
@@ -76,7 +76,7 @@
 #define GAUDI_PLDM_MMU_TIMEOUT_USEC    (MMU_CONFIG_TIMEOUT_USEC * 100)
 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC  (HL_DEVICE_TIMEOUT_USEC * 30)
 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC        (HL_DEVICE_TIMEOUT_USEC * 30)
-#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC        1000000         /* 1s */
+#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC        4000000         /* 4s */
 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC  4000000         /* 4s */
 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000        /* 15s */
 
 
 #define BIN_REG_STRING_SIZE    sizeof("0b10101010101010101010101010101010")
 
+#define MONITOR_SOB_STRING_SIZE                256
+
+/* Stream-master queue IDs: the four DMA_0_x queues followed by the four
+ * DMA_1_x queues. Exposed through gaudi_get_stream_master_qid_arr().
+ */
+static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
+       GAUDI_QUEUE_ID_DMA_0_0, GAUDI_QUEUE_ID_DMA_0_1,
+       GAUDI_QUEUE_ID_DMA_0_2, GAUDI_QUEUE_ID_DMA_0_3,
+       GAUDI_QUEUE_ID_DMA_1_0, GAUDI_QUEUE_ID_DMA_1_1,
+       GAUDI_QUEUE_ID_DMA_1_2, GAUDI_QUEUE_ID_DMA_1_3
+};
+
 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
                "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
                "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
@@ -456,8 +469,6 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
                                        u32 size, u64 val);
 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
                                        u32 num_regs, u32 val);
-static int gaudi_schedule_register_memset(struct hl_device *hdev,
-               u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
                                u32 tpc_id);
 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
@@ -468,7 +479,6 @@ static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
                                u32 size, bool eb);
 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
                                struct hl_gen_wait_properties *prop);
-
 static inline enum hl_collective_mode
 get_collective_mode(struct hl_device *hdev, u32 queue_id)
 {
@@ -496,7 +506,11 @@ static inline void set_default_power_values(struct hl_device *hdev)
 
        if (hdev->card_type == cpucp_card_type_pmc) {
                prop->max_power_default = MAX_POWER_DEFAULT_PMC;
-               prop->dc_power_default = DC_POWER_DEFAULT_PMC;
+
+               if (prop->fw_security_enabled)
+                       prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
+               else
+                       prop->dc_power_default = DC_POWER_DEFAULT_PMC;
        } else {
                prop->max_power_default = MAX_POWER_DEFAULT_PCI;
                prop->dc_power_default = DC_POWER_DEFAULT_PCI;
@@ -645,6 +659,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
        prop->hard_reset_done_by_fw = false;
        prop->gic_interrupts_enable = true;
 
+       prop->server_type = HL_SERVER_TYPE_UNKNOWN;
+
        return 0;
 }
 
@@ -817,14 +833,14 @@ pci_init:
                                        GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
        if (rc) {
                if (hdev->reset_on_preboot_fail)
-                       hdev->asic_funcs->hw_fini(hdev, true);
+                       hdev->asic_funcs->hw_fini(hdev, true, false);
                goto pci_fini;
        }
 
        if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
                dev_info(hdev->dev,
                        "H/W state is dirty, must reset before initializing\n");
-               hdev->asic_funcs->hw_fini(hdev, true);
+               hdev->asic_funcs->hw_fini(hdev, true, false);
        }
 
        return 0;
@@ -1068,17 +1084,11 @@ static void gaudi_sob_group_hw_reset(struct kref *ref)
        struct gaudi_hw_sob_group *hw_sob_group =
                container_of(ref, struct gaudi_hw_sob_group, kref);
        struct hl_device *hdev = hw_sob_group->hdev;
-       u64 base_addr;
-       int rc;
+       int i;
 
-       base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
-                       hw_sob_group->base_sob_id * 4;
-       rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
-                       base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
-       if (rc)
-               dev_err(hdev->dev,
-                       "failed resetting sob group - sob base %u, count %u",
-                       hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);
+       for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
+               WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
+                       (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
 
        kref_init(&hw_sob_group->kref);
 }
@@ -1215,6 +1225,20 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev,
        queue_id = job->hw_queue_id;
        prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
 
+       if (job->cs->encaps_signals) {
+               /* use the encaps signal handle stored earlier in the flow
+                * and set the SOB information from the encaps
+                * signals handle
+                */
+               hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
+                                               cs_cmpl);
+
+               dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
+                               job->cs->sequence,
+                               cs_cmpl->hw_sob->sob_id,
+                               cs_cmpl->sob_val);
+       }
+
        /* Add to wait CBs using slave monitor */
        wait_prop.data = (void *) job->user_cb;
        wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
@@ -1225,7 +1249,7 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev,
        wait_prop.size = cb_size;
 
        dev_dbg(hdev->dev,
-               "Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
+               "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
                cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
                prop->collective_slave_mon_id, queue_id);
 
@@ -1239,7 +1263,7 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev,
                        prop->collective_sob_id, cb_size, false);
 }
 
-static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
+static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
 {
        struct hl_cs_compl *signal_cs_cmpl =
                container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
@@ -1257,9 +1281,37 @@ static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
        gaudi = hdev->asic_specific;
        cprop = &gaudi->collective_props;
 
-       /* copy the SOB id and value of the signal CS */
-       cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
-       cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+       /* In encaps signals case the SOB info will be retrieved from
+        * the handle in gaudi_collective_slave_init_job.
+        */
+       if (!cs->encaps_signals) {
+               /* copy the SOB id and value of the signal CS */
+               cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
+               cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+       }
+
+       /* check again if the signal cs already completed.
+        * if yes then don't send any wait cs since the hw_sob
+        * could be in reset already. if signal is not completed
+        * then get refcount to hw_sob to prevent resetting the sob
+        * while wait cs is not submitted.
+        * note that this check is protected by two locks,
+        * hw queue lock and completion object lock,
+        * and the same completion object lock also protects
+        * the hw_sob reset handler function.
+        * The hw_queue lock prevents the hw_sob refcount value,
+        * changed by signal/wait flows, from going out of sync.
+        */
+       spin_lock(&signal_cs_cmpl->lock);
+
+       if (completion_done(&cs->signal_fence->completion)) {
+               spin_unlock(&signal_cs_cmpl->lock);
+               return -EINVAL;
+       }
+       /* Increment kref since all slave queues are now waiting on it */
+       kref_get(&cs_cmpl->hw_sob->kref);
+
+       spin_unlock(&signal_cs_cmpl->lock);
 
        /* Calculate the stream from collective master queue (1st job) */
        job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
@@ -1304,21 +1356,17 @@ static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
                                cprop->curr_sob_group_idx[stream], stream);
        }
 
-       /* Increment kref since all slave queues are now waiting on it */
-       kref_get(&cs_cmpl->hw_sob->kref);
-       /*
-        * Must put the signal fence after the SOB refcnt increment so
-        * the SOB refcnt won't turn 0 and reset the SOB before the
-        * wait CS was submitted.
-        */
        mb();
        hl_fence_put(cs->signal_fence);
        cs->signal_fence = NULL;
+
+       return 0;
 }
 
 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
                struct hl_ctx *ctx, struct hl_cs *cs,
-               enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
+               enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
+               u32 encaps_signal_offset)
 {
        struct hw_queue_properties *hw_queue_prop;
        struct hl_cs_counters_atomic *cntr;
@@ -1378,6 +1426,13 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev,
        job->user_cb_size = cb_size;
        job->hw_queue_id = queue_id;
 
+       /* since it's guaranteed to have only one chunk in the collective wait
+        * cs, we can use this chunk to set the encapsulated signal offset
+        * in the jobs.
+        */
+       if (cs->encaps_signals)
+               job->encaps_sig_wait_offset = encaps_signal_offset;
+
        /*
         * No need in parsing, user CB is the patched CB.
         * We call hl_cb_destroy() out of two reasons - we don't need
@@ -1406,8 +1461,9 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev,
 }
 
 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
-               struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
-               u32 collective_engine_id)
+               struct hl_ctx *ctx, struct hl_cs *cs,
+               u32 wait_queue_id, u32 collective_engine_id,
+               u32 encaps_signal_offset)
 {
        struct gaudi_device *gaudi = hdev->asic_specific;
        struct hw_queue_properties *hw_queue_prop;
@@ -1457,7 +1513,8 @@ static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
                if (i == 0) {
                        queue_id = wait_queue_id;
                        rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
-                               HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
+                               HL_COLLECTIVE_MASTER, queue_id,
+                               wait_queue_id, encaps_signal_offset);
                } else {
                        if (nic_idx < NIC_NUMBER_OF_ENGINES) {
                                if (gaudi->hw_cap_initialized &
@@ -1477,7 +1534,8 @@ static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
                        }
 
                        rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
-                               HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
+                               HL_COLLECTIVE_SLAVE, queue_id,
+                               wait_queue_id, encaps_signal_offset);
                }
 
                if (rc)
@@ -1525,6 +1583,11 @@ static int gaudi_late_init(struct hl_device *hdev)
                return rc;
        }
 
+       /* Scrub both SRAM and DRAM */
+       rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
+       if (rc)
+               goto disable_pci_access;
+
        rc = gaudi_fetch_psoc_frequency(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
@@ -1549,6 +1612,11 @@ static int gaudi_late_init(struct hl_device *hdev)
                goto disable_pci_access;
        }
 
+       /* We only support a single ASID for the user, so for the sake of optimization, just
+        * initialize the ASID one time during device initialization with the fixed value of 1
+        */
+       gaudi_mmu_prepare(hdev, 1);
+
        return 0;
 
 disable_pci_access:
@@ -1814,8 +1882,12 @@ static int gaudi_sw_init(struct hl_device *hdev)
        hdev->supports_sync_stream = true;
        hdev->supports_coresight = true;
        hdev->supports_staged_submission = true;
+       hdev->supports_wait_for_multi_cs = true;
 
-       gaudi_set_pci_memory_regions(hdev);
+       hdev->asic_funcs->set_pci_memory_regions(hdev);
+       hdev->stream_master_qid_arr =
+                               hdev->asic_funcs->get_stream_master_qid_arr();
+       hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
 
        return 0;
 
@@ -2617,7 +2689,7 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)
                                tpc_id < TPC_NUMBER_OF_ENGINES;
                                tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
                /* Mask all arithmetic interrupts from TPC */
-               WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
+               WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
                /* Set 16 cache lines */
                WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
                                ICACHE_FETCH_LINE_NUM, 2);
@@ -3764,7 +3836,7 @@ static void gaudi_disable_timestamp(struct hl_device *hdev)
        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
 }
 
-static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
+static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
 {
        u32 wait_timeout_ms;
 
@@ -3776,6 +3848,9 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
        else
                wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
 
+       if (fw_reset)
+               goto skip_engines;
+
        gaudi_stop_nic_qmans(hdev);
        gaudi_stop_mme_qmans(hdev);
        gaudi_stop_tpc_qmans(hdev);
@@ -3801,6 +3876,7 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
 
        gaudi_disable_timestamp(hdev);
 
+skip_engines:
        gaudi_disable_msi(hdev);
 }
 
@@ -3833,6 +3909,9 @@ static int gaudi_mmu_init(struct hl_device *hdev)
        WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
        WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
 
+       /* mem cache invalidation */
+       WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
+
        hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
 
        WREG32(mmMMU_UP_MMU_ENABLE, 1);
@@ -4165,7 +4244,7 @@ disable_queues:
        return rc;
 }
 
-static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
+static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
 {
        struct cpu_dyn_regs *dyn_regs =
                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
@@ -4186,6 +4265,14 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
                cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
        }
 
+       if (fw_reset) {
+               dev_info(hdev->dev,
+                       "Firmware performs HARD reset, going to wait %dms\n",
+                       reset_timeout_ms);
+
+               goto skip_reset;
+       }
+
        driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
                                        !hdev->asic_prop.hard_reset_done_by_fw);
 
@@ -4262,6 +4349,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
                        reset_timeout_ms);
        }
 
+skip_reset:
        /*
         * After hard reset, we can't poll the BTM_FSM register because the PSOC
         * itself is in reset. Need to wait until the reset is deasserted
@@ -4715,8 +4803,8 @@ static int gaudi_hbm_scrubbing(struct hl_device *hdev)
                                "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
                                cur_addr, cur_addr + chunk_size);
 
-                       WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
-                       WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
+                       WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
+                       WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
                        WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
                                                lower_32_bits(cur_addr));
                        WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
@@ -5890,78 +5978,6 @@ release_cb:
        return rc;
 }
 
-static int gaudi_schedule_register_memset(struct hl_device *hdev,
-               u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
-{
-       struct hl_ctx *ctx;
-       struct hl_pending_cb *pending_cb;
-       struct packet_msg_long *pkt;
-       u32 cb_size, ctl;
-       struct hl_cb *cb;
-       int i, rc;
-
-       mutex_lock(&hdev->fpriv_list_lock);
-       ctx = hdev->compute_ctx;
-
-       /* If no compute context available or context is going down
-        * memset registers directly
-        */
-       if (!ctx || kref_read(&ctx->refcount) == 0) {
-               rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
-               mutex_unlock(&hdev->fpriv_list_lock);
-               return rc;
-       }
-
-       mutex_unlock(&hdev->fpriv_list_lock);
-
-       cb_size = (sizeof(*pkt) * num_regs) +
-                       sizeof(struct packet_msg_prot) * 2;
-
-       if (cb_size > SZ_2M) {
-               dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
-               return -ENOMEM;
-       }
-
-       pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
-       if (!pending_cb)
-               return -ENOMEM;
-
-       cb = hl_cb_kernel_create(hdev, cb_size, false);
-       if (!cb) {
-               kfree(pending_cb);
-               return -EFAULT;
-       }
-
-       pkt = cb->kernel_address;
-
-       ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
-       ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
-       ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
-       ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
-       ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
-
-       for (i = 0; i < num_regs ; i++, pkt++) {
-               pkt->ctl = cpu_to_le32(ctl);
-               pkt->value = cpu_to_le32(val);
-               pkt->addr = cpu_to_le64(reg_base + (i * 4));
-       }
-
-       hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
-
-       pending_cb->cb = cb;
-       pending_cb->cb_size = cb_size;
-       /* The queue ID MUST be an external queue ID. Otherwise, we will
-        * have undefined behavior
-        */
-       pending_cb->hw_queue_id = hw_queue_id;
-
-       spin_lock(&ctx->pending_cb_lock);
-       list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
-       spin_unlock(&ctx->pending_cb_lock);
-
-       return 0;
-}
-
 static int gaudi_restore_sm_registers(struct hl_device *hdev)
 {
        u64 base_addr;
@@ -6107,7 +6123,7 @@ static int gaudi_restore_user_registers(struct hl_device *hdev)
 
 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
 {
-       return gaudi_restore_user_registers(hdev);
+       return 0;
 }
 
 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
@@ -6817,6 +6833,9 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
                                asid);
        }
 
+       gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
+       gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
+
        hdev->asic_funcs->set_clock_gating(hdev);
 
        mutex_unlock(&gaudi->clk_gate_mutex);
@@ -6866,7 +6885,8 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev,
 
        dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
 
-       WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
+       WREG32(mmDMA0_CORE_PROT + dma_offset,
+                       BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
 
        rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
                                        job->job_cb_size, cb->bus_address);
@@ -6887,8 +6907,7 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev,
        }
 
 free_fence_ptr:
-       WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
-                       ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
+       WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
 
        hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
                                        fence_dma_addr);
@@ -7262,7 +7281,7 @@ static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream
 
        cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
        size = RREG32(cq_tsize);
-       dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
+       dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
                                                        stream, cq_ptr, size);
 }
 
@@ -7318,7 +7337,7 @@ static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
 
                addr = le64_to_cpu(bd->ptr);
 
-               dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
+               dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
                                                        stream, ci, addr, len);
 
                /* get previous ci, wrap if needed */
@@ -7458,6 +7477,11 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
        bool extract_info_from_fw;
        int rc;
 
+       if (hdev->asic_prop.fw_security_enabled) {
+               extract_info_from_fw = true;
+               goto extract_ecc_info;
+       }
+
        switch (event_type) {
        case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
        case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
@@ -7530,6 +7554,7 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
                return;
        }
 
+extract_ecc_info:
        if (extract_info_from_fw) {
                ecc_address = le64_to_cpu(ecc_data->ecc_address);
                ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
@@ -7910,6 +7935,12 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
        u8 cause;
        int rc;
 
+       if (event_type >= GAUDI_EVENT_SIZE) {
+               dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
+                               event_type, GAUDI_EVENT_SIZE - 1);
+               return;
+       }
+
        gaudi->events_stat[event_type]++;
        gaudi->events_stat_aggregate[event_type]++;
 
@@ -7981,10 +8012,10 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
                                        tpc_dec_event_to_tpc_id(event_type),
                                        "AXI_SLV_DEC_Error");
                if (reset_required) {
-                       dev_err(hdev->dev, "hard reset required due to %s\n",
+                       dev_err(hdev->dev, "reset required due to %s\n",
                                gaudi_irq_map_table[event_type].name);
 
-                       goto reset_device;
+                       hl_device_reset(hdev, 0);
                } else {
                        hl_fw_unmask_irq(hdev, event_type);
                }
@@ -8003,10 +8034,10 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
                                        tpc_krn_event_to_tpc_id(event_type),
                                        "KRN_ERR");
                if (reset_required) {
-                       dev_err(hdev->dev, "hard reset required due to %s\n",
+                       dev_err(hdev->dev, "reset required due to %s\n",
                                gaudi_irq_map_table[event_type].name);
 
-                       goto reset_device;
+                       hl_device_reset(hdev, 0);
                } else {
                        hl_fw_unmask_irq(hdev, event_type);
                }
@@ -8136,7 +8167,9 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
        return;
 
 reset_device:
-       if (hdev->hard_reset_on_fw_events)
+       if (hdev->asic_prop.fw_security_enabled)
+               hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW);
+       else if (hdev->hard_reset_on_fw_events)
                hl_device_reset(hdev, HL_RESET_HARD);
        else
                hl_fw_unmask_irq(hdev, event_type);
@@ -8668,11 +8701,20 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
 
 static int gaudi_ctx_init(struct hl_ctx *ctx)
 {
+       int rc;
+
        if (ctx->asid == HL_KERNEL_ASID_ID)
                return 0;
 
-       gaudi_mmu_prepare(ctx->hdev, ctx->asid);
-       return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
+       rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
+       if (rc)
+               return rc;
+
+       rc = gaudi_restore_user_registers(ctx->hdev);
+       if (rc)
+               gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
+
+       return rc;
 }
 
 static void gaudi_ctx_fini(struct hl_ctx *ctx)
@@ -8701,6 +8743,11 @@ static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
                        sizeof(struct packet_msg_prot) * 2;
 }
 
+/* Return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 advanced by 4 for each
+ * sync object index in @sob_id. @hdev is not used.
+ */
+static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
+{
+       const u32 sob_offset = sob_id * 4;
+
+       return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
+}
+
 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
                                u32 size, bool eb)
 {
@@ -9007,16 +9054,12 @@ static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
 {
        struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
-       int rc;
 
        dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
                hw_sob->sob_id);
 
-       rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
-                       CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
-                       hw_sob->sob_id * 4, 1, 0);
-       if (rc)
-               dev_err(hdev->dev, "failed resetting sob %u", hw_sob->sob_id);
+       WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
+                       hw_sob->sob_id * 4, 0);
 
        kref_init(&hw_sob->kref);
 }
@@ -9185,6 +9228,34 @@ static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
                mon->status);
 }
 
+/*
+ * gaudi_fill_sobs_from_mon() - list the sync objects a monitor is armed on.
+ * @sobs: output string buffer of MONITOR_SOB_STRING_SIZE bytes. Nothing is
+ *        written when no mask bit is clear, so the caller is expected to
+ *        pass an initialized (e.g. zeroed) buffer.
+ * @mon:  monitor state; only mon->arm_data is read.
+ *
+ * Sync object ID is calculated as follows:
+ * (MONITOR_MAX_SOBS * group_id + index of a cleared bit in mask)
+ * The IDs are written as a ", "-separated decimal list.
+ */
+static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
+{
+       u32 gid, mask, i;
+       int offset = 0;
+
+       gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
+                       mon->arm_data);
+       mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
+                       mon->arm_data);
+
+       /* Walk every bit of the group mask rather than stopping once the
+        * remaining mask is zero: cleared bits above the highest set bit
+        * (e.g. mask 0x7f, or an all-zero mask) also select sync objects.
+        * NOTE(review): assumes the mask field is MONITOR_MAX_SOBS bits
+        * wide - confirm against the register definition.
+        */
+       for (i = 0 ; i < MONITOR_MAX_SOBS ; i++) {
+               if (mask & BIT(i))
+                       continue;
+
+               /* scnprintf() returns the number of characters actually
+                * stored, so offset can never run past the buffer.
+                */
+               offset += scnprintf(sobs + offset,
+                                       MONITOR_SOB_STRING_SIZE - offset,
+                                       "%s%u", offset ? ", " : "",
+                                       gid * MONITOR_MAX_SOBS + i);
+       }
+}
+
 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
                                struct hl_device *hdev,
                                struct hl_mon_state_dump *mon)
@@ -9192,14 +9263,17 @@ static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
        const char *name;
        char scratch_buf1[BIN_REG_STRING_SIZE],
                scratch_buf2[BIN_REG_STRING_SIZE];
+       char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
 
        name = hl_state_dump_get_monitor_name(hdev, mon);
        if (!name)
                name = "";
 
+       gaudi_fill_sobs_from_mon(monitored_sobs, mon);
+
        return hl_snprintf_resize(
                buf, size, offset,
-               "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s",
+               "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
                mon->id, name,
                FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
                                mon->arm_data),
@@ -9216,7 +9290,8 @@ static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
                        scratch_buf2, sizeof(scratch_buf2),
                        FIELD_GET(
                                SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
-                               mon->status)));
+                               mon->status)),
+               monitored_sobs);
 }
 
 
@@ -9319,6 +9394,11 @@ static void gaudi_state_dump_init(struct hl_device *hdev)
        sds->funcs = gaudi_state_dump_funcs;
 }
 
+/* Accessor for the file-scope gaudi_stream_master queue-ID table. */
+static u32 *gaudi_get_stream_master_qid_arr(void)
+{
+       u32 *qid_arr = gaudi_stream_master;
+
+       return qid_arr;
+}
+
 static const struct hl_asic_funcs gaudi_funcs = {
        .early_init = gaudi_early_init,
        .early_fini = gaudi_early_fini,
@@ -9405,7 +9485,10 @@ static const struct hl_asic_funcs gaudi_funcs = {
        .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
        .init_firmware_loader = gaudi_init_firmware_loader,
        .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
-       .state_dump_init = gaudi_state_dump_init
+       .state_dump_init = gaudi_state_dump_init,
+       .get_sob_addr = gaudi_get_sob_addr,
+       .set_pci_memory_regions = gaudi_set_pci_memory_regions,
+       .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
 };
 
 /**