#define GAUDI_PLL_MAX 10
+#define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010")
+
static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};
+static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
+ { .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
+ { .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
+ { .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
+ { .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
+ { .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
+ { .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" },
+ { .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
+ { .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
+ { .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
+ { .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
+ { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
+ { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
+ { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
+ { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
+ { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
+ { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
+ { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
+ { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
+ { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
+ { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
+ { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
+ { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
+ { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
+ { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
+ { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
+ { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
+ { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
+};
+
+static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
+ { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
+ { .id = 201, .name = "MON_OBJ_DMA_UP_FEADBACK_RESET" },
+ { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
+ { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
+ { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
+ { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
+ { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
+ { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
+ { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
+ { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
+ { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
+};
+
+static s64 gaudi_state_dump_specs_props[] = {
+ [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
+ [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
+ [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
+ [SP_MON_OBJ_WR_ADDR_LOW] =
+ mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
+ [SP_MON_OBJ_WR_ADDR_HIGH] =
+ mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
+ [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
+ [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
+ [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
+ [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
+ [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
+ [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
+ [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
+ [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
+ [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
+ [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
+ [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
+ [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
+ [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
+ [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
+ [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
+ [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
+ [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
+ [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
+ [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
+ [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
+ [SP_FENCE0_CNT_OFFSET] =
+ mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
+ [SP_FENCE0_RDATA_OFFSET] =
+ mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
+ [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
+ [SP_NUM_CORES] = 1,
+};
+
+/* The order here is opposite to the order of the indexing in the h/w.
+ * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
+ */
+static const char * const gaudi_sync_manager_names[] = {
+ "SYNC_MGR_E_N",
+ "SYNC_MGR_W_N",
+ "SYNC_MGR_E_S",
+ "SYNC_MGR_W_S",
+ NULL
+};
+
struct ecc_info_extract_params {
u64 block_address;
u32 num_memories;
u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
u32 num_regs, u32 val);
-static int gaudi_schedule_register_memset(struct hl_device *hdev,
- u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
struct hl_gen_wait_properties *prop);
-
static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
get_collective_mode(hdev, i);
}
+ prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
prop->collective_first_sob = 0;
prop->collective_first_mon = 0;
prop->hard_reset_done_by_fw = false;
prop->gic_interrupts_enable = true;
+ prop->server_type = HL_SERVER_TYPE_UNKNOWN;
+
return 0;
}
struct gaudi_hw_sob_group *hw_sob_group =
container_of(ref, struct gaudi_hw_sob_group, kref);
struct hl_device *hdev = hw_sob_group->hdev;
- u64 base_addr;
- int rc;
+ int i;
- base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
- hw_sob_group->base_sob_id * 4;
- rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
- base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
- if (rc)
- dev_err(hdev->dev,
- "failed resetting sob group - sob base %u, count %u",
- hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);
+ for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
+ WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
+ (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
kref_init(&hw_sob_group->kref);
}
queue_id = job->hw_queue_id;
prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
+ if (job->cs->encaps_signals) {
+ /* use the encaps signal handle store earlier in the flow
+ * and set the SOB information from the encaps
+ * signals handle
+ */
+ hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
+ cs_cmpl);
+
+ dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
+ job->cs->sequence,
+ cs_cmpl->hw_sob->sob_id,
+ cs_cmpl->sob_val);
+ }
+
/* Add to wait CBs using slave monitor */
wait_prop.data = (void *) job->user_cb;
wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
wait_prop.size = cb_size;
dev_dbg(hdev->dev,
- "Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
+ "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
prop->collective_slave_mon_id, queue_id);
prop->collective_sob_id, cb_size, false);
}
-static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
+static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
struct hl_cs_compl *signal_cs_cmpl =
container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
gaudi = hdev->asic_specific;
cprop = &gaudi->collective_props;
- /* copy the SOB id and value of the signal CS */
- cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
- cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+ /* In encaps signals case the SOB info will be retrieved from
+ * the handle in gaudi_collective_slave_init_job.
+ */
+ if (!cs->encaps_signals) {
+ /* copy the SOB id and value of the signal CS */
+ cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
+ cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+ }
+
+ /* check again if the signal cs already completed.
+ * if yes then don't send any wait cs since the hw_sob
+ * could be in reset already. if signal is not completed
+ * then get refcount to hw_sob to prevent resetting the sob
+ * while wait cs is not submitted.
+ * note that this check is protected by two locks,
+ * hw queue lock and completion object lock,
+ * and the same completion object lock also protects
+ * the hw_sob reset handler function.
+ * The hw_queue lock prevent out of sync of hw_sob
+ * refcount value, changed by signal/wait flows.
+ */
+ spin_lock(&signal_cs_cmpl->lock);
+
+ if (completion_done(&cs->signal_fence->completion)) {
+ spin_unlock(&signal_cs_cmpl->lock);
+ return -EINVAL;
+ }
+ /* Increment kref since all slave queues are now waiting on it */
+ kref_get(&cs_cmpl->hw_sob->kref);
+
+ spin_unlock(&signal_cs_cmpl->lock);
/* Calculate the stream from collective master queue (1st job) */
job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
cprop->curr_sob_group_idx[stream], stream);
}
- /* Increment kref since all slave queues are now waiting on it */
- kref_get(&cs_cmpl->hw_sob->kref);
- /*
- * Must put the signal fence after the SOB refcnt increment so
- * the SOB refcnt won't turn 0 and reset the SOB before the
- * wait CS was submitted.
- */
mb();
hl_fence_put(cs->signal_fence);
cs->signal_fence = NULL;
+
+ return 0;
}
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
struct hl_ctx *ctx, struct hl_cs *cs,
- enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
+ enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
+ u32 encaps_signal_offset)
{
struct hw_queue_properties *hw_queue_prop;
struct hl_cs_counters_atomic *cntr;
job->user_cb_size = cb_size;
job->hw_queue_id = queue_id;
+ /* since its guaranteed to have only one chunk in the collective wait
+ * cs, we can use this chunk to set the encapsulated signal offset
+ * in the jobs.
+ */
+ if (cs->encaps_signals)
+ job->encaps_sig_wait_offset = encaps_signal_offset;
+
/*
* No need in parsing, user CB is the patched CB.
* We call hl_cb_destroy() out of two reasons - we don't need
}
static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
- struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
- u32 collective_engine_id)
+ struct hl_ctx *ctx, struct hl_cs *cs,
+ u32 wait_queue_id, u32 collective_engine_id,
+ u32 encaps_signal_offset)
{
struct gaudi_device *gaudi = hdev->asic_specific;
struct hw_queue_properties *hw_queue_prop;
if (i == 0) {
queue_id = wait_queue_id;
rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
- HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
+ HL_COLLECTIVE_MASTER, queue_id,
+ wait_queue_id, encaps_signal_offset);
} else {
if (nic_idx < NIC_NUMBER_OF_ENGINES) {
if (gaudi->hw_cap_initialized &
}
rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
- HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
+ HL_COLLECTIVE_SLAVE, queue_id,
+ wait_queue_id, encaps_signal_offset);
}
if (rc)
return rc;
}
+ /* Scrub both SRAM and DRAM */
+ rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
+ if (rc)
+ goto disable_pci_access;
+
rc = gaudi_fetch_psoc_frequency(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
goto disable_pci_access;
}
+ /* We only support a single ASID for the user, so for the sake of optimization, just
+ * initialize the ASID one time during device initialization with the fixed value of 1
+ */
+ gaudi_mmu_prepare(hdev, 1);
+
return 0;
disable_pci_access:
hdev->supports_sync_stream = true;
hdev->supports_coresight = true;
hdev->supports_staged_submission = true;
+ hdev->supports_wait_for_multi_cs = true;
- gaudi_set_pci_memory_regions(hdev);
+ hdev->asic_funcs->set_pci_memory_regions(hdev);
return 0;
return gaudi_init_iatu(hdev);
}
-static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
+static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
int rc;
"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
cur_addr, cur_addr + chunk_size);
- WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
- WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
+ WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
+ WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
lower_32_bits(cur_addr));
WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
return rc;
}
-static int gaudi_schedule_register_memset(struct hl_device *hdev,
- u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
-{
- struct hl_ctx *ctx;
- struct hl_pending_cb *pending_cb;
- struct packet_msg_long *pkt;
- u32 cb_size, ctl;
- struct hl_cb *cb;
- int i, rc;
-
- mutex_lock(&hdev->fpriv_list_lock);
- ctx = hdev->compute_ctx;
-
- /* If no compute context available or context is going down
- * memset registers directly
- */
- if (!ctx || kref_read(&ctx->refcount) == 0) {
- rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
- mutex_unlock(&hdev->fpriv_list_lock);
- return rc;
- }
-
- mutex_unlock(&hdev->fpriv_list_lock);
-
- cb_size = (sizeof(*pkt) * num_regs) +
- sizeof(struct packet_msg_prot) * 2;
-
- if (cb_size > SZ_2M) {
- dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
- return -ENOMEM;
- }
-
- pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
- if (!pending_cb)
- return -ENOMEM;
-
- cb = hl_cb_kernel_create(hdev, cb_size, false);
- if (!cb) {
- kfree(pending_cb);
- return -EFAULT;
- }
-
- pkt = cb->kernel_address;
-
- ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
- ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
- ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
- ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
- ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
-
- for (i = 0; i < num_regs ; i++, pkt++) {
- pkt->ctl = cpu_to_le32(ctl);
- pkt->value = cpu_to_le32(val);
- pkt->addr = cpu_to_le64(reg_base + (i * 4));
- }
-
- hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
-
- pending_cb->cb = cb;
- pending_cb->cb_size = cb_size;
- /* The queue ID MUST be an external queue ID. Otherwise, we will
- * have undefined behavior
- */
- pending_cb->hw_queue_id = hw_queue_id;
-
- spin_lock(&ctx->pending_cb_lock);
- list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
- spin_unlock(&ctx->pending_cb_lock);
-
- return 0;
-}
-
static int gaudi_restore_sm_registers(struct hl_device *hdev)
{
u64 base_addr;
asid);
}
+ gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
+ gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
+
hdev->asic_funcs->set_clock_gating(hdev);
mutex_unlock(&gaudi->clk_gate_mutex);
dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
- WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
+ WREG32(mmDMA0_CORE_PROT + dma_offset,
+ BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
job->job_cb_size, cb->bus_address);
}
free_fence_ptr:
- WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
- ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
+ WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
fence_dma_addr);
{
u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
+ /* Flip the bits as the enum is ordered in the opposite way */
+ index = (index ^ 0x3) & 0x3;
+
switch (sei_data->sei_cause) {
case SM_SEI_SO_OVERFLOW:
- dev_err(hdev->dev,
- "SM %u SEI Error: SO %u overflow/underflow",
- index, le32_to_cpu(sei_data->sei_log));
+ dev_err_ratelimited(hdev->dev,
+ "%s SEI Error: SOB Group %u overflow/underflow",
+ gaudi_sync_manager_names[index],
+ le32_to_cpu(sei_data->sei_log));
break;
case SM_SEI_LBW_4B_UNALIGNED:
- dev_err(hdev->dev,
- "SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
- index, le32_to_cpu(sei_data->sei_log));
+ dev_err_ratelimited(hdev->dev,
+ "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
+ gaudi_sync_manager_names[index],
+ le32_to_cpu(sei_data->sei_log));
break;
case SM_SEI_AXI_RESPONSE_ERR:
- dev_err(hdev->dev,
- "SM %u SEI Error: AXI ID %u response error",
- index, le32_to_cpu(sei_data->sei_log));
+ dev_err_ratelimited(hdev->dev,
+ "%s SEI Error: AXI ID %u response error",
+ gaudi_sync_manager_names[index],
+ le32_to_cpu(sei_data->sei_log));
break;
default:
- dev_err(hdev->dev, "Unknown SM SEI cause %u",
+ dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
le32_to_cpu(sei_data->sei_log));
break;
}
u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
>> EQ_CTL_EVENT_TYPE_SHIFT);
- u8 cause;
bool reset_required;
+ u8 cause;
+ int rc;
+
+ if (event_type >= GAUDI_EVENT_SIZE) {
+ dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
+ event_type, GAUDI_EVENT_SIZE - 1);
+ return;
+ }
gaudi->events_stat[event_type]++;
gaudi->events_stat_aggregate[event_type]++;
gaudi_print_irq_info(hdev, event_type, false);
gaudi_print_sm_sei_info(hdev, event_type,
&eq_entry->sm_sei_data);
+ rc = hl_state_dump(hdev);
+ if (rc)
+ dev_err(hdev->dev,
+ "Error during system state dump %d\n", rc);
hl_fw_unmask_irq(hdev, event_type);
break;
if (ctx->asid == HL_KERNEL_ASID_ID)
return 0;
- gaudi_mmu_prepare(ctx->hdev, ctx->asid);
return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
}
sizeof(struct packet_msg_prot) * 2;
}
+static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
+{
+ return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
+}
+
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
u32 size, bool eb)
{
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
- int rc;
dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
hw_sob->sob_id);
- rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
- CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
- hw_sob->sob_id * 4, 1, 0);
- if (rc)
- dev_err(hdev->dev, "failed resetting sob %u", hw_sob->sob_id);
+ WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
+ hw_sob->sob_id * 4, 0);
kref_init(&hw_sob->kref);
}
}
}
+static int gaudi_add_sync_to_engine_map_entry(
+ struct hl_sync_to_engine_map *map, u32 reg_value,
+ enum hl_sync_engine_type engine_type, u32 engine_id)
+{
+ struct hl_sync_to_engine_map_entry *entry;
+
+ /* Reg value represents a partial address of sync object,
+ * it is used as unique identifier. For this we need to
+ * clear the cutoff cfg base bits from the value.
+ */
+ if (reg_value == 0 || reg_value == 0xffffffff)
+ return 0;
+ reg_value -= (u32)CFG_BASE;
+
+ /* create a new hash entry */
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+ entry->engine_type = engine_type;
+ entry->engine_id = engine_id;
+ entry->sync_id = reg_value;
+ hash_add(map->tb, &entry->node, reg_value);
+
+ return 0;
+}
+
+static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
+ struct hl_sync_to_engine_map *map)
+{
+ struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+ struct gaudi_device *gaudi = hdev->asic_specific;
+ int i, j, rc;
+ u32 reg_value;
+
+ /* Iterate over TPC engines */
+ for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
+ /* TPC registered must be accessed with clock gating disabled */
+ mutex_lock(&gaudi->clk_gate_mutex);
+ hdev->asic_funcs->disable_clock_gating(hdev);
+
+ reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
+ sds->props[SP_NEXT_TPC] * i);
+
+ /* We can reenable clock_gating */
+ hdev->asic_funcs->set_clock_gating(hdev);
+ mutex_unlock(&gaudi->clk_gate_mutex);
+
+ rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
+ ENGINE_TPC, i);
+ if (rc)
+ goto free_sync_to_engine_map;
+ }
+
+ /* Iterate over MME engines */
+ for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
+ for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
+ /* MME registered must be accessed with clock gating
+ * disabled
+ */
+ mutex_lock(&gaudi->clk_gate_mutex);
+ hdev->asic_funcs->disable_clock_gating(hdev);
+
+ reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
+ sds->props[SP_NEXT_MME] * i +
+ j * sizeof(u32));
+
+ /* We can reenable clock_gating */
+ hdev->asic_funcs->set_clock_gating(hdev);
+ mutex_unlock(&gaudi->clk_gate_mutex);
+
+ rc = gaudi_add_sync_to_engine_map_entry(
+ map, reg_value, ENGINE_MME,
+ i * sds->props[SP_SUB_MME_ENG_NUM] + j);
+ if (rc)
+ goto free_sync_to_engine_map;
+ }
+ }
+
+ /* Iterate over DMA engines */
+ for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
+ reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
+ sds->props[SP_DMA_QUEUES_OFFSET] * i);
+ rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
+ ENGINE_DMA, i);
+ if (rc)
+ goto free_sync_to_engine_map;
+ }
+
+ return 0;
+
+free_sync_to_engine_map:
+ hl_state_dump_free_sync_to_engine_map(map);
+
+ return rc;
+}
+
+static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
+{
+ return FIELD_GET(
+ SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
+ mon->status);
+}
+
+static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
+ struct hl_device *hdev,
+ struct hl_mon_state_dump *mon)
+{
+ const char *name;
+ char scratch_buf1[BIN_REG_STRING_SIZE],
+ scratch_buf2[BIN_REG_STRING_SIZE];
+
+ name = hl_state_dump_get_monitor_name(hdev, mon);
+ if (!name)
+ name = "";
+
+ return hl_snprintf_resize(
+ buf, size, offset,
+ "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s",
+ mon->id, name,
+ FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
+ mon->arm_data),
+ hl_format_as_binary(
+ scratch_buf1, sizeof(scratch_buf1),
+ FIELD_GET(
+ SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
+ mon->arm_data)),
+ FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
+ mon->arm_data),
+ mon->wr_data,
+ (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
+ hl_format_as_binary(
+ scratch_buf2, sizeof(scratch_buf2),
+ FIELD_GET(
+ SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
+ mon->status)));
+}
+
+
+static int gaudi_print_fences_single_engine(
+ struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
+ enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
+ size_t *size, size_t *offset)
+{
+ struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+ int rc = -ENOMEM, i;
+ u32 *statuses, *fences;
+
+ statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
+ sizeof(*statuses), GFP_KERNEL);
+ if (!statuses)
+ goto out;
+
+ fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
+ sds->props[SP_ENGINE_NUM_OF_QUEUES],
+ sizeof(*fences), GFP_KERNEL);
+ if (!fences)
+ goto free_status;
+
+ for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
+ statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
+
+ for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
+ sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
+ fences[i] = RREG32(base_offset + i * sizeof(u32));
+
+ /* The actual print */
+ for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
+ u32 fence_id;
+ u64 fence_cnt, fence_rdata;
+ const char *engine_name;
+
+ if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
+ statuses[i]))
+ continue;
+
+ fence_id =
+ FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
+ fence_cnt = base_offset + CFG_BASE +
+ sizeof(u32) *
+ (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
+ fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
+ sds->props[SP_FENCE0_RDATA_OFFSET];
+ engine_name = hl_sync_engine_to_string(engine_type);
+
+ rc = hl_snprintf_resize(
+ buf, size, offset,
+ "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
+ engine_name, engine_id,
+ i, fence_id,
+ fence_cnt, engine_name, engine_id, fence_id, i,
+ fence_rdata, engine_name, engine_id, fence_id, i,
+ fences[fence_id],
+ statuses[i]);
+ if (rc)
+ goto free_fences;
+ }
+
+ rc = 0;
+
+free_fences:
+ kfree(fences);
+free_status:
+ kfree(statuses);
+out:
+ return rc;
+}
+
+
+static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
+ .monitor_valid = gaudi_monitor_valid,
+ .print_single_monitor = gaudi_print_single_monitor,
+ .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
+ .print_fences_single_engine = gaudi_print_fences_single_engine,
+};
+
+static void gaudi_state_dump_init(struct hl_device *hdev)
+{
+ struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
+ hash_add(sds->so_id_to_str_tb,
+ &gaudi_so_id_to_str[i].node,
+ gaudi_so_id_to_str[i].id);
+
+ for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
+ hash_add(sds->monitor_id_to_str_tb,
+ &gaudi_monitor_id_to_str[i].node,
+ gaudi_monitor_id_to_str[i].id);
+
+ sds->props = gaudi_state_dump_specs_props;
+
+ sds->sync_namager_names = gaudi_sync_manager_names;
+
+ sds->funcs = gaudi_state_dump_funcs;
+}
+
static const struct hl_asic_funcs gaudi_funcs = {
.early_init = gaudi_early_init,
.early_fini = gaudi_early_fini,
.halt_engines = gaudi_halt_engines,
.suspend = gaudi_suspend,
.resume = gaudi_resume,
- .cb_mmap = gaudi_cb_mmap,
+ .mmap = gaudi_mmap,
.ring_doorbell = gaudi_ring_doorbell,
.pqe_write = gaudi_pqe_write,
.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
.enable_events_from_fw = gaudi_enable_events_from_fw,
.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
.init_firmware_loader = gaudi_init_firmware_loader,
- .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm
+ .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
+ .state_dump_init = gaudi_state_dump_init,
+ .get_sob_addr = gaudi_get_sob_addr,
+ .set_pci_memory_regions = gaudi_set_pci_memory_regions
};
/**