// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is always not
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 */
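
/*
 * Illustrative sketch of the secure-DMA flow described above, with
 * hypothetical helper names (not the driver's actual functions):
 *
 *	if (gaudi_is_device_idle(hdev)) {
 *		gaudi_set_dma_ch0_secured(hdev, true);
 *		gaudi_execute_dma(hdev, job);
 *		gaudi_set_dma_ch0_secured(hdev, false);
 *	}
 */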

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9

#define GAUDI_MAX_STRING_LEN		20

#define GAUDI_CB_POOL_CB_CNT		512
#define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT		0xEE6b27FF /* 8 seconds */

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")

#define MONITOR_SOB_STRING_SIZE		256

static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};

static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
	"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
	"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
	"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
	"gaudi cpu eq"
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32] = sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
	[PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
	[PACKET_REPEAT] = sizeof(struct packet_repeat),
	[PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
	[PACKET_FENCE] = sizeof(struct packet_fence),
	[PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
	[PACKET_NOP] = sizeof(struct packet_nop),
	[PACKET_STOP] = sizeof(struct packet_stop),
	[PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
	[PACKET_WAIT] = sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};

static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};

static s64 gaudi_state_dump_specs_props[] = {
	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
	[SP_MON_OBJ_WR_ADDR_LOW] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
	[SP_MON_OBJ_WR_ADDR_HIGH] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
	[SP_FENCE0_CNT_OFFSET] =
		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_FENCE0_RDATA_OFFSET] =
		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
};

static const int gaudi_queue_id_to_engine_id[] = {
	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
};

/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_N",
	"SYNC_MGR_W_S",
	NULL
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
					u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
					u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
					u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
					struct hl_gen_wait_properties *prop);

static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

static inline void set_default_power_values(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	if (hdev->card_type == cpucp_card_type_pmc) {
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;

		if (prop->fw_security_enabled)
			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
		else
			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
	} else {
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
	}
}

static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;
		}
		prop->hw_queues_props[i].collective_mode =
				get_collective_mode(hdev, i);
	}

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->host_base_address = HOST_PHYS_BASE;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->completion_mode = HL_COMPLETION_MODE_JOB;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream is reserved for collective;
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);
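
	/*
	 * Worked example with assumed values (NUMBER_OF_SOBS_IN_GRP = 11,
	 * HL_MAX_SOBS_PER_MONITOR = 8, QMAN_STREAMS = 4, HL_RSVD_SOBS = 2):
	 * sync_stream_first_sob = ALIGN(11, 8) * 4 * 2 = 16 * 8 = 128
	 */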

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address =
			prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->pmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;

	/* PMMU and HPMMU are the same except for the page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_interrupt = USHRT_MAX;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->clk_pll_index = HL_GAUDI_MME_PLL;
	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;

	prop->use_get_power_for_reset_history = true;

	prop->configurable_stop_on_err = true;

	prop->set_max_power_on_device_init = true;

	return 0;
}

static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);

	if (pci_bar_size != SRAM_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true, false);
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if ((hdev->fw_components & FW_TYPE_LINUX) &&
			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
		struct gaudi_device *gaudi = hdev->asic_specific;

		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
			return 0;

		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	} else {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}
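
/*
 * Worked example for the legacy (register-read) path above, with assumed
 * register values: PLL_REF_CLK = 50 MHz, nf = 35, nr = 0, od = 0,
 * div_fctr = 1 and div_sel == DIV_SEL_DIVIDED_PLL gives
 * pll_clk = 50 * (35 + 1) / ((0 + 1) * (0 + 1)) = 1800 MHz and
 * freq = 1800 / (1 + 1) = 900 MHz.
 */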

static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
	dst_addr = (prop->sram_user_base_address &
			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);
	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}

/**
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);

out:
	release_firmware(fw);
	return rc;
}

static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}

static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	int i;

	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);
	/* Set collective engine bit */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
				BIT(i % HL_MAX_SOBS_PER_MONITOR);
}

static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	gaudi_collective_mstr_sob_mask_set(gaudi);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}

static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	if (job->cs->encaps_signals) {
		/* use the encaps signal handle stored earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
						cs_cmpl);

		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
				job->cs->sequence,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);
	}

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}

static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	if (cs->encaps_signals) {
		cs_cmpl->hw_sob = handle->hw_sob;
		/* at this checkpoint we only need the hw_sob pointer
		 * for the completion check before start going over the jobs
		 * of the master/slaves, the sob_value will be taken later on
		 * in gaudi_collective_slave_init_job depends on each
		 * job wait offset value.
		 */
		cs_cmpl->sob_val = 0;
	} else {
		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevents out of sync of hw_sob
	 * refcount value, changed by signal/wait flows.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}

	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}

static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
{
	u32 cacheline_end, additional_commands;

	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
	additional_commands = sizeof(struct packet_msg_prot) * 2;

	if (user_cb_size + additional_commands > cacheline_end)
		return cacheline_end - user_cb_size + additional_commands;
	else
		return additional_commands;
}
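
/*
 * Worked example, assuming DEVICE_CACHE_LINE_SIZE = 128 and a 16-byte
 * packet_msg_prot: for user_cb_size = 240, cacheline_end = 256 and
 * additional_commands = 32; since 240 + 32 > 256, the function returns
 * 256 - 240 + 32 = 48. For user_cb_size = 200, 200 + 32 <= 256, so only
 * the 32 bytes of the two MSG_PROT packets are returned.
 */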

static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
		u32 encaps_signal_offset)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}
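
	/*
	 * With assumed packet sizes (packet_msg_short = 8, packet_fence = 8
	 * and packet_msg_prot = 16 bytes), the master CB above is
	 * 8 * 8 + 2 * 8 + 2 * 16 = 112 bytes and the slave CB is
	 * 5 * 8 + 8 = 48 bytes.
	 */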

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size,
			hdev->mmu_enable && !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/* since it's guaranteed to have only one chunk in the collective wait
	 * cs, we can use this chunk to set the encapsulated signal offset
	 * in the jobs
	 */
	if (cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() for two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		u32 wait_queue_id, u32 collective_engine_id,
		u32 encaps_signal_offset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * Rest of the jobs goes to the collective slave queues which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
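
	/*
	 * Illustrative job layout for stream 0, assuming all 10 NICs are
	 * enabled and NUMBER_OF_SOBS_IN_GRP = 11 (so num_jobs = 12):
	 * job 0 runs on wait_queue_id (collective master), jobs 1-10 run on
	 * GAUDI_QUEUE_ID_NIC_0_0..GAUDI_QUEUE_ID_NIC_9_0 (slaves) and job 11
	 * runs on the DMA5 or TPC7 stream 0 queue (reduction engine slave).
	 */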
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id,
				wait_queue_id, encaps_signal_offset);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
						BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id,
				wait_queue_id, encaps_signal_offset);
		}

		if (rc)
			return rc;
	}

	return rc;
}

static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	/* Scrub both SRAM and DRAM */
	rc = hdev->asic_funcs->scrub_device_mem(hdev);
	if (rc)
		goto disable_pci_access;

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	/* We only support a single ASID for the user, so for the sake of optimization, just
	 * initialize the ASID one time during device initialization with the fixed value of 1
	 */
	gaudi_mmu_prepare(hdev, 1);

	hl_fw_set_pll_profile(hdev);

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);

	return rc;
}

static void gaudi_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}

static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bit addresses, while bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical across the entire allocated range.
	 */
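
	/*
	 * Illustrative check with assumed addresses: a 2MB range starting at
	 * 0x7f_fff0_0000 ends at 0x80_000f_ffff, crossing the 1ULL << 39
	 * boundary, so bits 49:39 of the start and end addresses differ and
	 * the allocation below would be retried.
	 */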
	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
								&dma_addr_arr[i],
								GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	for (j = 0 ; j < i ; j++)
		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}

static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		q = &gaudi->internal_qmans[i];
		if (!q->pq_kernel_addr)
			continue;
		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
	}
}

static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	int rc;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
			continue;

		q = &gaudi->internal_qmans[i];

		switch (i) {
		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
			break;
		default:
			dev_err(hdev->dev, "Bad internal queue index %d", i);
			rc = -EINVAL;
			goto free_internal_qmans_pq_mem;
		}

		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
								GFP_KERNEL | __GFP_ZERO);
		if (!q->pq_kernel_addr) {
			rc = -ENOMEM;
			goto free_internal_qmans_pq_mem;
		}
	}

	return 0;

free_internal_qmans_pq_mem:
	gaudi_free_internal_qmans_pq_mem(hdev);
	return rc;
}
static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_mem_region *region;

	/* CFG */
	region = &hdev->pci_mem_region[PCI_REGION_CFG];
	region->region_base = CFG_BASE;
	region->region_size = CFG_SIZE;
	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = CFG_BAR_ID;
	region->used = 1;

	/* SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
	region->region_base = SRAM_BASE_ADDR;
	region->region_size = SRAM_SIZE;
	region->offset_in_bar = 0;
	region->bar_size = SRAM_BAR_SIZE;
	region->bar_id = SRAM_BAR_ID;
	region->used = 1;

	/* DRAM */
	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
	region->region_base = DRAM_PHYS_BASE;
	region->region_size = hdev->asic_prop.dram_size;
	region->offset_in_bar = 0;
	region->bar_size = prop->dram_pci_bar_size;
	region->bar_id = HBM_BAR_ID;
	region->used = 1;

	/* SP SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
	region->region_base = PSOC_SCRATCHPAD_ADDR;
	region->region_size = PSOC_SCRATCHPAD_SIZE;
	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = CFG_BAR_ID;
	region->used = 1;
}

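/*
 * Note (added for clarity, not from the original source): offset_in_bar is
 * the distance of a region's base from the start of the address range its
 * BAR exposes. The CFG and SP_SRAM regions share the CFG BAR, which appears
 * to start at SPI_FLASH_BASE_ADDR, hence the subtraction above; the SRAM
 * and DRAM regions each sit at the start of their own BARs.
 */
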
static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;

	/* Allocate device structure */
	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
	if (!gaudi)
		return -ENOMEM;

	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
		if (gaudi_irq_map_table[i].valid) {
			if (event_id == GAUDI_EVENT_SIZE) {
				dev_err(hdev->dev,
					"Event array exceeds the limit of %u events\n",
					GAUDI_EVENT_SIZE);
				rc = -EINVAL;
				goto free_gaudi_device;
			}

			gaudi->events[event_id++] =
					gaudi_irq_map_table[i].fc_id;
		}
	}

	gaudi->cpucp_info_get = gaudi_cpucp_info_get;

	hdev->asic_specific = gaudi;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);

	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;
	hdev->supports_staged_submission = true;
	hdev->supports_wait_for_multi_cs = true;

	hdev->asic_funcs->set_pci_memory_regions(hdev);
	hdev->stream_master_qid_arr =
				hdev->asic_funcs->get_stream_master_qid_arr();
	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);
	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}

static int gaudi_sw_fini(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	gaudi_free_internal_qmans_pq_mem(hdev);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);

	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(gaudi);

	return 0;
}

static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
{
	struct hl_device *hdev = arg;
	int i;

	if (hdev->disabled)
		return IRQ_HANDLED;

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);

	hl_irq_handler_eq(irq, &hdev->event_queue);

	return IRQ_HANDLED;
}

/*
 * For backward compatibility, new MSI interrupts should be set after the
 * existing CPU and NIC interrupts.
 */
static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
				bool cpu_eq)
{
	int msi_vec;

	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
				GAUDI_EVENT_QUEUE_MSI_IDX);

	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
			(nr + NIC_NUMBER_OF_ENGINES + 1);

	return pci_irq_vector(hdev->pdev, msi_vec);
}

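/*
 * Worked example (illustrative, not from the original source): vector
 * numbers below GAUDI_EVENT_QUEUE_MSI_IDX map 1:1 to completion queues,
 * and the CPU event queue keeps its legacy vector. Any newer interrupt
 * source nr is pushed past the NIC_NUMBER_OF_ENGINES NIC vectors and the
 * CPU EQ vector, i.e. to MSI entry nr + NIC_NUMBER_OF_ENGINES + 1.
 */
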
static int gaudi_enable_msi_single(struct hl_device *hdev)
{
	int rc, irq;

	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");

	irq = gaudi_pci_irq_vector(hdev, 0, false);
	rc = request_irq(irq, gaudi_irq_handler_single, 0,
			"gaudi single msi", hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to request single MSI IRQ\n");

	return rc;
}

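/*
 * Note (added for clarity, not from the original source): in single MSI
 * mode one vector serves every interrupt source, so the handler registered
 * above (gaudi_irq_handler_single) simply polls all completion queues and
 * the event queue on each invocation.
 */
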
static int gaudi_enable_msi_multi(struct hl_device *hdev)
{
	int cq_cnt = hdev->asic_prop.completion_queues_count;
	int rc, i, irq_cnt_init, irq;

	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
		irq = gaudi_pci_irq_vector(hdev, i, false);
		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
				&hdev->completion_queue[i]);
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_irqs;
		}
	}

	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
				&hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
		goto free_irqs;
	}

	return 0;

free_irqs:
	for (i = 0 ; i < irq_cnt_init ; i++)
		free_irq(gaudi_pci_irq_vector(hdev, i, false),
				&hdev->completion_queue[i]);
	return rc;
}

static int gaudi_enable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
		return 0;

	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
	if (rc < 0) {
		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
		return rc;
	}

	if (rc < NUMBER_OF_INTERRUPTS) {
		gaudi->multi_msi_mode = false;
		rc = gaudi_enable_msi_single(hdev);
	} else {
		gaudi->multi_msi_mode = true;
		rc = gaudi_enable_msi_multi(hdev);
	}

	if (rc)
		goto free_pci_irq_vectors;

	gaudi->hw_cap_initialized |= HW_CAP_MSI;

	return 0;

free_pci_irq_vectors:
	pci_free_irq_vectors(hdev->pdev);
	return rc;
}

static void gaudi_sync_irqs(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, cq_cnt = hdev->asic_prop.completion_queues_count;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	/* Wait for all pending IRQs to be finished */
	if (gaudi->multi_msi_mode) {
		for (i = 0 ; i < cq_cnt ; i++)
			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));

		synchronize_irq(gaudi_pci_irq_vector(hdev,
						GAUDI_EVENT_QUEUE_MSI_IDX,
						true));
	} else {
		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
	}
}

static void gaudi_disable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	gaudi_sync_irqs(hdev);

	if (gaudi->multi_msi_mode) {
		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
						true);
		free_irq(irq, &hdev->event_queue);

		for (i = 0 ; i < cq_cnt ; i++) {
			irq = gaudi_pci_irq_vector(hdev, i, false);
			free_irq(irq, &hdev->completion_queue[i]);
		}
	} else {
		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
	}

	pci_free_irq_vectors(hdev->pdev);

	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
}

static void gaudi_init_scrambler_sram(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
			CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
}

static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
			CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
}

static void gaudi_init_e2e(struct hl_device *hdev)
{
	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
			CPU_BOOT_DEV_STS0_E2E_CRED_EN)
		return;

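	/*
	 * Note (added for clarity, not from the original source): the HBM
	 * credit values below are written shifted right by 3, which suggests
	 * the HBM credit registers count in units of 8; the PCI credit
	 * values are written as-is.
	 */
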
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);

	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
}

static void gaudi_init_hbm_cred(struct hl_device *hdev)
{
	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
			CPU_BOOT_DEV_STS0_HBM_CRED_EN)
		return;

	hbm0_wr = 0x33333333;
	hbm0_rd = 0x77777777;
	hbm1_wr = 0x55555555;
	hbm1_rd = 0xDDDDDDDD;

	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));

	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
}

static void gaudi_init_golden_registers(struct hl_device *hdev)
{
	u32 tpc_offset;
	int tpc_id, i;

	gaudi_init_e2e(hdev);
	gaudi_init_hbm_cred(hdev);

	for (tpc_id = 0, tpc_offset = 0;
			tpc_id < TPC_NUMBER_OF_ENGINES;
			tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
		/* Mask all arithmetic interrupts from TPC */
		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
		/* Set 16 cache lines */
		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
				ICACHE_FETCH_LINE_NUM, 2);
	}

	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
	for (i = 0 ; i < 128 ; i += 8)
		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);

	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
}

static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
				int qman_id, dma_addr_t qman_pq_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 q_off, dma_qm_offset;
	u32 dma_qm_err_cfg, irq_handler_offset;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = dma_qm_offset + qman_id * 4;

	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));

	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_SRC_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_DST_OFFSET);

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);

	/* The following configuration is needed only once per QMAN */
	if (qman_id == 0) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
			QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_EXTERNAL_MAKE_TRUSTED);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
	}
}

static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
	u32 irq_handler_offset;

	/* Set to maximum possible according to physical size */
	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);

	/* WA for H/W bug H3-2116 */
	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);

	/* STOP_ON bit implies no completion to operation in case of RAZWI */
	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);

	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);

	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
		lower_32_bits(CFG_BASE + irq_handler_offset));
	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
		upper_32_bits(CFG_BASE + irq_handler_offset));

	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
	WREG32(mmDMA0_CORE_PROT + dma_offset,
			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
	/* If the channel is secured, it should be in MMU bypass mode */
	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
}

static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
				u32 enable_mask)
{
	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
}

static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;

	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
		return;

	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		/*
		 * For queues after the CPU Q, 1 must be added in order to
		 * get the correct queue index. In addition, the CPU EQ and
		 * NIC IRQs must be added in order to get the correct MSI
		 * register.
		 */
		if (dma_id) {
			cpu_skip = 1;
			nic_skip = NIC_NUMBER_OF_ENGINES;
		} else {
			cpu_skip = 0;
			nic_skip = 0;
		}

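		/*
		 * Worked example (illustrative, not from the original
		 * source): the CPU queue sits between the two PCI DMA
		 * channels in the kernel queue array, so stream j of DMA
		 * channel 0 maps to kernel queue j (0-3), while stream j
		 * of channel 1 maps to 4 * 1 + j + 1, i.e. queues 5-8.
		 */
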
		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			q_idx = 4 * dma_id + j + cpu_skip;
			q = &hdev->kernel_queues[q_idx];
			q->cq_id = cq_id++;
			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
			gaudi_init_pci_dma_qman(hdev, dma_id, j,
						q->bus_address);
		}

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
}

static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 dma_qm_err_cfg, irq_handler_offset;
	u32 q_off, dma_qm_offset;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = dma_qm_offset + qman_id * 4;

	if (qman_id < 4) {
		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
			lower_32_bits(qman_base_addr));
		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
			upper_32_bits(qman_base_addr));

		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_DST_OFFSET);
	} else {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
			QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}

static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	int i, j, dma_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
		return;

	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			/*
			 * Add the CPU queue in order to get the correct queue
			 * number, as all internal queues are placed after it
			 */
			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
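			/*
			 * Illustrative example (not from the original
			 * source): for dma_id 2, stream 0, the index is
			 * 2 * 4 + 0 + 1 = 9, which should correspond to
			 * GAUDI_QUEUE_ID_DMA_2_0 once the CPU queue is
			 * accounted for.
			 */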

			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
						qman_base_addr);
		}

		/* Initializing lower CP for HBM DMA QMAN */
		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
}

static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 irq_handler_offset;
	u32 q_off, mme_id;
	u32 mme_qm_err_cfg;

	mtr_base_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = mme_offset + qman_id * 4;

	if (qman_id < 4) {
		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
			lower_32_bits(qman_base_addr));
		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
			upper_32_bits(qman_base_addr));

		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_DST_OFFSET);
	} else {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		mme_id = mme_offset /
				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;

		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			mme_qm_err_cfg |=
				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);

		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
									mme_id);

		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
			QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
}

static void gaudi_init_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 mme_offset;
	int i, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_MME)
		return;

	/*
	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
	 */

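	/*
	 * Note (added for clarity, not from the original source): only the
	 * master MME engines expose a QMAN, so the MME_0 queues are
	 * programmed against mmMME2_QM first and, after the offset is reset
	 * in the loop below, the MME_1 queues against mmMME0_QM.
	 */
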
	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;

	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
		q = &gaudi->internal_qmans[internal_q_index];
		qman_base_addr = (u64) q->pq_dma_addr;
		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
					qman_base_addr);
		if (i == 3)
			mme_offset = 0;
	}

	/* Initializing lower CP for MME QMANs */
	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
	gaudi_init_mme_qman(hdev, 0, 4, 0);

	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);

	gaudi->hw_cap_initialized |= HW_CAP_MME;
}

static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 tpc_qm_err_cfg, irq_handler_offset;
	u32 q_off, tpc_id;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = tpc_offset + qman_id * 4;

	tpc_id = tpc_offset /
			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);

	if (qman_id < 4) {
		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
			lower_32_bits(qman_base_addr));
		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
			upper_32_bits(qman_base_addr));

		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_DST_OFFSET);
	} else {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			tpc_qm_err_cfg |=
				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);

		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
									tpc_id);

		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
			QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
	if (tpc_id == 6) {
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}

static void gaudi_init_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 so_base_hi, tpc_offset = 0;
	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
	int i, tpc_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
		return;

	so_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
						tpc_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_tpc_qman(hdev, tpc_offset, i,
						qman_base_addr);

			if (i == 3) {
				/* Initializing lower CP for TPC QMAN */
				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);

				/* Enable the QMAN and TPC channel */
				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
						QMAN_TPC_ENABLE);
			}
		}

		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
				so_base_hi);

		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;

		gaudi->hw_cap_initialized |=
				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
	}
}

static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
				int qman_id, u64 qman_base_addr, int nic_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 nic_qm_err_cfg, irq_handler_offset;
	u32 q_off;

	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = nic_offset + qman_id * 4;

	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));

	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);

	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
			QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_SRC_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_DST_OFFSET);

	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	if (qman_id == 0) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			nic_qm_err_cfg |=
				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);

		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
									nic_id);

		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
			QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}
}

static void gaudi_init_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int i, nic_id, internal_q_index;

	if (!hdev->nic_ports_mask)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
		return;

	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
			nic_offset += nic_delta_between_qmans;
			if (nic_id & 1) {
				nic_offset -= (nic_delta_between_qmans * 2);
				nic_offset += nic_delta_between_nics;
			}
			continue;
		}

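		/*
		 * Layout note (added for clarity, not from the original
		 * source): each NIC macro hosts two QMANs (e.g. NIC0_QM0
		 * and NIC0_QM1), so the walk advances by the QMAN stride
		 * for every port and, after each odd port, rewinds the two
		 * QMAN strides and jumps by the NIC-to-NIC stride instead.
		 */
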
		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
						nic_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
						qman_base_addr, nic_id);
		}

		/* Enable the QMAN */
		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);

		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}

		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
	}
}

static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	WREG32(mmMME2_QM_GLBL_CFG0, 0);
	WREG32(mmMME0_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 tpc_offset = 0;
	int tpc_id;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
	}
}

static void gaudi_disable_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 nic_mask, nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int nic_id;

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);

		if (gaudi->hw_cap_initialized & nic_mask)
			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);

		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}
	}
}

static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	/* Stop CPs of HBM DMA QMANs */
	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	/* Stop CPs of MME QMANs */
	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	/* Stop upper CPs of QMANs */

	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
		WREG32(mmNIC0_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
		WREG32(mmNIC0_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
		WREG32(mmNIC1_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
		WREG32(mmNIC1_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
		WREG32(mmNIC2_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
		WREG32(mmNIC2_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
		WREG32(mmNIC3_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
		WREG32(mmNIC3_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
		WREG32(mmNIC4_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
		WREG32(mmNIC4_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
}

static void gaudi_pci_dma_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
}

static void gaudi_hbm_dma_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
}

static void gaudi_mme_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
}

static void gaudi_tpc_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
}

3611 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3616 if (hdev->asic_prop.fw_security_enabled)
3619 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3620 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3621 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3623 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3626 WREG32(mmMME0_QM_CGM_CFG, 0);
3627 WREG32(mmMME0_QM_CGM_CFG1, 0);
3628 WREG32(mmMME2_QM_CGM_CFG, 0);
3629 WREG32(mmMME2_QM_CGM_CFG1, 0);
3631 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3632 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3633 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3635 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3639 static void gaudi_enable_timestamp(struct hl_device *hdev)
3641 /* Disable the timestamp counter */
3642 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3644 /* Zero the lower/upper parts of the 64-bit counter */
3645 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3646 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3648 /* Enable the counter */
3649 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
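/* Note: the sequence above (disable, zero both counter halves, re-enable)
 * guarantees the 64-bit timestamp counter restarts from zero on every init.
 */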
3652 static void gaudi_disable_timestamp(struct hl_device *hdev)
3654 /* Disable the timestamp counter */
3655 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3658 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3660 u32 wait_timeout_ms;
if (hdev->pldm)
wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
else
wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3670 gaudi_stop_nic_qmans(hdev);
3671 gaudi_stop_mme_qmans(hdev);
3672 gaudi_stop_tpc_qmans(hdev);
3673 gaudi_stop_hbm_dma_qmans(hdev);
3674 gaudi_stop_pci_dma_qmans(hdev);
3676 msleep(wait_timeout_ms);
3678 gaudi_pci_dma_stall(hdev);
3679 gaudi_hbm_dma_stall(hdev);
3680 gaudi_tpc_stall(hdev);
3681 gaudi_mme_stall(hdev);
3683 msleep(wait_timeout_ms);
3685 gaudi_disable_nic_qmans(hdev);
3686 gaudi_disable_mme_qmans(hdev);
3687 gaudi_disable_tpc_qmans(hdev);
3688 gaudi_disable_hbm_dma_qmans(hdev);
3689 gaudi_disable_pci_dma_qmans(hdev);
3691 gaudi_disable_timestamp(hdev);
3694 gaudi_disable_msi(hdev);
3697 static int gaudi_mmu_init(struct hl_device *hdev)
3699 struct asic_fixed_properties *prop = &hdev->asic_prop;
3700 struct gaudi_device *gaudi = hdev->asic_specific;
if (!hdev->mmu_enable)
return 0;
if (gaudi->hw_cap_initialized & HW_CAP_MMU)
return 0;
3710 for (i = 0 ; i < prop->max_asid ; i++) {
3711 hop0_addr = prop->mmu_pgt_addr +
3712 (i * prop->mmu_hop_table_size);
3714 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3717 "failed to set hop0 addr for asid %d\n", i);
/* init MMU cache management page */
3723 WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3724 WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3726 /* mem cache invalidation */
3727 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3729 hl_mmu_invalidate_cache(hdev, true, 0);
3731 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3732 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3734 WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
* The H/W expects the first PI after init to be 1. After wraparound
* we'll write 0.
gaudi->mmu_cache_inv_pi = 1;
3742 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3750 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3754 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3756 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3759 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3763 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3765 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3768 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3770 struct dynamic_fw_load_mgr *dynamic_loader;
3771 struct cpu_dyn_regs *dyn_regs;
3773 dynamic_loader = &hdev->fw_loader.dynamic_loader;
* Here we set initial values for a few specific dynamic registers (before
* the first descriptor is read from the FW these values have to be
* hard-coded). In later stages of the protocol these values are updated
* automatically by reading the FW descriptor, so the data there is
* always up-to-date.
3782 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3783 dyn_regs->kmd_msg_to_cpu =
3784 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3785 dyn_regs->cpu_cmd_status_to_host =
3786 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3788 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3791 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3793 struct static_fw_load_mgr *static_loader;
3795 static_loader = &hdev->fw_loader.static_loader;
3797 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3798 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3799 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3800 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3801 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3802 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3803 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3804 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3805 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3806 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3807 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3808 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3809 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3810 GAUDI_PLDM_RESET_WAIT_MSEC :
3811 GAUDI_CPU_RESET_WAIT_MSEC;
3814 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3816 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3818 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3819 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3820 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3821 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3822 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3823 pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3826 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3828 struct asic_fixed_properties *prop = &hdev->asic_prop;
3829 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3831 /* fill common fields */
3832 fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3833 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3834 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3835 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3836 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3837 fw_loader->skip_bmc = !hdev->bmc_enable;
3838 fw_loader->sram_bar_id = SRAM_BAR_ID;
3839 fw_loader->dram_bar_id = HBM_BAR_ID;
if (prop->dynamic_fw_load)
gaudi_init_dynamic_firmware_loader(hdev);
else
gaudi_init_static_firmware_loader(hdev);
3847 static int gaudi_init_cpu(struct hl_device *hdev)
3849 struct gaudi_device *gaudi = hdev->asic_specific;
if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
return 0;
if (gaudi->hw_cap_initialized & HW_CAP_CPU)
return 0;
* The device CPU works with 40-bit addresses.
* This register sets the extension to 50 bits.
3862 if (!hdev->asic_prop.fw_security_enabled)
3863 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3865 rc = hl_fw_init_cpu(hdev);
3870 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3875 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3877 struct cpu_dyn_regs *dyn_regs =
3878 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3879 struct asic_fixed_properties *prop = &hdev->asic_prop;
3880 struct gaudi_device *gaudi = hdev->asic_specific;
3881 u32 status, irq_handler_offset;
3883 struct hl_hw_queue *cpu_pq =
3884 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
if (!hdev->cpu_queues_enable)
return 0;
if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
return 0;
3893 eq = &hdev->event_queue;
3895 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3896 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3898 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3899 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3901 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3902 lower_32_bits(hdev->cpu_accessible_dma_address));
3903 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3904 upper_32_bits(hdev->cpu_accessible_dma_address));
3906 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3907 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3908 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3910 /* Used for EQ CI */
3911 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3913 WREG32(mmCPU_IF_PF_PQ_PI, 0);
if (gaudi->multi_msi_mode)
WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
else
WREG32(mmCPU_IF_QUEUE_INIT,
PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
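/* Assumption behind the two ready values: in single-MSI mode every event
 * source shares one interrupt vector, so the F/W must be told to use the
 * single-MSI handshake rather than per-queue completion interrupts.
 */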
3921 irq_handler_offset = prop->gic_interrupts_enable ?
3922 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3923 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3925 WREG32(irq_handler_offset,
3926 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3928 err = hl_poll_timeout(
3930 mmCPU_IF_QUEUE_INIT,
3932 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3938 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3942 /* update FW application security bits */
3943 if (prop->fw_cpu_boot_dev_sts0_valid)
3944 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3945 if (prop->fw_cpu_boot_dev_sts1_valid)
3946 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3948 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3952 static void gaudi_pre_hw_init(struct hl_device *hdev)
/* Perform read from the device to make sure device is up */
RREG32(mmHW_STATE);
3957 if (!hdev->asic_prop.fw_security_enabled) {
/* Set the access through PCI bars (Linux driver only) as secured */
WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3962 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3963 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3965 /* Perform read to flush the waiting writes to ensure
3966 * configuration was set in the device
3968 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3972 * Let's mark in the H/W that we have reached this point. We check
3973 * this value in the reset_before_init function to understand whether
3974 * we need to reset the chip before doing H/W init. This register is
3975 * cleared by the H/W upon H/W reset
3977 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3980 static int gaudi_hw_init(struct hl_device *hdev)
3982 struct gaudi_device *gaudi = hdev->asic_specific;
3985 gaudi_pre_hw_init(hdev);
3987 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3988 * So we set it here and if anyone tries to move it later to
3989 * a different address, there will be an error
3991 if (hdev->asic_prop.iatu_done_by_fw)
3992 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3995 * Before pushing u-boot/linux to device, need to set the hbm bar to
3996 * base address of dram
3998 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4000 "failed to map HBM bar to DRAM base address\n");
4004 rc = gaudi_init_cpu(hdev);
4006 dev_err(hdev->dev, "failed to initialize CPU\n");
4010 /* In case the clock gating was enabled in preboot we need to disable
4011 * it here before touching the MME/TPC registers.
4013 gaudi_disable_clock_gating(hdev);
4015 /* SRAM scrambler must be initialized after CPU is running from HBM */
4016 gaudi_init_scrambler_sram(hdev);
4018 /* This is here just in case we are working without CPU */
4019 gaudi_init_scrambler_hbm(hdev);
4021 gaudi_init_golden_registers(hdev);
4023 rc = gaudi_mmu_init(hdev);
4027 gaudi_init_security(hdev);
4029 gaudi_init_pci_dma_qmans(hdev);
4031 gaudi_init_hbm_dma_qmans(hdev);
4033 gaudi_init_mme_qmans(hdev);
4035 gaudi_init_tpc_qmans(hdev);
4037 gaudi_init_nic_qmans(hdev);
4039 gaudi_enable_timestamp(hdev);
4041 /* MSI must be enabled before CPU queues and NIC are initialized */
4042 rc = gaudi_enable_msi(hdev);
4044 goto disable_queues;
4046 /* must be called after MSI was enabled */
4047 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4049 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
/* Perform read from the device to flush all configuration */
RREG32(mmHW_STATE);
4060 gaudi_disable_msi(hdev);
4062 gaudi_disable_mme_qmans(hdev);
4063 gaudi_disable_pci_dma_qmans(hdev);
4068 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4070 struct cpu_dyn_regs *dyn_regs =
4071 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4072 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4073 struct gaudi_device *gaudi = hdev->asic_specific;
4074 bool driver_performs_reset;
4077 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
if (hdev->pldm) {
reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
} else {
reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
}
4091 "Firmware performs HARD reset, going to wait %dms\n",
4097 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4098 !hdev->asic_prop.hard_reset_done_by_fw);
4100 /* Set device to handle FLR by H/W as we will put the device CPU to
4103 if (driver_performs_reset)
4104 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4105 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4107 /* If linux is loaded in the device CPU we need to communicate with it
4108 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4109 * registers in case of old F/Ws
4111 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4112 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4113 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4114 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4116 WREG32(irq_handler_offset,
4117 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4119 /* This is a hail-mary attempt to revive the card in the small chance that the
4120 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4121 * In that case, triggering reset through GIC won't help. We need to trigger the
4122 * reset as if Linux wasn't loaded.
* We do it only if the reset cause was HB, because that would be the indication
* of such an event.
* In case watchdog hasn't expired but we still got HB, then this won't do any
* damage.
4130 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
if (hdev->asic_prop.hard_reset_done_by_fw)
hl_fw_ask_hard_reset_without_linux(hdev);
else
hl_fw_ask_halt_machine_without_linux(hdev);
if (hdev->asic_prop.hard_reset_done_by_fw)
hl_fw_ask_hard_reset_without_linux(hdev);
else
hl_fw_ask_halt_machine_without_linux(hdev);
4143 if (driver_performs_reset) {
4145 /* Configure the reset registers. Must be done as early as
4146 * possible in case we fail during H/W initialization
4148 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4149 (CFG_RST_H_DMA_MASK |
4150 CFG_RST_H_MME_MASK |
4152 CFG_RST_H_TPC_7_MASK));
4154 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4156 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4157 (CFG_RST_H_HBM_MASK |
4158 CFG_RST_H_TPC_7_MASK |
4159 CFG_RST_H_NIC_MASK |
4161 CFG_RST_H_DMA_MASK |
4162 CFG_RST_H_MME_MASK |
4163 CFG_RST_H_CPU_MASK |
4164 CFG_RST_H_MMU_MASK));
4166 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4167 (CFG_RST_L_IF_MASK |
4168 CFG_RST_L_PSOC_MASK |
4169 CFG_RST_L_TPC_MASK));
4171 msleep(cpu_timeout_ms);
4173 /* Tell ASIC not to re-initialize PCIe */
4174 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4176 /* Restart BTL/BLR upon hard-reset */
4177 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4179 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4180 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4183 "Issued HARD reset command, going to wait %dms\n",
4187 "Firmware performs HARD reset, going to wait %dms\n",
4193 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4194 * itself is in reset. Need to wait until the reset is deasserted
4196 msleep(reset_timeout_ms);
4198 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4199 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4201 "Timeout while waiting for device to reset 0x%x\n",
4205 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4206 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4207 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4208 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4209 HW_CAP_HBM_SCRAMBLER);
4211 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4213 hdev->device_cpu_is_halted = false;
4217 static int gaudi_suspend(struct hl_device *hdev)
4221 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4223 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4228 static int gaudi_resume(struct hl_device *hdev)
4230 return gaudi_init_iatu(hdev);
4233 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4234 void *cpu_addr, dma_addr_t dma_addr, size_t size)
4238 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4239 VM_DONTCOPY | VM_NORESERVE;
4241 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4242 (dma_addr - HOST_PHYS_BASE), size);
4244 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4249 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4251 struct cpu_dyn_regs *dyn_regs =
4252 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4253 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4254 struct gaudi_device *gaudi = hdev->asic_specific;
4255 bool invalid_queue = false;
4258 switch (hw_queue_id) {
4259 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4260 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4261 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4262 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
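/* Worked example of the offset arithmetic: stream PI registers are 4 bytes
 * apart, so for stream 2 of this QMAN q_off = dma_qm_offset + 2 * 4, which
 * selects mmDMA0_QM_PQ_PI_2 within the chosen DMA QMAN block.
 */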
4263 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4266 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4267 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4268 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4269 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4270 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4273 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4274 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4275 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4276 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4277 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4280 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4281 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4282 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4283 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4284 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4287 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4288 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4289 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4290 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4291 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4294 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4295 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4296 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4297 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4298 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4301 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4302 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4303 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4304 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4305 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4308 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4309 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4310 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4311 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4312 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4315 case GAUDI_QUEUE_ID_CPU_PQ:
4316 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4317 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4319 invalid_queue = true;
4322 case GAUDI_QUEUE_ID_MME_0_0:
4323 db_reg_offset = mmMME2_QM_PQ_PI_0;
4326 case GAUDI_QUEUE_ID_MME_0_1:
4327 db_reg_offset = mmMME2_QM_PQ_PI_1;
4330 case GAUDI_QUEUE_ID_MME_0_2:
4331 db_reg_offset = mmMME2_QM_PQ_PI_2;
4334 case GAUDI_QUEUE_ID_MME_0_3:
4335 db_reg_offset = mmMME2_QM_PQ_PI_3;
4338 case GAUDI_QUEUE_ID_MME_1_0:
4339 db_reg_offset = mmMME0_QM_PQ_PI_0;
4342 case GAUDI_QUEUE_ID_MME_1_1:
4343 db_reg_offset = mmMME0_QM_PQ_PI_1;
4346 case GAUDI_QUEUE_ID_MME_1_2:
4347 db_reg_offset = mmMME0_QM_PQ_PI_2;
4350 case GAUDI_QUEUE_ID_MME_1_3:
4351 db_reg_offset = mmMME0_QM_PQ_PI_3;
4354 case GAUDI_QUEUE_ID_TPC_0_0:
4355 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4358 case GAUDI_QUEUE_ID_TPC_0_1:
4359 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4362 case GAUDI_QUEUE_ID_TPC_0_2:
4363 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4366 case GAUDI_QUEUE_ID_TPC_0_3:
4367 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4370 case GAUDI_QUEUE_ID_TPC_1_0:
4371 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4374 case GAUDI_QUEUE_ID_TPC_1_1:
4375 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4378 case GAUDI_QUEUE_ID_TPC_1_2:
4379 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4382 case GAUDI_QUEUE_ID_TPC_1_3:
4383 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4386 case GAUDI_QUEUE_ID_TPC_2_0:
4387 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4390 case GAUDI_QUEUE_ID_TPC_2_1:
4391 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4394 case GAUDI_QUEUE_ID_TPC_2_2:
4395 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4398 case GAUDI_QUEUE_ID_TPC_2_3:
4399 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4402 case GAUDI_QUEUE_ID_TPC_3_0:
4403 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4406 case GAUDI_QUEUE_ID_TPC_3_1:
4407 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4410 case GAUDI_QUEUE_ID_TPC_3_2:
4411 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4414 case GAUDI_QUEUE_ID_TPC_3_3:
4415 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4418 case GAUDI_QUEUE_ID_TPC_4_0:
4419 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4422 case GAUDI_QUEUE_ID_TPC_4_1:
4423 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4426 case GAUDI_QUEUE_ID_TPC_4_2:
4427 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4430 case GAUDI_QUEUE_ID_TPC_4_3:
4431 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4434 case GAUDI_QUEUE_ID_TPC_5_0:
4435 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4438 case GAUDI_QUEUE_ID_TPC_5_1:
4439 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4442 case GAUDI_QUEUE_ID_TPC_5_2:
4443 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4446 case GAUDI_QUEUE_ID_TPC_5_3:
4447 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4450 case GAUDI_QUEUE_ID_TPC_6_0:
4451 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4454 case GAUDI_QUEUE_ID_TPC_6_1:
4455 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4458 case GAUDI_QUEUE_ID_TPC_6_2:
4459 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4462 case GAUDI_QUEUE_ID_TPC_6_3:
4463 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4466 case GAUDI_QUEUE_ID_TPC_7_0:
4467 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4470 case GAUDI_QUEUE_ID_TPC_7_1:
4471 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4474 case GAUDI_QUEUE_ID_TPC_7_2:
4475 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4478 case GAUDI_QUEUE_ID_TPC_7_3:
4479 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4482 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4483 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4484 invalid_queue = true;
4486 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4487 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4490 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4491 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4492 invalid_queue = true;
4494 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4495 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4498 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4499 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4500 invalid_queue = true;
4502 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4503 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4506 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4507 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4508 invalid_queue = true;
4510 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4511 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4514 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4515 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4516 invalid_queue = true;
4518 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4519 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4522 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4523 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4524 invalid_queue = true;
4526 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4527 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4530 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4531 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4532 invalid_queue = true;
4534 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4535 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4538 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4539 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4540 invalid_queue = true;
4542 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4543 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4546 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4547 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4548 invalid_queue = true;
4550 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4551 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4554 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4555 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4556 invalid_queue = true;
4558 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4559 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4563 invalid_queue = true;
4566 if (invalid_queue) {
4567 /* Should never get here */
4568 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
db_value = pi;

/* ring the doorbell */
WREG32(db_reg_offset, db_value);
4578 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
/* make sure device CPU will read latest data from host */
mb();
4582 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4583 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4584 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4586 WREG32(irq_handler_offset,
4587 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4591 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4594 __le64 *pbd = (__le64 *) bd;
/* The QMANs are in host memory so a simple copy suffices */
4601 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4602 dma_addr_t *dma_handle, gfp_t flags)
4604 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4607 /* Shift to the device's base physical address of host memory */
4609 *dma_handle += HOST_PHYS_BASE;
4614 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4615 void *cpu_addr, dma_addr_t dma_handle)
4617 /* Cancel the device's base physical address of host memory */
4618 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4620 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4623 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4625 struct asic_fixed_properties *prop = &hdev->asic_prop;
4626 u64 cur_addr = prop->dram_user_base_address;
4627 u32 chunk_size, busy;
4630 while (cur_addr < prop->dram_end_address) {
4631 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4632 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
chunk_size = min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4638 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4639 cur_addr, cur_addr + chunk_size);
4641 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4642 lower_32_bits(val));
4643 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4644 upper_32_bits(val));
4645 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4646 lower_32_bits(cur_addr));
4647 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4648 upper_32_bits(cur_addr));
WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
chunk_size);
4651 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4652 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4653 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
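/* MEM_SET mode: the 64-bit value programmed into SRC_BASE_LO/HI above is
 * replicated across the destination range; no source read takes place.
 */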
4655 cur_addr += chunk_size;
4657 if (cur_addr == prop->dram_end_address)
4661 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4662 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4664 rc = hl_poll_timeout(
4666 mmDMA0_CORE_STS0 + dma_offset,
4668 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4670 HBM_SCRUBBING_TIMEOUT_US);
4674 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4684 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4686 struct asic_fixed_properties *prop = &hdev->asic_prop;
4687 u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
4688 min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
4689 u64 addr, size, val = hdev->memory_scrub_val;
4693 if (!hdev->memory_scrub)
4696 timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4697 while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4698 if (ktime_compare(ktime_get(), timeout) > 0) {
4699 dev_err(hdev->dev, "waiting for idle timeout\n");
4702 usleep_range((1000 >> 2) + 1, 1000);
4706 addr = prop->sram_user_base_address;
4707 size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4709 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4710 addr, addr + size, val);
4711 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4713 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4717 /* Scrub HBM using all DMA channels in parallel */
4718 rc = gaudi_scrub_device_dram(hdev, val);
4720 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4727 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4728 u32 queue_id, dma_addr_t *dma_handle,
4731 struct gaudi_device *gaudi = hdev->asic_specific;
4732 struct gaudi_internal_qman_info *q;
4734 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4735 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4736 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4740 q = &gaudi->internal_qmans[queue_id];
4741 *dma_handle = q->pq_dma_addr;
4742 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4744 return q->pq_kernel_addr;
4747 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4748 u16 len, u32 timeout, u64 *result)
4750 struct gaudi_device *gaudi = hdev->asic_specific;
4752 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
if (!timeout)
timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4761 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4765 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4767 struct packet_msg_prot *fence_pkt;
4768 dma_addr_t pkt_dma_addr;
4769 u32 fence_val, tmp, timeout_usec;
4770 dma_addr_t fence_dma_addr;
if (hdev->pldm)
timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
else
timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4779 fence_val = GAUDI_QMAN0_FENCE_VAL;
4781 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4784 "Failed to allocate memory for H/W queue %d testing\n",
4791 fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4795 "Failed to allocate packet for H/W queue %d testing\n",
4798 goto free_fence_ptr;
4801 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4802 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4803 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4805 fence_pkt->ctl = cpu_to_le32(tmp);
4806 fence_pkt->value = cpu_to_le32(fence_val);
4807 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
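/* The MSG_PROT packet built above tells the QMAN CP to write fence_val to
 * fence_dma_addr in host memory; seeing that value arrive on the host
 * proves the queue's full PQ -> CP -> PCIe write path is functional.
 */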
4809 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4810 sizeof(struct packet_msg_prot),
4814 "Failed to send fence packet to H/W queue %d\n",
4819 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4820 1000, timeout_usec, true);
4822 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4824 if (rc == -ETIMEDOUT) {
4826 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4827 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4832 hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4834 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4838 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4840 struct gaudi_device *gaudi = hdev->asic_specific;
* check the capability here because send_cpu_message() won't update the
* result value if the capability is not initialized
4846 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4849 return hl_fw_test_cpu_queue(hdev);
4852 static int gaudi_test_queues(struct hl_device *hdev)
4854 int i, rc, ret_val = 0;
4856 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4857 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4858 rc = gaudi_test_queue(hdev, i);
4864 rc = gaudi_test_cpu_queue(hdev);
4871 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4872 gfp_t mem_flags, dma_addr_t *dma_handle)
4876 if (size > GAUDI_DMA_POOL_BLK_SIZE)
4879 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4881 /* Shift to the device's base physical address of host memory */
4883 *dma_handle += HOST_PHYS_BASE;
4888 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4889 dma_addr_t dma_addr)
4891 /* Cancel the device's base physical address of host memory */
4892 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4894 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4897 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4898 size_t size, dma_addr_t *dma_handle)
4900 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4903 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4904 size_t size, void *vaddr)
4906 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4909 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4911 struct scatterlist *sg, *sg_next_iter;
4912 u32 count, dma_desc_cnt;
4914 dma_addr_t addr, addr_next;
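/* Sizing rule implemented below: physically contiguous SG entries are
 * merged while their combined length fits in DMA_MAX_TRANSFER_SIZE, and
 * each resulting descriptor costs one packet_lin_dma in the patched CB.
 */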
4918 for_each_sgtable_dma_sg(sgt, sg, count) {
4919 len = sg_dma_len(sg);
4920 addr = sg_dma_address(sg);
4925 while ((count + 1) < sgt->nents) {
4926 sg_next_iter = sg_next(sg);
4927 len_next = sg_dma_len(sg_next_iter);
4928 addr_next = sg_dma_address(sg_next_iter);
4933 if ((addr + len == addr_next) &&
4934 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4946 return dma_desc_cnt * sizeof(struct packet_lin_dma);
4949 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4950 struct hl_cs_parser *parser,
4951 struct packet_lin_dma *user_dma_pkt,
4952 u64 addr, enum dma_data_direction dir)
4954 struct hl_userptr *userptr;
4957 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4958 parser->job_userptr_list, &userptr))
4959 goto already_pinned;
4961 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4965 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4970 list_add_tail(&userptr->job_node, parser->job_userptr_list);
4972 rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
4974 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4978 userptr->dma_mapped = true;
4982 parser->patched_cb_size +=
4983 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4988 list_del(&userptr->job_node);
4989 hl_unpin_host_memory(hdev, userptr);
4995 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4996 struct hl_cs_parser *parser,
4997 struct packet_lin_dma *user_dma_pkt,
5000 enum dma_data_direction dir;
5001 bool skip_host_mem_pin = false, user_memset;
5005 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5006 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5007 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5011 skip_host_mem_pin = true;
5013 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5014 dir = DMA_TO_DEVICE;
5015 addr = le64_to_cpu(user_dma_pkt->src_addr);
5017 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5018 dir = DMA_FROM_DEVICE;
5019 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5020 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5021 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5024 if (skip_host_mem_pin)
5025 parser->patched_cb_size += sizeof(*user_dma_pkt);
5027 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5033 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5034 struct hl_cs_parser *parser,
5035 struct packet_lin_dma *user_dma_pkt)
5037 bool src_in_host = false;
5038 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5039 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5040 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5042 dev_dbg(hdev->dev, "DMA packet details:\n");
5043 dev_dbg(hdev->dev, "source == 0x%llx\n",
5044 le64_to_cpu(user_dma_pkt->src_addr));
5045 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5046 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5049 * Special handling for DMA with size 0. Bypass all validations
5050 * because no transactions will be done except for WR_COMP, which
5051 * is not a security issue
5053 if (!le32_to_cpu(user_dma_pkt->tsize)) {
5054 parser->patched_cb_size += sizeof(*user_dma_pkt);
if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
src_in_host = true;
return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt, src_in_host);
5065 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5066 struct hl_cs_parser *parser,
5067 struct packet_load_and_exe *user_pkt)
5071 cfg = le32_to_cpu(user_pkt->cfg);
5073 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5075 "User not allowed to use Load and Execute\n");
5079 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5084 static int gaudi_validate_cb(struct hl_device *hdev,
5085 struct hl_cs_parser *parser, bool is_mmu)
5087 u32 cb_parsed_length = 0;
5090 parser->patched_cb_size = 0;
/* user_cb_size is greater than 0, so the loop always executes */
5093 while (cb_parsed_length < parser->user_cb_size) {
5094 enum packet_id pkt_id;
5096 struct gaudi_packet *user_pkt;
5098 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5100 pkt_id = (enum packet_id) (
5101 (le64_to_cpu(user_pkt->header) &
5102 PACKET_HEADER_PACKET_ID_MASK) >>
5103 PACKET_HEADER_PACKET_ID_SHIFT);
5105 if (!validate_packet_id(pkt_id)) {
5106 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5111 pkt_size = gaudi_packet_sizes[pkt_id];
5112 cb_parsed_length += pkt_size;
5113 if (cb_parsed_length > parser->user_cb_size) {
5115 "packet 0x%x is out of CB boundary\n", pkt_id);
5121 case PACKET_MSG_PROT:
5123 "User not allowed to use MSG_PROT\n");
5128 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5133 dev_err(hdev->dev, "User not allowed to use STOP\n");
5137 case PACKET_WREG_BULK:
5139 "User not allowed to use WREG_BULK\n");
5143 case PACKET_LOAD_AND_EXE:
5144 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5145 (struct packet_load_and_exe *) user_pkt);
5148 case PACKET_LIN_DMA:
5149 parser->contains_dma_pkt = true;
5151 parser->patched_cb_size += pkt_size;
5153 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5154 (struct packet_lin_dma *) user_pkt);
5157 case PACKET_WREG_32:
5158 case PACKET_MSG_LONG:
5159 case PACKET_MSG_SHORT:
5163 case PACKET_ARB_POINT:
5164 parser->patched_cb_size += pkt_size;
5168 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
* The new CB should have space at the end for:
* 1. Optional NOP padding for cacheline alignment
* 2. A MSG_PROT packet that will act as a completion packet
* 3. A MSG_PROT packet that will generate an MSI interrupt
5184 if (parser->completion)
5185 parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5186 parser->patched_cb_size);
5191 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5192 struct hl_cs_parser *parser,
5193 struct packet_lin_dma *user_dma_pkt,
5194 struct packet_lin_dma *new_dma_pkt,
5195 u32 *new_dma_pkt_size)
5197 struct hl_userptr *userptr;
5198 struct scatterlist *sg, *sg_next_iter;
5199 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5201 dma_addr_t dma_addr, dma_addr_next;
5202 u64 device_memory_addr, addr;
5203 enum dma_data_direction dir;
5204 struct sg_table *sgt;
5205 bool src_in_host = false;
5206 bool skip_host_mem_pin = false;
5209 ctl = le32_to_cpu(user_dma_pkt->ctl);
if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
src_in_host = true;
5214 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5215 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5218 addr = le64_to_cpu(user_dma_pkt->src_addr);
5219 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5220 dir = DMA_TO_DEVICE;
5222 skip_host_mem_pin = true;
5224 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5225 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5226 dir = DMA_FROM_DEVICE;
5229 if ((!skip_host_mem_pin) &&
5230 (!hl_userptr_is_pinned(hdev, addr,
5231 le32_to_cpu(user_dma_pkt->tsize),
5232 parser->job_userptr_list, &userptr))) {
dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
addr, le32_to_cpu(user_dma_pkt->tsize));
5238 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5239 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5240 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5244 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5249 for_each_sgtable_dma_sg(sgt, sg, count) {
5250 len = sg_dma_len(sg);
5251 dma_addr = sg_dma_address(sg);
5256 while ((count + 1) < sgt->nents) {
5257 sg_next_iter = sg_next(sg);
5258 len_next = sg_dma_len(sg_next_iter);
5259 dma_addr_next = sg_dma_address(sg_next_iter);
5264 if ((dma_addr + len == dma_addr_next) &&
5265 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5274 ctl = le32_to_cpu(user_dma_pkt->ctl);
5275 if (likely(dma_desc_cnt))
5276 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5277 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5278 new_dma_pkt->ctl = cpu_to_le32(ctl);
5279 new_dma_pkt->tsize = cpu_to_le32(len);
5281 if (dir == DMA_TO_DEVICE) {
5282 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5283 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5285 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5286 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5290 device_memory_addr += len;
5295 if (!dma_desc_cnt) {
5297 "Error of 0 SG entries when patching DMA packet\n");
5301 /* Fix the last dma packet - wrcomp must be as user set it */
5303 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5305 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5310 static int gaudi_patch_cb(struct hl_device *hdev,
5311 struct hl_cs_parser *parser)
5313 u32 cb_parsed_length = 0;
5314 u32 cb_patched_cur_length = 0;
/* user_cb_size is greater than 0, so the loop always executes */
5318 while (cb_parsed_length < parser->user_cb_size) {
5319 enum packet_id pkt_id;
5321 u32 new_pkt_size = 0;
5322 struct gaudi_packet *user_pkt, *kernel_pkt;
5324 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5325 kernel_pkt = parser->patched_cb->kernel_address +
5326 cb_patched_cur_length;
5328 pkt_id = (enum packet_id) (
5329 (le64_to_cpu(user_pkt->header) &
5330 PACKET_HEADER_PACKET_ID_MASK) >>
5331 PACKET_HEADER_PACKET_ID_SHIFT);
5333 if (!validate_packet_id(pkt_id)) {
5334 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5339 pkt_size = gaudi_packet_sizes[pkt_id];
5340 cb_parsed_length += pkt_size;
5341 if (cb_parsed_length > parser->user_cb_size) {
5343 "packet 0x%x is out of CB boundary\n", pkt_id);
5349 case PACKET_LIN_DMA:
5350 rc = gaudi_patch_dma_packet(hdev, parser,
5351 (struct packet_lin_dma *) user_pkt,
5352 (struct packet_lin_dma *) kernel_pkt,
5354 cb_patched_cur_length += new_pkt_size;
5357 case PACKET_MSG_PROT:
5359 "User not allowed to use MSG_PROT\n");
5364 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5369 dev_err(hdev->dev, "User not allowed to use STOP\n");
5373 case PACKET_WREG_32:
5374 case PACKET_WREG_BULK:
5375 case PACKET_MSG_LONG:
5376 case PACKET_MSG_SHORT:
5380 case PACKET_ARB_POINT:
5381 case PACKET_LOAD_AND_EXE:
5382 memcpy(kernel_pkt, user_pkt, pkt_size);
5383 cb_patched_cur_length += pkt_size;
5387 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5400 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5401 struct hl_cs_parser *parser)
5404 u32 patched_cb_size;
5405 struct hl_cb *user_cb;
* The new CB should have space at the end for:
* 1. Optional NOP padding for cacheline alignment
* 2. A MSG_PROT packet that will act as a completion packet
* 3. A MSG_PROT packet that will generate an MSI interrupt
5414 if (parser->completion)
5415 parser->patched_cb_size = parser->user_cb_size +
5416 gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5418 parser->patched_cb_size = parser->user_cb_size;
5420 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5421 parser->patched_cb_size, false, false,
5426 "Failed to allocate patched CB for DMA CS %d\n",
5431 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5432 /* hl_cb_get should never fail */
5433 if (!parser->patched_cb) {
5434 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5440 * We are protected from overflow because the check
5441 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5442 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5444 * There is no option to reach here without going through that check because:
5445 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5446 * an external queue.
5447 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5449 memcpy(parser->patched_cb->kernel_address,
5450 parser->user_cb->kernel_address,
5451 parser->user_cb_size);
5453 patched_cb_size = parser->patched_cb_size;
5455 /* Validate patched CB instead of user CB */
5456 user_cb = parser->user_cb;
5457 parser->user_cb = parser->patched_cb;
5458 rc = gaudi_validate_cb(hdev, parser, true);
5459 parser->user_cb = user_cb;
5462 hl_cb_put(parser->patched_cb);
5466 if (patched_cb_size != parser->patched_cb_size) {
5467 dev_err(hdev->dev, "user CB size mismatch\n");
5468 hl_cb_put(parser->patched_cb);
* Always call cb destroy here because we still have 1 reference
* to it by calling cb_get earlier. After the job is completed,
* cb_put will release it, but here we want to remove it from the
* idr
5480 hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5485 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5486 struct hl_cs_parser *parser)
5491 rc = gaudi_validate_cb(hdev, parser, false);
5496 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5497 parser->patched_cb_size, false, false,
5501 "Failed to allocate patched CB for DMA CS %d\n", rc);
5505 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5506 /* hl_cb_get should never fail here */
5507 if (!parser->patched_cb) {
5508 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5513 rc = gaudi_patch_cb(hdev, parser);
5516 hl_cb_put(parser->patched_cb);
* Always call cb destroy here because we still have 1 reference
* to it by calling cb_get earlier. After the job is completed,
* cb_put will release it, but here we want to remove it from the
* idr
5525 hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5529 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5533 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5534 struct hl_cs_parser *parser)
5536 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5537 struct gaudi_device *gaudi = hdev->asic_specific;
5538 u32 nic_queue_offset, nic_mask_q_id;
5540 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5541 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5542 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5543 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
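/* Each NIC engine owns four consecutive queues, so (nic_queue_offset >> 2)
 * is the engine index; shifting from HW_CAP_NIC_SHIFT turns it into the
 * engine's HW_CAP_NICx capability bit.
 */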
5545 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5546 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5551 /* For internal queue jobs just check if CB address is valid */
5552 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5553 parser->user_cb_size,
5554 asic_prop->sram_user_base_address,
5555 asic_prop->sram_end_address))
5558 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5559 parser->user_cb_size,
5560 asic_prop->dram_user_base_address,
5561 asic_prop->dram_end_address))
5564 /* PMMU and HPMMU addresses are equal, check only one of them */
5565 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5566 parser->user_cb_size,
5567 asic_prop->pmmu.start_addr,
5568 asic_prop->pmmu.end_addr))
5572 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5573 parser->user_cb, parser->user_cb_size);
5578 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5580 struct gaudi_device *gaudi = hdev->asic_specific;
5582 if (parser->queue_type == QUEUE_TYPE_INT)
5583 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5585 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5586 return gaudi_parse_cb_mmu(hdev, parser);
5588 return gaudi_parse_cb_no_mmu(hdev, parser);
5591 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5592 u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5593 u32 msi_vec, bool eb)
5595 struct gaudi_device *gaudi = hdev->asic_specific;
5596 struct packet_msg_prot *cq_pkt;
5597 struct packet_nop *cq_padding;
5601 cq_padding = kernel_address + original_len;
5602 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
while ((void *)cq_padding < (void *)cq_pkt) {
cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
cq_padding++;
}
5609 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5610 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5613 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5615 cq_pkt->ctl = cpu_to_le32(tmp);
5616 cq_pkt->value = cpu_to_le32(cq_val);
5617 cq_pkt->addr = cpu_to_le64(cq_addr);
5621 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5622 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5623 cq_pkt->ctl = cpu_to_le32(tmp);
5624 cq_pkt->value = cpu_to_le32(1);
if (gaudi->multi_msi_mode)
msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
else
msi_addr = mmPCIE_CORE_MSI_REQ;
5631 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
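/* Note: the two MSG_PROT packets appended above work as a pair - the first
 * writes cq_val into the completion queue, and the second writes to the MSI
 * request register so the host gets an interrupt for that completion.
 */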
5634 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5636 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5639 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5642 struct packet_lin_dma *lin_dma_pkt;
5643 struct hl_cs_job *job;
5644 u32 cb_size, ctl, err_cause;
5648 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5652 lin_dma_pkt = cb->kernel_address;
5653 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5654 cb_size = sizeof(*lin_dma_pkt);
5656 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5657 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5658 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5659 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5660 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5662 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5663 lin_dma_pkt->src_addr = cpu_to_le64(val);
5664 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5665 lin_dma_pkt->tsize = cpu_to_le32(size);
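/* In MEMSET mode the LIN_DMA packet reuses the src_addr field to carry the
 * 64-bit fill value, so only dst_addr refers to actual device memory.
 */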
5667 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5669 dev_err(hdev->dev, "Failed to allocate a new job\n");
5674 /* Verify DMA is OK */
5675 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5676 if (err_cause && !hdev->init_done) {
5678 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5680 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5685 atomic_inc(&job->user_cb->cs_cnt);
5686 job->user_cb_size = cb_size;
5687 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5688 job->patched_cb = job->user_cb;
5689 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5691 hl_debugfs_add_job(hdev, job);
5693 rc = gaudi_send_job_on_qman0(hdev, job);
5694 hl_debugfs_remove_job(hdev, job);
5696 atomic_dec(&cb->cs_cnt);
5698 /* Verify DMA is OK */
5699 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5701 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5703 if (!hdev->init_done) {
5705 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5707 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5713 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5718 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5719 u32 num_regs, u32 val)
5721 struct packet_msg_long *pkt;
5722 struct hl_cs_job *job;
5727 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5729 if (cb_size > SZ_2M) {
dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M >> 20);
5734 cb = hl_cb_kernel_create(hdev, cb_size, false);
5738 pkt = cb->kernel_address;
5740 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5741 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5742 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5743 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5744 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5746 for (i = 0; i < num_regs ; i++, pkt++) {
5747 pkt->ctl = cpu_to_le32(ctl);
5748 pkt->value = cpu_to_le32(val);
5749 pkt->addr = cpu_to_le64(reg_base + (i * 4));
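/* One MSG_LONG packet is emitted per register: each performs a single
 * 32-bit write, and consecutive registers are 4 bytes apart, hence the
 * reg_base + (i * 4) destination address.
 */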
5752 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5754 dev_err(hdev->dev, "Failed to allocate a new job\n");
5761 atomic_inc(&job->user_cb->cs_cnt);
5762 job->user_cb_size = cb_size;
5763 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5764 job->patched_cb = job->user_cb;
5765 job->job_cb_size = cb_size;
5767 hl_debugfs_add_job(hdev, job);
5769 rc = gaudi_send_job_on_qman0(hdev, job);
5770 hl_debugfs_remove_job(hdev, job);
5772 atomic_dec(&cb->cs_cnt);
5776 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
static int gaudi_restore_sm_registers(struct hl_device *hdev)
{
	u64 base_addr;
	u32 num_regs;
	int rc;

	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
	num_regs = NUM_OF_SOB_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
	num_regs = NUM_OF_SOB_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
	num_regs = NUM_OF_SOB_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
	num_regs = NUM_OF_MONITORS_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
	num_regs = NUM_OF_MONITORS_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
	num_regs = NUM_OF_MONITORS_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}

	return 0;
}
static void gaudi_restore_dma_registers(struct hl_device *hdev)
{
	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
	int i;

	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
		u64 sob_addr = CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
				(i * sob_delta);
		u32 dma_offset = i * DMA_CORE_OFFSET;

		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
				lower_32_bits(sob_addr));
		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
				upper_32_bits(sob_addr));
		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
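		/*
		 * Our reading of the WR_COMP_WDATA value: bit 31 marks the
		 * completion write as a sync-manager "add" operation, so each
		 * DMA completion increments the channel's SOB by 1 rather
		 * than overwriting it.
		 */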
		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
		 * modified by the user for SRAM reduction
		 */
		if (i > 1)
			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
					0x00000001);
	}
}
static void gaudi_restore_qm_registers(struct hl_device *hdev)
{
	u32 qman_offset;
	int i;

	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
		qman_offset = i * DMA_QMAN_OFFSET;
		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
	}

	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
	}

	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		qman_offset = i * TPC_QMAN_OFFSET;
		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
	}
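	/*
	 * NIC QMANs are paired, two per NIC macro: (i >> 1) selects the
	 * macro, (i & 0x1) the engine within it.
	 */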
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
	}
}
static int gaudi_restore_user_registers(struct hl_device *hdev)
{
	int rc;

	rc = gaudi_restore_sm_registers(hdev);
	if (rc)
		return rc;

	gaudi_restore_dma_registers(hdev);
	gaudi_restore_qm_registers(hdev);

	return 0;
}
static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
{
	return 0;
}
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
{
	u32 size = hdev->asic_prop.mmu_pgt_size +
			hdev->asic_prop.mmu_cache_mng_size;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 addr = hdev->asic_prop.mmu_pgt_addr;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return gaudi_memset_device_memory(hdev, addr, size, 0);
}
static void gaudi_restore_phase_topology(struct hl_device *hdev)
{

}
static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
					u32 size_to_dma, dma_addr_t dma_addr)
{
	u32 err_cause, val;
	u64 dma_offset;
	int rc;

	dma_offset = dma_id * DMA_CORE_OFFSET;

	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));

	rc = hl_poll_timeout(
		hdev,
		mmDMA0_CORE_STS0 + dma_offset,
		val,
		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
		0,
		1000000);

	if (rc) {
		dev_err(hdev->dev,
			"DMA %d timed-out during reading of 0x%llx\n",
			dma_id, addr);
		return -EIO;
	}

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);

		return -EIO;
	}

	return 0;
}
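/*
 * Read a block of device memory for debugfs by bouncing it through a 2MB
 * host buffer: pick an idle PCI DMA channel, stop its QMAN CPs, open the
 * DMA core protection and copy the data out in up-to-2MB chunks.
 */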
static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
				void *blob_addr)
{
	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
	u32 qm_glbl_sts0, qm_cgm_sts;
	u64 dma_offset, qm_offset;
	dma_addr_t dma_addr;
	void *kernel_addr;
	bool is_eng_idle;
	int rc = 0, dma_id;

	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
	if (!kernel_addr)
		return -ENOMEM;

	hdev->asic_funcs->hw_queues_lock(hdev);

	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
	dma_offset = dma_id * DMA_CORE_OFFSET;
	qm_offset = dma_id * DMA_QMAN_OFFSET;
	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
		      IS_DMA_IDLE(dma_core_sts0);

	if (!is_eng_idle) {
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
		dma_offset = dma_id * DMA_CORE_OFFSET;
		qm_offset = dma_id * DMA_QMAN_OFFSET;
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
			      IS_DMA_IDLE(dma_core_sts0);

		if (!is_eng_idle) {
			dev_err_ratelimited(hdev->dev,
				"Can't read via DMA because it is BUSY\n");
			rc = -EAGAIN;
			goto out;
		}
	}

	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
	 * ASID
	 */
	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
	}

	pos = 0;
	size_left = size;
	size_to_dma = SZ_2M;

	while (size_left > 0) {

		if (size_left < SZ_2M)
			size_to_dma = size_left;

		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
						dma_addr);
		if (rc)
			break;

		memcpy(blob_addr + pos, kernel_addr, size_to_dma);

		if (size_left <= SZ_2M)
			break;

		pos += SZ_2M;
		addr += SZ_2M;
		size_left -= SZ_2M;
	}

	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
	 * ASID
	 */
	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
			~BIT(DMA0_CORE_PROT_VAL_SHIFT));

	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);

	return rc;
}
static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->reset_info.hard_reset_pending)
		return U64_MAX;

	return readq(hdev->pcie_bar[HBM_BAR_ID] +
			(addr - gaudi->hbm_bar_cur_addr));
}

static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->reset_info.hard_reset_pending)
		return;

	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
			(addr - gaudi->hbm_bar_cur_addr));
}
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}
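/*
 * Per the ~0x7FF mask above, the ASID occupies the low bits of each
 * *_NON_SECURE_PROPS / *USER register with the MMBP (MMU bypass) bit just
 * above it, so clearing bits [10:0] and OR-ing in the ASID both disables
 * bypass and binds the engine's transactions to the context.
 */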
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
		dev_crit(hdev->dev, "asid %u is too big\n", asid);
		return;
	}

	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
}
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout, dma_offset;
	int rc;

	if (hdev->pldm)
		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
		dev_err_ratelimited(hdev->dev,
			"Can't send driver job on QMAN0 because the device is not idle\n");
		return -EBUSY;
	}

	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	cb = job->patched_cb;

	fence_pkt = cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot);

	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
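	/*
	 * The MSG_PROT packet appended at the end of the CB makes the QMAN
	 * write GAUDI_QMAN0_FENCE_VAL to the fence buffer once all preceding
	 * packets have executed; the poll below waits on that host address.
	 */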
	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;

	WREG32(mmDMA0_CORE_PROT + dma_offset,
			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

free_fence_ptr:
	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));

	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
	return rc;
}
static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
{
	if (event_type >= GAUDI_EVENT_SIZE)
		goto event_not_supported;

	if (!gaudi_irq_map_table[event_type].valid)
		goto event_not_supported;

	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);

	return;

event_not_supported:
	snprintf(desc, size, "N/A");
}
static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
							bool is_write, s32 *engine_id_1,
							s32 *engine_id_2)
{
	u32 dma_id[2], dma_offset, err_cause[2], mask, i;

	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		dma_id[0] = 0;
		dma_id[1] = 2;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		dma_id[0] = 1;
		dma_id[1] = 3;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		dma_id[0] = 4;
		dma_id[1] = 6;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		dma_id[0] = 5;
		dma_id[1] = 7;
		break;
	default:
		goto unknown_initiator;
	}

	for (i = 0 ; i < 2 ; i++) {
		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	}

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
			return "DMA0";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
			return "DMA2";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
			return "DMA0 or DMA2";
		}
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
			return "DMA1";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
			return "DMA3";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
			return "DMA1 or DMA3";
		}
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
			return "DMA4";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
			return "DMA6";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
			return "DMA4 or DMA6";
		}
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
			return "DMA5";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
			return "DMA7";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
			return "DMA5 or DMA7";
		}
	default:
		goto unknown_initiator;
	}

unknown_initiator:
	return "unknown initiator";
}
static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
						u32 *engine_id_1, u32 *engine_id_2)
{
	u32 val, x_y, axi_id;

	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
				RREG32(mmMMU_UP_RAZWI_READ_ID);
	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
			RAZWI_INITIATOR_AXI_ID_SHIFT);

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
			return "TPC0";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
			return "NIC0";
		}
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC1:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
		return "TPC1";
	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
		return "MME0";
	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
		return "MME1";
	case RAZWI_INITIATOR_ID_X_Y_TPC2:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
		return "TPC2";
	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
			return "TPC3";
		}
		/* PCI, CPU or PSOC does not have engine id */
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
			return "PCI";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
			return "CPU";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
			return "PSOC";
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
				engine_id_1, engine_id_2);
	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
			return "TPC4";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
			return "NIC1";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
			return "NIC2";
		}
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC5:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
		return "TPC5";
	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
		return "MME2";
	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
		return "MME3";
	case RAZWI_INITIATOR_ID_X_Y_TPC6:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
		return "TPC6";
	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
			return "TPC7";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
			return "NIC4";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
			return "NIC5";
		}
		break;
	default:
		break;
	}

	dev_err(hdev->dev,
		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
		val,
		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
			RAZWI_INITIATOR_AXI_ID_MASK);

	return "unknown initiator";
}
static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_id_1,
						u32 *engine_id_2)
{
	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal write of %s\n",
			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
	}

	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal read of %s\n",
			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
	}
}
static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u8 *type)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 val;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		*addr <<= 32;
		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
		*type = HL_RAZWI_PAGE_FAULT;

		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
	}

	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
		*addr <<= 32;
		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
		*type = HL_RAZWI_MMU_ACCESS_ERROR;

		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
	}
}
/*
 *  +-------------------+------------------------------------------------------+
 *  | Configuration Reg |                     Description                      |
 *  |                   |                                                      |
 *  +-------------------+------------------------------------------------------+
 *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
 *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
 *  |                   |0xF34 memory wrappers 63:32                           |
 *  |                   |0xF38 memory wrappers 95:64                           |
 *  |                   |0xF3C memory wrappers 127:96                          |
 *  +-------------------+------------------------------------------------------+
 *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
 *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
 *  |                   |0xF44 memory wrappers 63:32                           |
 *  |                   |0xF48 memory wrappers 95:64                           |
 *  |                   |0xF4C memory wrappers 127:96                          |
 *  +-------------------+------------------------------------------------------+
 */
static int gaudi_extract_ecc_info(struct hl_device *hdev,
		struct ecc_info_extract_params *params, u64 *ecc_address,
		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
{
	u32 i, num_mem_regs, reg, err_bit;
	u64 err_addr, err_word = 0;

	num_mem_regs = params->num_memories / 32 +
			((params->num_memories % 32) ? 1 : 0);

	if (params->block_address >= CFG_BASE)
		params->block_address -= CFG_BASE;

	if (params->derr)
		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
	else
		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;

	/* Set invalid wrapper index */
	*memory_wrapper_idx = 0xFF;

	/* Iterate through memory wrappers, a single bit must be set */
	for (i = 0 ; i < num_mem_regs ; i++) {
		/* the indication registers are consecutive 32-bit words */
		err_word = RREG32(err_addr + i * 4);
		if (err_word) {
			err_bit = __ffs(err_word);
			*memory_wrapper_idx = err_bit + (32 * i);
			break;
		}
	}

	if (*memory_wrapper_idx == 0xFF) {
		dev_err(hdev->dev, "ECC error information cannot be found\n");
		return -EINVAL;
	}

	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
			*memory_wrapper_idx);

	*ecc_address =
		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
	*ecc_syndrom =
		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);

	/* Clear error indication */
	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
	if (params->derr)
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
	else
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);

	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);

	return 0;
}
/**
 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
 *
 * @idx: the current pi/ci value
 * @q_len: the queue length (power of 2)
 *
 * @return the cyclically decremented index
 */
static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
{
	u32 mask = q_len - 1;

	/*
	 * modular decrement is equivalent to adding (queue_size - 1);
	 * later we take the LSBs to make sure the value is in the
	 * range [0, queue_len - 1]
	 */
	return (idx + q_len - 1) & mask;
}
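/* Example: gaudi_queue_idx_dec(5, 8) == 4 and gaudi_queue_idx_dec(0, 8) == 7 */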
/**
 * gaudi_handle_sw_config_stream_data - print SW config stream data
 *
 * @hdev: pointer to the habanalabs device structure
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 * @event_mask: mask of the last events occurred
 */
static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
						u64 qman_base, u64 event_mask)
{
	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
	u32 cq_ptr_lo_off, size;

	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;

	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
						stream * cq_ptr_lo_off;
	cq_ptr_hi = cq_ptr_lo +
			(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
	cq_tsize = cq_ptr_lo +
			(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);

	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
	size = RREG32(cq_tsize);
	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
		stream, cq_ptr, size);

	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
		hdev->captured_err_info.undef_opcode.cq_size = size;
		hdev->captured_err_info.undef_opcode.stream_id = stream;
	}
}
/**
 * gaudi_handle_last_pqes_on_err - print last PQEs on error
 *
 * @hdev: pointer to the habanalabs device structure
 * @qid_base: first QID of the QMAN (out of 4 streams)
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 * @event_mask: mask of the last events occurred
 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
 */
static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
						u32 stream, u64 qman_base,
						u64 event_mask,
						bool pr_sw_conf)
{
	u32 ci, qm_ci_stream_off, queue_len;
	struct hl_hw_queue *q;
	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
	int i;

	q = &hdev->kernel_queues[qid_base + stream];

	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
						stream * qm_ci_stream_off;

	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
					q->int_queue_len : HL_QUEUE_LENGTH;

	hdev->asic_funcs->hw_queues_lock(hdev);

	if (pr_sw_conf)
		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);

	ci = RREG32(pq_ci);

	/* we should start printing from ci - 1 */
	ci = gaudi_queue_idx_dec(ci, queue_len);
	memset(addr, 0, sizeof(addr));

	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
		struct hl_bd *bd;
		u32 len;

		bd = q->kernel_address;
		bd += ci;

		len = le32_to_cpu(bd->len);
		/* len 0 means uninitialized entry - break */
		if (!len)
			break;

		addr[i] = le64_to_cpu(bd->ptr);

		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
			stream, ci, addr[i], len);

		/* get previous ci, wrap if needed */
		ci = gaudi_queue_idx_dec(ci, queue_len);
	}

	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
		u32 arr_idx = undef_opcode->cb_addr_streams_len;

		if (arr_idx == 0) {
			undef_opcode->timestamp = ktime_get();
			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
		}

		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
		undef_opcode->cb_addr_streams_len++;
	}

	hdev->asic_funcs->hw_queues_unlock(hdev);
}
/**
 * handle_qman_data_on_err - extract QMAN data on error
 *
 * @hdev: pointer to the habanalabs device structure
 * @qid_base: first QID of the QMAN (out of 4 streams)
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 * @event_mask: mask of the last events occurred
 *
 * This function attempts to extract as much data as possible on a QMAN error:
 * for an upper CP, print the SW config stream data and the last 8 PQEs;
 * for the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
 */
static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
				u32 stream, u64 qman_base, u64 event_mask)
{
	u32 i;

	if (stream != QMAN_STREAMS) {
		gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
			qman_base, event_mask, true);
		return;
	}

	/* handle Lower-CP */
	gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);

	for (i = 0; i < QMAN_STREAMS; i++)
		gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
			qman_base, event_mask, false);
}
static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
					const char *qm_name,
					u64 qman_base,
					u32 qid_base,
					u64 *event_mask)
{
	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
	u64 glbl_sts_addr, arb_err_addr;
	char reg_desc[32];

	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);

	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
		glbl_sts_clr_val = 0;
		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);

		if (!glbl_sts_val)
			continue;

		if (i == QMAN_STREAMS)
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
		else
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);

		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
			if (glbl_sts_val & BIT(j)) {
				dev_err_ratelimited(hdev->dev,
						"%s %s. err cause: %s\n",
						qm_name, reg_desc,
						gaudi_qman_error_cause[j]);
				glbl_sts_clr_val |= BIT(j);
			}
		}
		/* check for undefined opcode */
		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
				hdev->captured_err_info.undef_opcode.write_enable) {
			memset(&hdev->captured_err_info.undef_opcode, 0,
				sizeof(hdev->captured_err_info.undef_opcode));

			hdev->captured_err_info.undef_opcode.write_enable = false;
			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
		}

		/* Write 1 clear errors */
		if (!hdev->stop_on_err)
			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
		else
			handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
	}

	arb_err_val = RREG32(arb_err_addr);

	if (!arb_err_val)
		return;

	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
		if (arb_err_val & BIT(j)) {
			dev_err_ratelimited(hdev->dev,
					"%s ARB_ERR. err cause: %s\n",
					qm_name,
					gaudi_qman_arb_error_cause[j]);
		}
	}
}
static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
					struct hl_eq_sm_sei_data *sei_data)
{
	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;

	/* Flip the bits as the enum is ordered in the opposite way */
	index = (index ^ 0x3) & 0x3;
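	/* e.g. GAUDI_EVENT_DMA_IF_SEI_0 -> index 3, GAUDI_EVENT_DMA_IF_SEI_3 -> index 0 */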
	switch (sei_data->sei_cause) {
	case SM_SEI_SO_OVERFLOW:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: SOB Group %u overflow/underflow",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	case SM_SEI_LBW_4B_UNALIGNED:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	case SM_SEI_AXI_RESPONSE_ERR:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: AXI ID %u response error",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	default:
		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
				le32_to_cpu(sei_data->sei_log));
		break;
	}
}
static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
		struct hl_eq_ecc_data *ecc_data)
{
	struct ecc_info_extract_params params;
	u64 ecc_address = 0, ecc_syndrom = 0;
	u8 index, memory_wrapper_idx = 0;
	bool extract_info_from_fw;
	int rc;

	if (hdev->asic_prop.fw_security_enabled) {
		extract_info_from_fw = true;
		goto extract_ecc_info;
	}

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
		extract_info_from_fw = true;
		break;
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
		index = event_type - GAUDI_EVENT_TPC0_SERR;
		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
		index = event_type - GAUDI_EVENT_TPC0_DERR;
		params.block_address =
			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	default:
		return;
	}

extract_ecc_info:
	if (extract_info_from_fw) {
		ecc_address = le64_to_cpu(ecc_data->ecc_address);
		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
	} else {
		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
				&ecc_syndrom, &memory_wrapper_idx);
		if (rc)
			return;
	}

	dev_err(hdev->dev,
		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
		ecc_address, ecc_syndrom, memory_wrapper_idx);
}
static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	u64 qman_base;
	char desc[32];
	u32 qid_base;
	u8 index;

	switch (event_type) {
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		/* In TPC QM event, notify on TPC assertion. While there isn't
		 * a specific event for assertion yet, the FW generates a QM event.
		 * The SW upper layer will inspect an internal mapped area to
		 * determine whether the event is a TPC assertion or a TPC QM error.
		 */
		*event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
		index = event_type - GAUDI_EVENT_TPC0_QM;
		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
		break;
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
		if (event_type == GAUDI_EVENT_MME0_QM) {
			index = 0;
			qid_base = GAUDI_QUEUE_ID_MME_0_0;
		} else { /* event_type == GAUDI_EVENT_MME2_QM */
			index = 2;
			qid_base = GAUDI_QUEUE_ID_MME_1_0;
		}
		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
		break;
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		index = event_type - GAUDI_EVENT_DMA0_QM;
		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
		if (index > 1)
			qid_base++;
		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
		break;
	case GAUDI_EVENT_NIC0_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
		qman_base = mmNIC0_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
		break;
	case GAUDI_EVENT_NIC0_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
		qman_base = mmNIC0_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
		break;
	case GAUDI_EVENT_NIC1_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
		qman_base = mmNIC1_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
		break;
	case GAUDI_EVENT_NIC1_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
		qman_base = mmNIC1_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
		break;
	case GAUDI_EVENT_NIC2_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
		qman_base = mmNIC2_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
		break;
	case GAUDI_EVENT_NIC2_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
		qman_base = mmNIC2_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
		break;
	case GAUDI_EVENT_NIC3_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
		qman_base = mmNIC3_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
		break;
	case GAUDI_EVENT_NIC3_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
		qman_base = mmNIC3_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
		break;
	case GAUDI_EVENT_NIC4_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
		qman_base = mmNIC4_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
		break;
	case GAUDI_EVENT_NIC4_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
		qman_base = mmNIC4_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
		break;
	default:
		return;
	}

	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
}
static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
					bool razwi)
{
	u32 engine_id_1, engine_id_2;
	char desc[64] = "";
	u64 razwi_addr = 0;
	u8 razwi_type;
	int rc;

	/*
	 * Init engine ids as not valid by default; they get a valid value only
	 * if the razwi was initiated by an engine that has one.
	 * Init razwi type to default; it is changed only if the razwi was
	 * caused by a page fault or an MMU access error.
	 */
	engine_id_1 = U16_MAX;
	engine_id_2 = U16_MAX;
	razwi_type = U8_MAX;

	gaudi_get_event_desc(event_type, desc, sizeof(desc));
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);

	if (razwi) {
		gaudi_print_and_get_razwi_info(hdev, &engine_id_1, &engine_id_2);
		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);

		/* In case it's the first razwi, save its parameters */
		rc = atomic_cmpxchg(&hdev->captured_err_info.razwi.write_enable, 1, 0);
		if (rc) {
			hdev->captured_err_info.razwi.timestamp = ktime_get();
			hdev->captured_err_info.razwi.addr = razwi_addr;
			hdev->captured_err_info.razwi.engine_id_1 = engine_id_1;
			hdev->captured_err_info.razwi.engine_id_2 = engine_id_2;
			/*
			 * If the first engine id holds a non valid value the
			 * razwi initiator does not have an engine id
			 */
			hdev->captured_err_info.razwi.non_engine_initiator =
					(engine_id_1 == U16_MAX);
			hdev->captured_err_info.razwi.type = razwi_type;
		}
	}
}
static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
					struct cpucp_pkt_sync_err *sync_err)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];

	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
		sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
}

static void gaudi_print_fw_alive_info(struct hl_device *hdev,
					struct hl_eq_fw_alive *fw_alive)
{
	dev_err(hdev->dev,
		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
		"Minor" : "Critical", fw_alive->process_id,
		fw_alive->thread_id, fw_alive->uptime_seconds);
}
static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
					void *data)
{
	char desc[64] = "", *type;
	struct eq_nic_sei_event *eq_nic_sei = data;
	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;

	switch (eq_nic_sei->axi_error_cause) {
	case RXB:
		type = "RXB";
		break;
	case RXE:
		type = "RXE";
		break;
	case TXS:
		type = "TXS";
		break;
	case TXE:
		type = "TXE";
		break;
	case QPC_RESP:
		type = "QPC_RESP";
		break;
	case NON_AXI_ERR:
		type = "NON_AXI_ERR";
		break;
	case TMR:
		type = "TMR";
		break;
	default:
		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
			eq_nic_sei->axi_error_cause);
		type = "N/A";
		break;
	}

	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
			eq_nic_sei->id);
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);
}
static int gaudi_compute_reset_late_init(struct hl_device *hdev)
{
	/* GAUDI doesn't support any reset except hard-reset */
	return 0;
}
static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
{
	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
	int rc = 0;

	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
		if (!hbm_ecc_data) {
			dev_err(hdev->dev, "No FW ECC data");
			return 0;
		}

		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));

		dev_err(hdev->dev,
			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
			device, ch, wr_par, rd_par, ca_par, serr, derr);
		dev_err(hdev->dev,
			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
			device, ch, hbm_ecc_data->first_addr, type,
			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
			hbm_ecc_data->dec_cnt);
		return 0;
	}

	if (hdev->asic_prop.fw_security_enabled) {
		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
		return 0;
	}

	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			rc = -EIO;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x060);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
				device, ch * 2,
				RREG32(base + ch * 0x1000 + 0x064),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			rc = -EIO;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x070);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
				device, ch * 2 + 1,
				RREG32(base + ch * 0x1000 + 0x074),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}
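		/*
		 * Clear the latched interrupt/ECC state. The exact semantics of
		 * these MC registers (0x060/0x070 holding the per-pseudo-channel
		 * ECC info words read above, 0x06C/0x07C the interrupt status)
		 * follow the HBM memory-controller register map, which is not
		 * mirrored in the driver headers, so the constants are kept as-is.
		 */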
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
	}
	val  = RREG32(base + 0x8F30);
	val2 = RREG32(base + 0x8F34);
	if (val | val2) {
		rc = -EIO;
		dev_err(hdev->dev,
			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
			device, val, val2);
	}
	val  = RREG32(base + 0x8F40);
	val2 = RREG32(base + 0x8F44);
	if (val | val2) {
		rc = -EIO;
		dev_err(hdev->dev,
			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
			device, val, val2);
	}

	return rc;
}
static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
{
	switch (hbm_event_type) {
	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM0_SPI_1:
		return 0;
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_1:
		return 1;
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_1:
		return 2;
	case GAUDI_EVENT_HBM3_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_1:
		return 3;
	default:
		break;
	}

	/* Should never happen */
	return 0;
}
static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
					char *interrupt_name)
{
	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
	bool soft_reset_required = false;

	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;

	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
		if (tpc_interrupts_cause & BIT(i)) {
			dev_err_ratelimited(hdev->dev,
					"TPC%d_%s interrupt cause: %s\n",
					tpc_id, interrupt_name,
					gaudi_tpc_interrupts_cause[i]);
			/* If this is QM error, we need to soft-reset */
			if (i == 15)
				soft_reset_required = true;
		}

	/* Clear interrupts */
	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);

	return soft_reset_required;
}
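/* DEC event IDs are spaced two apart per TPC in the async-event map. */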
static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
}
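/* KRN_ERR event IDs are spaced six apart per TPC in the async-event map. */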
static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
}
static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type)
{
	ktime_t zero_time = ktime_set(0, 0);

	mutex_lock(&hdev->clk_throttling.lock);

	switch (event_type) {
	case GAUDI_EVENT_FIX_POWER_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
		dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}

	mutex_unlock(&hdev->clk_throttling.lock);
}
static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u32 fw_fatal_err_flag = 0, flags = 0;
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
			>> EQ_CTL_EVENT_TYPE_SHIFT);
	bool reset_required, reset_direct = false;
	u8 cause;
	int rc;

	if (event_type >= GAUDI_EVENT_SIZE) {
		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
				event_type, GAUDI_EVENT_SIZE - 1);
		return;
	}

	gaudi->events_stat[event_type]++;
	gaudi->events_stat_aggregate[event_type]++;

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_DERR:
	case GAUDI_EVENT_PCIE_IF_DERR:
	case GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
		fallthrough;
	case GAUDI_EVENT_CPU_IF_ECC_DERR:
	case GAUDI_EVENT_PSOC_MEM_DERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
	case GAUDI_EVENT_MMU_DERR:
	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		goto reset_device;

	case GAUDI_EVENT_GIC500:
	case GAUDI_EVENT_AXI_ECC:
	case GAUDI_EVENT_L2_RAM_ECC:
	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
		gaudi_print_irq_info(hdev, event_type, false);
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_0:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_HBM0_SPI_1:
	case GAUDI_EVENT_HBM1_SPI_1:
	case GAUDI_EVENT_HBM2_SPI_1:
	case GAUDI_EVENT_HBM3_SPI_1:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_TPC0_DEC:
	case GAUDI_EVENT_TPC1_DEC:
	case GAUDI_EVENT_TPC2_DEC:
	case GAUDI_EVENT_TPC3_DEC:
	case GAUDI_EVENT_TPC4_DEC:
	case GAUDI_EVENT_TPC5_DEC:
	case GAUDI_EVENT_TPC6_DEC:
	case GAUDI_EVENT_TPC7_DEC:
		gaudi_print_irq_info(hdev, event_type, true);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_dec_event_to_tpc_id(event_type),
					"AXI_SLV_DEC_Error");
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (reset_required) {
			dev_err(hdev->dev, "reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			reset_direct = true;
			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
		}
		break;

	case GAUDI_EVENT_TPC0_KRN_ERR:
	case GAUDI_EVENT_TPC1_KRN_ERR:
	case GAUDI_EVENT_TPC2_KRN_ERR:
	case GAUDI_EVENT_TPC3_KRN_ERR:
	case GAUDI_EVENT_TPC4_KRN_ERR:
	case GAUDI_EVENT_TPC5_KRN_ERR:
	case GAUDI_EVENT_TPC6_KRN_ERR:
	case GAUDI_EVENT_TPC7_KRN_ERR:
		gaudi_print_irq_info(hdev, event_type, true);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_krn_event_to_tpc_id(event_type),
					"KRN_ERR");
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (reset_required) {
			dev_err(hdev->dev, "reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			reset_direct = true;
			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
		}
		break;

	case GAUDI_EVENT_PCIE_CORE_SERR:
	case GAUDI_EVENT_PCIE_IF_SERR:
	case GAUDI_EVENT_PCIE_PHY_SERR:
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
	case GAUDI_EVENT_CPU_IF_ECC_SERR:
	case GAUDI_EVENT_PSOC_MEM_SERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
		fallthrough;
	case GAUDI_EVENT_MMU_SERR:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_PCIE_DEC:
	case GAUDI_EVENT_CPU_AXI_SPLITTER:
	case GAUDI_EVENT_PSOC_AXI_DEC:
	case GAUDI_EVENT_PSOC_PRSTN_FALL:
		gaudi_print_irq_info(hdev, event_type, true);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_MMU_PAGE_FAULT:
	case GAUDI_EVENT_MMU_WR_PERM:
		gaudi_print_irq_info(hdev, event_type, true);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_MME0_WBC_RSP:
	case GAUDI_EVENT_MME0_SBAB0_RSP:
	case GAUDI_EVENT_MME1_WBC_RSP:
	case GAUDI_EVENT_MME1_SBAB0_RSP:
	case GAUDI_EVENT_MME2_WBC_RSP:
	case GAUDI_EVENT_MME2_SBAB0_RSP:
	case GAUDI_EVENT_MME3_WBC_RSP:
	case GAUDI_EVENT_MME3_SBAB0_RSP:
	case GAUDI_EVENT_RAZWI_OR_ADC:
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		fallthrough;
	case GAUDI_EVENT_NIC0_QM0:
	case GAUDI_EVENT_NIC0_QM1:
	case GAUDI_EVENT_NIC1_QM0:
	case GAUDI_EVENT_NIC1_QM1:
	case GAUDI_EVENT_NIC2_QM0:
	case GAUDI_EVENT_NIC2_QM1:
	case GAUDI_EVENT_NIC3_QM0:
	case GAUDI_EVENT_NIC3_QM1:
	case GAUDI_EVENT_NIC4_QM0:
	case GAUDI_EVENT_NIC4_QM1:
	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_qman_err(hdev, event_type, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
		break;

	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
		gaudi_print_irq_info(hdev, event_type, true);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		goto reset_device;

	case GAUDI_EVENT_TPC0_BMON_SPMU:
	case GAUDI_EVENT_TPC1_BMON_SPMU:
	case GAUDI_EVENT_TPC2_BMON_SPMU:
	case GAUDI_EVENT_TPC3_BMON_SPMU:
	case GAUDI_EVENT_TPC4_BMON_SPMU:
	case GAUDI_EVENT_TPC5_BMON_SPMU:
	case GAUDI_EVENT_TPC6_BMON_SPMU:
	case GAUDI_EVENT_TPC7_BMON_SPMU:
	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
		gaudi_print_irq_info(hdev, event_type, false);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_print_sm_sei_info(hdev, event_type,
					&eq_entry->sm_sei_data);
		rc = hl_state_dump(hdev);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (rc)
			dev_err(hdev->dev,
				"Error during system state dump %d\n", rc);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		gaudi_print_clk_change_info(hdev, event_type);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PSOC_GPIO_U16_0:
		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
		dev_err(hdev->dev,
			"Received high temp H/W interrupt %d (cause %d)\n",
			event_type, cause);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_DEV_RESET_REQ:
		gaudi_print_irq_info(hdev, event_type, false);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_FW_ALIVE_S:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}

	if (event_mask)
		hl_notifier_event_send_all(hdev, event_mask);

	return;

reset_device:
	reset_required = true;

	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;

		/* notify on device unavailable while the reset is triggered by fw */
		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
	} else if (hdev->hard_reset_on_fw_events) {
		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
	} else {
		reset_required = false;
	}

	/* Even when the reset is not executed, a notification on the
	 * occurred event needs to be sent here.
	 */
	hl_notifier_event_send_all(hdev, event_mask);
	if (reset_required)
		hl_device_reset(hdev, flags);
	else
		hl_fw_unmask_irq(hdev, event_type);
}
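
/*
 * Return either the aggregated (since driver load) or the current (since the
 * last reset) event counters, together with the size of the returned buffer.
 */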
static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (aggregate) {
		*size = (u32) sizeof(gaudi->events_stat_aggregate);
		return gaudi->events_stat_aggregate;
	}

	*size = (u32) sizeof(gaudi->events_stat);
	return gaudi->events_stat;
}
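
/*
 * Invalidate the MMU STLB cache: the producer index is written to
 * mmSTLB_CACHE_INV and the invalidation is considered done once the
 * mmSTLB_INV_PS register polls back to zero.
 */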
static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->reset_info.hard_reset_pending)
		return 0;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_PS, 3);
	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
	WREG32(mmSTLB_INV_PS, 2);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_PS,
		status,
		!status,
		1000,
		timeout_usec);

	WREG32(mmSTLB_INV_SET, 0);

	return rc;
}
static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
						bool is_hard, u32 flags,
						u32 asid, u64 va, u64 size)
{
	/* Treat as invalidate all because there is no range invalidation
	 * in Gaudi
	 */
	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
}
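
/*
 * Program the hop0 page-table base address of the given ASID into the MMU,
 * then poll MMU_BUSY until the configuration is accepted.
 */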
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(MMU_ASID, asid);
	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_BUSY, 0x80000000);

	rc = hl_poll_timeout(
		hdev,
		MMU_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}
static int gaudi_send_heartbeat(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_heartbeat(hdev);
}
static int gaudi_cpucp_info_get(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1);
	if (rc)
		return rc;

	if (!strlen(prop->cpucp_info.card_name))
		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
				CARD_NAME_MAX_LEN);

	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);

	set_default_power_values(hdev);

	return 0;
}
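
/*
 * Walk all DMA, TPC, MME and NIC engines and report whether each is idle,
 * based on its QMAN/core status registers. When a mask array is supplied,
 * a bit is set for every busy engine; when an engines_data buffer is
 * supplied, a human-readable status table is emitted as well.
 */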
static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
					struct engines_data *e)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
	unsigned long *mask = (unsigned long *)mask_arr;
	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
	bool is_idle = true, is_eng_idle, is_slave;
	u64 offset;
	int i, dma_id, port;

	if (e)
		hl_engine_data_sprintf(e,
			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
			"---  -------  ------------  ----------  -------------\n");

	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		offset = dma_id * DMA_QMAN_OFFSET;

		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
		if (e)
			hl_engine_data_sprintf(e, fmt, dma_id,
				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				qm_cgm_sts, dma_core_sts0);
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
			"---  -------  ------------  ----------  ----------\n");

	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		offset = i * TPC_QMAN_OFFSET;
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
		if (e)
			hl_engine_data_sprintf(e, fmt, i,
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
			"---  -------  ------------  ----------  -----------\n");

	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
		offset = i * MME_QMAN_OFFSET;
		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
		is_eng_idle = IS_MME_IDLE(mme_arch_sts);

		/* MME 1 & 3 are slaves, no need to check their QMANs */
		is_slave = i % 2;
		if (!is_slave) {
			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
		}

		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
		if (e) {
			if (!is_slave)
				hl_engine_data_sprintf(e, fmt, i,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
			else
				hl_engine_data_sprintf(e, mme_slave_fmt, i,
					is_eng_idle ? "Y" : "N", "-",
					"-", mme_arch_sts);
		}
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
			"---  -------  ------------  ----------\n");

	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
		offset = i * NIC_MACRO_QMAN_OFFSET;
		port = 2 * i;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (e)
				hl_engine_data_sprintf(e, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}

		port = 2 * i + 1;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (e)
				hl_engine_data_sprintf(e, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}
	}

	if (e)
		hl_engine_data_sprintf(e, "\n");

	return is_idle;
}
static void gaudi_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_lock(&gaudi->hw_queues_lock);
}

static void gaudi_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_unlock(&gaudi->hw_queues_lock);
}
static u32 gaudi_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}
static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
				size_t max_size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_eeprom_data(hdev, data, max_size);
}
static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_monitor_dump(hdev, data);
}
/*
 * This function should be used only during initialization and/or after reset,
 * when there are no active users.
 */
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
{
	u64 kernel_timeout;
	u32 status, offset;
	int rc;

	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);

	if (hdev->pldm)
		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
	else
		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;

	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
			upper_32_bits(tpc_kernel));
	/* set a valid LUT pointer, content is of no significance */
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
			lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));

	WREG32(mmTPC0_CFG_TPC_CMD + offset,
			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d icache prefetch\n",
			tpc_id);
		return -EIO;
	}

	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);

	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d vector pipe\n",
			tpc_id);
		return -EIO;
	}

	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
		status,
		(status == 0),
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d kernel to execute\n",
			tpc_id);
		return -EIO;
	}

	return 0;
}
static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int min_alloc_order, rc, collective_cb_size;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
							HOST_SPACE_INTERNAL_CB_SZ,
							&hdev->internal_cb_pool_dma_addr,
							GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
			sizeof(struct packet_fence);
	min_alloc_order = ilog2(collective_cb_size);

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev,
			"Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool,
				(uintptr_t) hdev->internal_cb_pool_virt_addr,
				HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base) {
		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}

	mutex_lock(&hdev->mmu_lock);
	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
			hdev->internal_cb_pool_dma_addr,
			HOST_SPACE_INTERNAL_CB_SZ);

	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	if (rc)
		goto unreserve_internal_cb_pool;

	return 0;

unreserve_internal_cb_pool:
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);

	return rc;
}
static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);
}
static int gaudi_ctx_init(struct hl_ctx *ctx)
{
	int rc;

	if (ctx->asid == HL_KERNEL_ASID_ID)
		return 0;

	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
	if (rc)
		return rc;

	rc = gaudi_restore_user_registers(ctx->hdev);
	if (rc)
		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);

	return rc;
}

static void gaudi_ctx_fini(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return;

	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
}
static int gaudi_pre_schedule_cs(struct hl_cs *cs)
{
	return 0;
}

static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return gaudi_cq_assignment[cq_idx];
}
static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) +
			sizeof(struct packet_msg_prot) * 2;
}

static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 +
			sizeof(struct packet_fence) +
			sizeof(struct packet_msg_prot) * 2;
}
static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
}
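
/*
 * Build a signal CB: a single MSG_SHORT packet that atomically increments
 * the given sync object (ADD mode, value 1) in the west-south sync manager.
 */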
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = cb->kernel_address + size;
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}
static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
					u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
		u16 sob_val, u16 mon_id)
{
	u64 monitor_base;
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u16 msg_addr_offset;
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev,
				"sob_base %u (mask %#x) is not valid\n",
				sob_base, sob_mask);
		return 0;
	}

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;

	memset(pkt, 0, pkt_size);

	/* Monitor config packet: bind the monitor to a sync object */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL */
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
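
/*
 * Translate a queue ID to the address of the CP_FENCE2_RDATA register the
 * wait packet should poll on. Only queues that take part in collective
 * waits are mapped; any other queue ID is rejected with -EINVAL.
 */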
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}
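
/*
 * Configure a sync manager monitor with three MSG_SHORT packets: the low
 * and high halves of the payload address, followed by the payload data
 * (the value 1) to be written when the monitor fires.
 */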
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}
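
/*
 * Build a wait CB. The resulting packet sequence is:
 * monitor setup (3 x MSG_SHORT) -> monitor arm (1 x MSG_SHORT) -> FENCE,
 * so the stream blocks until the monitored sync objects reach the target
 * value and the monitor payload releases the fence.
 */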
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
		struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}
static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}
static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
				u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}
static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}
static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return 0;
}
static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}
static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* Reg value represents a partial address of sync object,
	 * it is used as a unique identifier. For this we need to
	 * clear the cutoff cfg base bits from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= lower_32_bits(CFG_BASE);

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}
static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}
static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}
static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}
static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}
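
/*
 * Dump the state of all in-progress fences of a single engine into the
 * resizable state-dump buffer: per-stream fence ID, counter and RDATA
 * register addresses, current value and CP status.
 */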
static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}
static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};
static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}
static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}

static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}
static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	struct cpucp_info *cpucp_info;

	cpucp_info = &hdev->asic_prop.cpucp_info;

	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}

static DEVICE_ATTR_RO(infineon_ver);
static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};

static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}
static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}
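
/*
 * The table below is Gaudi's implementation of the common hl_asic_funcs
 * vtable. As a rough, illustrative sketch (not code from this driver, and
 * hl_core_handle_eqe is a hypothetical name), the ASIC-independent core
 * dispatches through it along the lines of:
 *
 *	void hl_core_handle_eqe(struct hl_device *hdev,
 *				struct hl_eq_entry *entry)
 *	{
 *		// every ASIC-specific detail stays behind the vtable
 *		hdev->asic_funcs->handle_eqe(hdev, entry);
 *	}
 *
 * so supporting a new ASIC only requires providing another such table.
 */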
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sgtable = hl_dma_map_sgtable,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.compute_reset_late_init = gaudi_compute_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.pre_schedule_cs = gaudi_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
	.send_device_activity = gaudi_send_device_activity,
};
/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}