// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>
/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is always not
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */
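/*
 * A minimal sketch of the secured-DMA sequence described above (the real
 * flow lives in gaudi_send_job_on_qman0() and its callers):
 *
 *   1. verify the device is idle
 *   2. mark DMA channel 0 as secured
 *   3. submit the job on QMAN0 and wait for completion
 *   4. mark DMA channel 0 as not secured again
 */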
#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT		0x1000000

#define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
		BIT(GAUDI_ENGINE_ID_MME_0) |\
		BIT(GAUDI_ENGINE_ID_MME_2) |\
		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
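/* Presumably only MME 0 and MME 2 appear in the mask above because they are
 * the master MME engines (MME 1 and MME 3 act as their slaves), while all
 * eight TPCs are covered by the GENMASK_ULL() range.
 */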
#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define GAUDI_PLL_MAX 10

#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")
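/* sizeof() on the string literal above counts the "0b" prefix, the 32 binary
 * digits and the terminating NUL, i.e. 35 bytes for one 32-bit register dump.
 */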
static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
		"gaudi cpu eq"
};
static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};
static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3
};
static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};
static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}
static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};
static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};
static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};
enum gaudi_sm_sei_cause {
	GAUDI_SM_SEI_SO_OVERFLOW,
	GAUDI_SM_SEI_LBW_4B_UNALIGNED,
	GAUDI_SM_SEI_AXI_RESPONSE_ERR
};
static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};
static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};
static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEADBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};
static s64 gaudi_state_dump_specs_props[] = {
	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
	[SP_MON_OBJ_WR_ADDR_LOW] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
	[SP_MON_OBJ_WR_ADDR_HIGH] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
	[SP_FENCE0_CNT_OFFSET] =
		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_FENCE0_RDATA_OFFSET] =
		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
};
/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_N",
	"SYNC_MGR_W_S",
	NULL
};
struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
	bool disable_clock_gating;
};
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
					u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val);
static int gaudi_schedule_register_memset(struct hl_device *hdev,
		u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);
static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}
static inline void set_default_power_values(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	if (hdev->card_type == cpucp_card_type_pmc) {
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
		prop->dc_power_default = DC_POWER_DEFAULT_PMC;
	} else {
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
	}
}
static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;
		}
		prop->hw_queues_props[i].collective_mode =
						get_collective_mode(hdev, i);
	}

	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream are reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);
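	/* Worked example of the reservation arithmetic above (the constant
	 * values come from the habanalabs headers and may differ between
	 * kernel versions): the collective code reserves HL_RSVD_SOBS groups
	 * of NUMBER_OF_SOBS_IN_GRP SOBs per QMAN stream, each group padded up
	 * to a multiple of HL_MAX_SOBS_PER_MONITOR so a single monitor can
	 * observe a whole group; the sync-stream SOBs and monitors then start
	 * right after these collective reservations.
	 */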
	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address +
					prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address +
					prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
					SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop0_shift = HOP0_SHIFT;
	prop->pmmu.hop1_shift = HOP1_SHIFT;
	prop->pmmu.hop2_shift = HOP2_SHIFT;
	prop->pmmu.hop3_shift = HOP3_SHIFT;
	prop->pmmu.hop4_shift = HOP4_SHIFT;
	prop->pmmu.hop0_mask = HOP0_MASK;
	prop->pmmu.hop1_mask = HOP1_MASK;
	prop->pmmu.hop2_mask = HOP2_MASK;
	prop->pmmu.hop3_mask = HOP3_MASK;
	prop->pmmu.hop4_mask = HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

	/* PMMU and HPMMU are the same except of page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_msix_interrupt = USHRT_MAX;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	return 0;
}
static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}
static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}
static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_BAR_ID),
			SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
								CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
					mmCPU_BOOT_DEV_STS0,
					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1,
					GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_info(hdev->dev,
			"H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true);
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);

	hl_pci_fini(hdev);

	return 0;
}
/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if (hdev->asic_prop.fw_security_enabled) {
		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);

		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	} else {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
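			/* e.g. with a 50 MHz reference, nf = 47, nr = 0 and
			 * od = 1 would give pll_clk = 50 * 48 / (1 * 2) =
			 * 1200 MHz (illustrative numbers only; the real
			 * parameters are read from the PLL registers above).
			 */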
891 "Received invalid div select value: %d",
897 prop->psoc_timestamp_frequency = freq;
898 prop->psoc_pci_pll_nr = nr;
899 prop->psoc_pci_pll_nf = nf;
900 prop->psoc_pci_pll_od = od;
901 prop->psoc_pci_pll_div_factor = div_fctr;
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
	dst_addr = (prop->sram_user_base_address &
			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}
/**
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
			&dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
			dma_handle);

out:
	release_firmware(fw);
	return rc;
}
static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine need to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}
static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	u64 base_addr;
	int rc;

	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob_group->base_sob_id * 4;
	rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
			base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
	if (rc)
		dev_err(hdev->dev,
			"failed resetting sob group - sob base %u, count %u",
			hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}
static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);
	/* Set collective engine bit */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
				BIT(i % HL_MAX_SOBS_PER_MONITOR);
}
static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	gaudi_collective_mstr_sob_mask_set(gaudi);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}
static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}
static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}
static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	/* copy the SOB id and value of the signal CS */
	cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
	cs_cmpl->sob_val = signal_cs_cmpl->sob_val;

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);
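		/* The AND with (HL_RSVD_SOBS - 1) above cycles the group
		 * index through the HL_RSVD_SOBS reserved groups; this only
		 * works because HL_RSVD_SOBS is a power of two.
		 */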
		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);
	/*
	 * Must put the signal fence after the SOB refcnt increment so
	 * the SOB refcnt won't turn 0 and reset the SOB before the
	 * wait CS was submitted.
	 */
	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;
}
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI-X
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size,
			hdev->mmu_enable && !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() out of two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}
static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
		u32 collective_engine_id)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * Rest of the jobs goes to the collective slave queues which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
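	/* For example (assuming all ten NICs are enabled): job 0 lands on
	 * the collective master queue, jobs 1-10 on the per-NIC slave queues
	 * of this stream, and the last job on the reduction engine queue
	 * (DMA5 or TPC7).
	 */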
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
						BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
		}

		if (rc)
			return rc;
	}

	return rc;
}
static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);

	return rc;
}
static void gaudi_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	int i = 0;

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}
static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bits addresses, while bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical in all allocated range.
	 */
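	/* Worked example (hypothetical addresses, for illustration only): a
	 * range starting at 0x7f_ffff_f000 would cross 0x80_0000_0000 (2^39),
	 * so bits 49:39 of its first and last bytes differ and the allocation
	 * below must be retried.
	 */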
	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_CPU_ACCESSIBLE_MEM_SIZE,
						&dma_addr_arr[i],
						GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	for (j = 0 ; j < i ; j++)
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
						HL_CPU_ACCESSIBLE_MEM_SIZE,
						virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}
static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		q = &gaudi->internal_qmans[i];
		if (!q->pq_kernel_addr)
			continue;
		hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
							q->pq_kernel_addr,
							q->pq_dma_addr);
	}
}

static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	int rc, i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
			continue;

		q = &gaudi->internal_qmans[i];

		switch (i) {
		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
			break;
		default:
			dev_err(hdev->dev, "Bad internal queue index %d", i);
			rc = -EINVAL;
			goto free_internal_qmans_pq_mem;
		}

		q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
						hdev, q->pq_size,
						&q->pq_dma_addr,
						GFP_KERNEL | __GFP_ZERO);
		if (!q->pq_kernel_addr) {
			rc = -ENOMEM;
			goto free_internal_qmans_pq_mem;
		}
	}

	return 0;

free_internal_qmans_pq_mem:
	gaudi_free_internal_qmans_pq_mem(hdev);
	return rc;
}
static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_mem_region *region;

	/* CFG */
	region = &hdev->pci_mem_region[PCI_REGION_CFG];
	region->region_base = CFG_BASE;
	region->region_size = CFG_SIZE;
	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = CFG_BAR_ID;
	region->used = 1;

	/* SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
	region->region_base = SRAM_BASE_ADDR;
	region->region_size = SRAM_SIZE;
	region->offset_in_bar = 0;
	region->bar_size = SRAM_BAR_SIZE;
	region->bar_id = SRAM_BAR_ID;
	region->used = 1;

	/* DRAM */
	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
	region->region_base = DRAM_PHYS_BASE;
	region->region_size = hdev->asic_prop.dram_size;
	region->offset_in_bar = 0;
	region->bar_size = prop->dram_pci_bar_size;
	region->bar_id = HBM_BAR_ID;
	region->used = 1;

	/* SP SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
	region->region_base = PSOC_SCRATCHPAD_ADDR;
	region->region_size = PSOC_SCRATCHPAD_SIZE;
	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = CFG_BAR_ID;
	region->used = 1;
}
static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;

	/* Allocate device structure */
	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
	if (!gaudi)
		return -ENOMEM;

	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
		if (gaudi_irq_map_table[i].valid) {
			if (event_id == GAUDI_EVENT_SIZE) {
				dev_err(hdev->dev,
					"Event array exceeds the limit of %u events\n",
					GAUDI_EVENT_SIZE);
				rc = -EINVAL;
				goto free_gaudi_device;
			}

			gaudi->events[event_id++] =
					gaudi_irq_map_table[i].fc_id;
		}
	}

	gaudi->cpucp_info_get = gaudi_cpucp_info_get;

	gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;

	hdev->asic_specific = gaudi;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);
	mutex_init(&gaudi->clk_gate_mutex);

	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;
	hdev->supports_staged_submission = true;

	gaudi_set_pci_memory_regions(hdev);

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}
static int gaudi_sw_fini(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	gaudi_free_internal_qmans_pq_mem(hdev);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	mutex_destroy(&gaudi->clk_gate_mutex);

	kfree(gaudi);

	return 0;
}
static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
{
	struct hl_device *hdev = arg;
	int i;

	if (hdev->disabled)
		return IRQ_HANDLED;

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);

	hl_irq_handler_eq(irq, &hdev->event_queue);

	return IRQ_HANDLED;
}

/*
 * For backward compatibility, new MSI interrupts should be set after the
 * existing CPU and NIC interrupts.
 */
static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
				bool cpu_eq)
{
	int msi_vec;

	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
				GAUDI_EVENT_QUEUE_MSI_IDX);

	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
			(nr + NIC_NUMBER_OF_ENGINES + 1);
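	/* Example of the mapping above: completion-queue indices below
	 * GAUDI_EVENT_QUEUE_MSI_IDX (and the CPU event queue itself) map 1:1
	 * to MSI vectors, while any later index is shifted past the CPU EQ
	 * vector and the NIC_NUMBER_OF_ENGINES NIC vectors.
	 */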
	return pci_irq_vector(hdev->pdev, msi_vec);
}

static int gaudi_enable_msi_single(struct hl_device *hdev)
{
	int rc, irq;

	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");

	irq = gaudi_pci_irq_vector(hdev, 0, false);
	rc = request_irq(irq, gaudi_irq_handler_single, 0,
			"gaudi single msi", hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to request single MSI IRQ\n");

	return rc;
}
static int gaudi_enable_msi_multi(struct hl_device *hdev)
{
	int cq_cnt = hdev->asic_prop.completion_queues_count;
	int rc, i, irq_cnt_init, irq;

	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
		irq = gaudi_pci_irq_vector(hdev, i, false);
		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
				&hdev->completion_queue[i]);
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_irqs;
		}
	}

	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
				&hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
		goto free_irqs;
	}

	return 0;

free_irqs:
	for (i = 0 ; i < irq_cnt_init ; i++)
		free_irq(gaudi_pci_irq_vector(hdev, i, false),
				&hdev->completion_queue[i]);
	return rc;
}
static int gaudi_enable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
		return 0;

	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
	if (rc < 0) {
		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
		return rc;
	}

	if (rc < NUMBER_OF_INTERRUPTS) {
		gaudi->multi_msi_mode = false;
		rc = gaudi_enable_msi_single(hdev);
	} else {
		gaudi->multi_msi_mode = true;
		rc = gaudi_enable_msi_multi(hdev);
	}

	if (rc)
		goto free_pci_irq_vectors;

	gaudi->hw_cap_initialized |= HW_CAP_MSI;

	return 0;

free_pci_irq_vectors:
	pci_free_irq_vectors(hdev->pdev);
	return rc;
}
static void gaudi_sync_irqs(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, cq_cnt = hdev->asic_prop.completion_queues_count;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	/* Wait for all pending IRQs to be finished */
	if (gaudi->multi_msi_mode) {
		for (i = 0 ; i < cq_cnt ; i++)
			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));

		synchronize_irq(gaudi_pci_irq_vector(hdev,
						GAUDI_EVENT_QUEUE_MSI_IDX,
						true));
	} else {
		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
	}
}

static void gaudi_disable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	gaudi_sync_irqs(hdev);

	if (gaudi->multi_msi_mode) {
		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
						true);
		free_irq(irq, &hdev->event_queue);

		for (i = 0 ; i < cq_cnt ; i++) {
			irq = gaudi_pci_irq_vector(hdev, i, false);
			free_irq(irq, &hdev->completion_queue[i]);
		}
	} else {
		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
	}

	pci_free_irq_vectors(hdev->pdev);

	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
}

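/*
 * gaudi_sync_irqs() runs before any free_irq() so that handlers still
 * executing on another CPU drain out while the vectors are intact; only
 * then are the vectors released back to the PCI core.
 */
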
static void gaudi_init_scrambler_sram(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
		return;

	if (!hdev->sram_scrambler_enable)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
}

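/*
 * Guard pattern shared by this function and the HBM variant below: skip
 * when the registers are firmware-owned (secured), when firmware already
 * enabled scrambling (boot_dev_sts0 bit), when the driver already did it
 * (hw_cap_initialized), or when scrambling is disabled by module
 * parameter.
 */
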
static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
		return;

	if (!hdev->dram_scrambler_enable)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
}

static void gaudi_init_e2e(struct hl_device *hdev)
{
	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
		return;

	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);

	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	if (!hdev->dram_scrambler_enable) {
		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
	}

	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
}

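/*
 * Observation on the E2E sizes above: the HBM WR/RD values are written
 * as a tuned decimal divided by 8 (the ">> 3"), which suggests those
 * registers count in units of eight, while the PCI values are written
 * raw. The specific numbers are per-router tuning constants.
 */
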
static void gaudi_init_hbm_cred(struct hl_device *hdev)
{
	uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_HBM_CRED_EN)
		return;

	hbm0_wr = 0x33333333;
	hbm0_rd = 0x77777777;
	hbm1_wr = 0x55555555;
	hbm1_rd = 0xDDDDDDDD;

	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));

	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
}

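/*
 * The credit constants above use one repeated hex digit per nibble;
 * read credits (0x77.../0xDD...) are programmed higher than write
 * credits (0x33.../0x55...), and HBM0/HBM1 differ. The per-nibble field
 * layout is hardware-defined and not documented here.
 */
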
static void gaudi_init_golden_registers(struct hl_device *hdev)
{
	u32 tpc_offset;
	int tpc_id, i;

	gaudi_init_e2e(hdev);
	gaudi_init_hbm_cred(hdev);

	for (tpc_id = 0, tpc_offset = 0;
			tpc_id < TPC_NUMBER_OF_ENGINES;
			tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
		/* Mask all arithmetic interrupts from TPC */
		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
		/* Set 16 cache lines */
		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
				ICACHE_FETCH_LINE_NUM, 2);
	}

	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
	for (i = 0 ; i < 128 ; i += 8)
		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);

	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
}

static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
				int qman_id, dma_addr_t qman_pq_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 q_off, dma_qm_offset;
	u32 dma_qm_err_cfg, irq_handler_offset;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = dma_qm_offset + qman_id * 4;

	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));

	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_SRC_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_DST_OFFSET);

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);

	/* The following configuration is needed only once per QMAN */
	if (qman_id == 0) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Increase ARB WDT to support streams architecture */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
				GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_EXTERNAL_MAKE_TRUSTED);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
	}
}

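/*
 * Everything above the qman_id check is per-stream state (PQ base/size,
 * CP message bases); the error reporting, arbitration and protection
 * registers live once per QMAN, hence the qman_id == 0 gate.
 */
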
static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
	u32 irq_handler_offset;

	/* Set to maximum possible according to physical size */
	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);

	/* WA for H/W bug H3-2116 */
	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);

	/* STOP_ON bit implies no completion to operation in case of RAZWI */
	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);

	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);

	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
		lower_32_bits(CFG_BASE + irq_handler_offset));
	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
		upper_32_bits(CFG_BASE + irq_handler_offset));

	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
	WREG32(mmDMA0_CORE_PROT + dma_offset,
			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
	/* If the channel is secured, it should be in MMU bypass mode */
	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
}

static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
			u32 enable_mask)
{
	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
}

static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;

	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
		return;

	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		/*
		 * For queues after the CPU Q need to add 1 to get the correct
		 * queue. In addition, need to add the CPU EQ and NIC IRQs in
		 * order to get the correct MSI register.
		 */
		if (dma_id > 1) {
			cpu_skip = 1;
			nic_skip = NIC_NUMBER_OF_ENGINES;
		} else {
			cpu_skip = 0;
			nic_skip = 0;
		}

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			q_idx = 4 * dma_id + j + cpu_skip;
			q = &hdev->kernel_queues[q_idx];
			q->cq_id = cq_id++;
			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
			gaudi_init_pci_dma_qman(hdev, dma_id, j,
						q->bus_address);
		}

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
}

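/*
 * Illustrative index math (actual values depend on the queue enumeration
 * in gaudi.h): for the DMA channel assigned id 5, stream 2, the kernel
 * queue index is 4 * 5 + 2 + 1 = 23, where the "+ 1" skips the CPU queue
 * slot that sits between the first two PCI DMA channels and the rest;
 * msi_vec is additionally offset by the CPU EQ and NIC vectors.
 */
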
static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 dma_qm_err_cfg, irq_handler_offset;
	u32 q_off, dma_qm_offset;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = dma_qm_offset + qman_id * 4;

	if (qman_id < 4) {
		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_DST_OFFSET);
	} else {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Increase ARB WDT to support streams architecture */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
				GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}

static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	int i, j, dma_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
		return;

	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			 /*
			  * Add the CPU queue in order to get the correct queue
			  * number as all internal queue are placed after it
			  */
			internal_q_index = dma_id * QMAN_STREAMS + j + 1;

			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
						qman_base_addr);
		}

		/* Initializing lower CP for HBM DMA QMAN */
		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
}

static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 irq_handler_offset;
	u32 q_off, mme_id;
	u32 mme_qm_err_cfg;

	mtr_base_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = mme_offset + qman_id * 4;

	if (qman_id < 4) {
		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_DST_OFFSET);
	} else {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		mme_id = mme_offset /
				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;

		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			mme_qm_err_cfg |=
				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);

		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
									mme_id);

		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Increase ARB WDT to support streams architecture */
		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
				GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
}

static void gaudi_init_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 mme_offset;
	int i, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_MME)
		return;

	/*
	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
	 */

	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;

	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
		q = &gaudi->internal_qmans[internal_q_index];
		qman_base_addr = (u64) q->pq_dma_addr;
		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
					qman_base_addr);
		if (i == 3)
			mme_offset = 0;
	}

	/* Initializing lower CP for MME QMANs */
	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
	gaudi_init_mme_qman(hdev, 0, 4, 0);

	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);

	gaudi->hw_cap_initialized |= HW_CAP_MME;
}

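/*
 * Only the MME0 and MME2 QMANs are enabled here; the other two MME
 * engines are driven as slaves of these masters, which is also why the
 * queue IDs map to just the two QM bases noted in the comment above.
 */
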
static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 tpc_qm_err_cfg, irq_handler_offset;
	u32 q_off, tpc_id;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = tpc_offset + qman_id * 4;

	tpc_id = tpc_offset /
			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);

	if (qman_id < 4) {
		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_DST_OFFSET);
	} else {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			tpc_qm_err_cfg |=
				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);

		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
									tpc_id);

		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Increase ARB WDT to support streams architecture */
		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
				GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
	if (tpc_id == 6) {
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}

static void gaudi_init_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 so_base_hi, tpc_offset = 0;
	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
	int i, tpc_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
		return;

	so_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
						tpc_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_tpc_qman(hdev, tpc_offset, i,
						qman_base_addr);

			if (i == 3) {
				/* Initializing lower CP for TPC QMAN */
				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);

				/* Enable the QMAN and TPC channel */
				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
						QMAN_TPC_ENABLE);
			}
		}

		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
				so_base_hi);

		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;

		gaudi->hw_cap_initialized |=
				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
	}
}

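/*
 * Per TPC, the four upper-CP streams are configured first; once the last
 * stream (i == 3) is in place the lower CP is initialized and the engine
 * is enabled, so a TPC never runs with a half-configured QMAN.
 */
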
static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
				int qman_id, u64 qman_base_addr, int nic_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 nic_qm_err_cfg, irq_handler_offset;
	u32 q_off;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = nic_offset + qman_id * 4;

	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));

	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);

	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
			QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_SRC_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_DST_OFFSET);

	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	if (qman_id == 0) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			nic_qm_err_cfg |=
				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);

		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
									nic_id);

		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Increase ARB WDT to support streams architecture */
		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
				GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}
}

static void gaudi_init_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int i, nic_id, internal_q_index;

	if (!hdev->nic_ports_mask)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
		return;

	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
			nic_offset += nic_delta_between_qmans;
			if (nic_id & 1) {
				nic_offset -= (nic_delta_between_qmans * 2);
				nic_offset += nic_delta_between_nics;
			}
			continue;
		}

		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
						nic_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
						qman_base_addr, nic_id);
		}

		/* Enable the QMAN */
		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);

		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}

		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
	}
}

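/*
 * Offset walk: each NIC macro hosts two QMANs, so after every port the
 * offset advances by the QMAN stride; after an odd port it is rewound by
 * two QMAN strides and advanced by one NIC stride, landing on the next
 * macro. Disabled ports (cleared bits in nic_ports_mask) walk the same
 * offsets without touching the hardware.
 */
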
static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	WREG32(mmMME2_QM_GLBL_CFG0, 0);
	WREG32(mmMME0_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 tpc_offset = 0;
	int tpc_id;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
	}
}

static void gaudi_disable_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 nic_mask, nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int nic_id;

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);

		if (gaudi->hw_cap_initialized & nic_mask)
			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);

		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}
	}
}

static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	/* Stop CPs of HBM DMA QMANs */

	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

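/*
 * The stop masks differ on purpose: 0xF covers the four upper CPs of the
 * PCI DMA QMANs above, while 0x1F used here and below also stops the
 * fifth, lower CP that the internal (HBM DMA/MME/TPC) QMANs use.
 */
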
static void gaudi_stop_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	/* Stop CPs of MME QMANs */
	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	/* Stop upper CPs of QMANs */

	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
		WREG32(mmNIC0_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
		WREG32(mmNIC0_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
		WREG32(mmNIC1_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
		WREG32(mmNIC1_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
		WREG32(mmNIC2_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
		WREG32(mmNIC2_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
		WREG32(mmNIC3_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
		WREG32(mmNIC3_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
		WREG32(mmNIC4_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
		WREG32(mmNIC4_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
}

static void gaudi_pci_dma_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
}

static void gaudi_hbm_dma_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
}

static void gaudi_mme_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
}

3633 static void gaudi_tpc_stall(struct hl_device *hdev)
3635 struct gaudi_device *gaudi = hdev->asic_specific;
3637 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3640 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3641 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3642 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3643 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3644 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3645 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3646 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3647 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
static void gaudi_set_clock_gating(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 qman_offset;
	bool enable;
	int i;

	/* Don't enable the clock gate if we are in a debug session, as it
	 * may interfere with the debug
	 */
	if (hdev->in_debug)
		return;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
		enable = !!(hdev->clock_gating_mask &
				(BIT_ULL(gaudi_dma_assignment[i])));

		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
				enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
	}

	for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
		enable = !!(hdev->clock_gating_mask &
				(BIT_ULL(gaudi_dma_assignment[i])));

		/* GC sends work to the DMA engine through the Upper CP in
		 * DMA5, so we must not enable clock gating in that DMA
		 */
		if (i == GAUDI_HBM_DMA_4)
			enable = 0;

		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
	}

	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
	WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
	WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);

	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
	WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
	WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);

	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		enable = !!(hdev->clock_gating_mask &
				(BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));

		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
		WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);

		qman_offset += TPC_QMAN_OFFSET;
	}

	gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
}

static void gaudi_disable_clock_gating(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 qman_offset;
	int i;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);

		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
	}

	WREG32(mmMME0_QM_CGM_CFG, 0);
	WREG32(mmMME0_QM_CGM_CFG1, 0);
	WREG32(mmMME2_QM_CGM_CFG, 0);
	WREG32(mmMME2_QM_CGM_CFG1, 0);

	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);

		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
	}

	gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
}

static void gaudi_enable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);

	/* Zero the lower/upper parts of the 64-bit counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);

	/* Enable the counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
}

static void gaudi_disable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
}

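/* Engine halt is a three-phase sequence: stop the QMANs so no new work is
 * fetched, stall the engines so in-flight work drains, and only then
 * disable the QMANs entirely. The msleep() calls between the phases give
 * the H/W time to quiesce.
 */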
static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
{
	u32 wait_timeout_ms;

	dev_info(hdev->dev,
		"Halting compute engines and disabling interrupts\n");

	if (hdev->pldm)
		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
	else
		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;

	gaudi_stop_nic_qmans(hdev);
	gaudi_stop_mme_qmans(hdev);
	gaudi_stop_tpc_qmans(hdev);
	gaudi_stop_hbm_dma_qmans(hdev);
	gaudi_stop_pci_dma_qmans(hdev);

	hdev->asic_funcs->disable_clock_gating(hdev);

	msleep(wait_timeout_ms);

	gaudi_pci_dma_stall(hdev);
	gaudi_hbm_dma_stall(hdev);
	gaudi_tpc_stall(hdev);
	gaudi_mme_stall(hdev);

	msleep(wait_timeout_ms);

	gaudi_disable_nic_qmans(hdev);
	gaudi_disable_mme_qmans(hdev);
	gaudi_disable_tpc_qmans(hdev);
	gaudi_disable_hbm_dma_qmans(hdev);
	gaudi_disable_pci_dma_qmans(hdev);

	gaudi_disable_timestamp(hdev);

	gaudi_disable_msi(hdev);
}

static int gaudi_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hop0_addr;
	int rc, i;

	if (!hdev->mmu_enable)
		return 0;

	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	for (i = 0 ; i < prop->max_asid ; i++) {
		hop0_addr = prop->mmu_pgt_addr +
				(i * prop->mmu_hop_table_size);

		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
		if (rc) {
			dev_err(hdev->dev,
				"failed to set hop0 addr for asid %d\n", i);
			goto err;
		}
	}

	/* init MMU cache manage page */
	WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);

	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);

	WREG32(mmMMU_UP_MMU_ENABLE, 1);
	WREG32(mmMMU_UP_SPI_MASK, 0xF);

	WREG32(mmSTLB_HOP_CONFIGURATION,
			hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);

	/*
	 * The H/W expects the first PI after init to be 1. After wraparound
	 * we'll write 0.
	 */
	gaudi->mmu_cache_inv_pi = 1;

	gaudi->hw_cap_initialized |= HW_CAP_MMU;

	return 0;

err:
	return rc;
}

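/* The two F/W images go through different BARs, matching where each boot
 * stage executes from: the boot-fit is copied into SRAM while the Linux
 * image is copied into HBM.
 */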
static int gaudi_load_firmware_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
}

static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
}

static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
{
	struct dynamic_fw_load_mgr *dynamic_loader;
	struct cpu_dyn_regs *dyn_regs;

	dynamic_loader = &hdev->fw_loader.dynamic_loader;

	/*
	 * Here we update the initial values of a few specific dynamic regs:
	 * before the first descriptor is read from the F/W, these values
	 * have to be hard-coded. In later stages of the protocol they are
	 * updated automatically by reading the F/W descriptor, so the data
	 * there is always up-to-date.
	 */
	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
	dyn_regs->kmd_msg_to_cpu =
				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
	dyn_regs->cpu_cmd_status_to_host =
				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);

	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
}

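/* The static loader below is the fallback for old F/W that doesn't speak
 * the dynamic COMMS protocol: every communication register and version
 * string offset is hard-coded instead of being read from a F/W-provided
 * descriptor.
 */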
static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
{
	struct static_fw_load_mgr *static_loader;

	static_loader = &hdev->fw_loader.static_loader;

	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
	static_loader->cpu_reset_wait_msec = hdev->pldm ?
			GAUDI_PLDM_RESET_WAIT_MSEC :
			GAUDI_CPU_RESET_WAIT_MSEC;
}

static void gaudi_init_firmware_loader(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct fw_load_mgr *fw_loader = &hdev->fw_loader;

	/* fill common fields */
	fw_loader->linux_loaded = false;
	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
	fw_loader->skip_bmc = !hdev->bmc_enable;
	fw_loader->sram_bar_id = SRAM_BAR_ID;
	fw_loader->dram_bar_id = HBM_BAR_ID;

	if (prop->dynamic_fw_load)
		gaudi_init_dynamic_firmware_loader(hdev);
	else
		gaudi_init_static_firmware_loader(hdev);
}

static int gaudi_init_cpu(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
		return 0;

	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	/*
	 * The device CPU works with 40-bit addresses.
	 * This register sets the extension to 50 bits.
	 */
	if (!hdev->asic_prop.fw_security_enabled)
		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);

	rc = hl_fw_init_cpu(hdev);
	if (rc)
		return rc;

	gaudi->hw_cap_initialized |= HW_CAP_CPU;

	return 0;
}

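/* PQ/EQ/CQ handshake with the device CPU: publish the host bus addresses
 * and sizes of the queues, signal readiness through mmCPU_IF_QUEUE_INIT,
 * kick the CPU with a PI_UPDATE interrupt and then poll the same register
 * until the F/W flips it to PQ_INIT_STATUS_READY_FOR_HOST.
 */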
static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, irq_handler_offset;
	struct hl_eq *eq;
	struct hl_hw_queue *cpu_pq =
			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
			lower_32_bits(hdev->cpu_accessible_dma_address));
	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
			upper_32_bits(hdev->cpu_accessible_dma_address));

	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	if (gaudi->multi_msi_mode)
		WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
	else
		WREG32(mmCPU_IF_QUEUE_INIT,
			PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);

	irq_handler_offset = prop->gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);

	err = hl_poll_timeout(
		hdev,
		mmCPU_IF_QUEUE_INIT,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		cpu_timeout);

	if (err) {
		dev_err(hdev->dev,
			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
		return -EIO;
	}

	/* update FW application security bits */
	if (prop->fw_cpu_boot_dev_sts0_valid)
		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
	if (prop->fw_cpu_boot_dev_sts1_valid)
		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);

	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}

static void gaudi_pre_hw_init(struct hl_device *hdev)
{
	/* Perform read from the device to make sure device is up */
	RREG32(mmHW_STATE);

	if (!hdev->asic_prop.fw_security_enabled) {
		/* Set the access through PCI bars (Linux driver only) as
		 * secured
		 */
		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));

		/* Perform read to flush the waiting writes to ensure
		 * configuration was set in the device
		 */
		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
	}

	/*
	 * Let's mark in the H/W that we have reached this point. We check
	 * this value in the reset_before_init function to understand whether
	 * we need to reset the chip before doing H/W init. This register is
	 * cleared by the H/W upon H/W reset
	 */
	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
}

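/* Note: the initialization order below is load-bearing - the CPU must be
 * running before the SRAM scrambler is touched, the MMU and security
 * settings must precede the QMANs, and MSI must be enabled before the CPU
 * queues and NIC.
 */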
static int gaudi_hw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	gaudi_pre_hw_init(hdev);

	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
	 * So we set it here and if anyone tries to move it later to
	 * a different address, there will be an error
	 */
	if (hdev->asic_prop.iatu_done_by_fw)
		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;

	/*
	 * Before pushing u-boot/linux to device, need to set the hbm bar to
	 * base address of dram
	 */
	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map HBM bar to DRAM base address\n");
		return -EIO;
	}

	rc = gaudi_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	/* In case the clock gating was enabled in preboot, we need to disable
	 * it here before touching the MME/TPC registers.
	 * There is no need to take the clk gating mutex because when this
	 * function runs, no other relevant code can run
	 */
	hdev->asic_funcs->disable_clock_gating(hdev);

	/* SRAM scrambler must be initialized after CPU is running from HBM */
	gaudi_init_scrambler_sram(hdev);

	/* This is here just in case we are working without CPU */
	gaudi_init_scrambler_hbm(hdev);

	gaudi_init_golden_registers(hdev);

	rc = gaudi_mmu_init(hdev);
	if (rc)
		return rc;

	gaudi_init_security(hdev);

	gaudi_init_pci_dma_qmans(hdev);

	gaudi_init_hbm_dma_qmans(hdev);

	gaudi_init_mme_qmans(hdev);

	gaudi_init_tpc_qmans(hdev);

	gaudi_init_nic_qmans(hdev);

	hdev->asic_funcs->set_clock_gating(hdev);

	gaudi_enable_timestamp(hdev);

	/* MSI must be enabled before CPU queues and NIC are initialized */
	rc = gaudi_enable_msi(hdev);
	if (rc)
		goto disable_queues;

	/* must be called after MSI was enabled */
	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
			rc);
		goto disable_msi;
	}

	/* Perform read from the device to flush all configuration */
	RREG32(mmHW_STATE);

	return 0;

disable_msi:
	gaudi_disable_msi(hdev);
disable_queues:
	gaudi_disable_mme_qmans(hdev);
	gaudi_disable_pci_dma_qmans(hdev);

	return rc;
}

static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
	struct gaudi_device *gaudi = hdev->asic_specific;
	bool driver_performs_reset;

	if (!hard_reset) {
		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
		return;
	}

	if (hdev->pldm) {
		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
	} else {
		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
	}

	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
					!hdev->asic_prop.hard_reset_done_by_fw);

	/* Set device to handle FLR by H/W as we will put the device CPU to
	 * halt mode
	 */
	if (driver_performs_reset)
		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));

	/* If linux is loaded in the device CPU we need to communicate with it
	 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
	 * registers in case of old F/Ws
	 */
	if (hdev->fw_loader.linux_loaded) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_host_halt_irq);

		WREG32(irq_handler_offset,
			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
	} else {
		if (hdev->asic_prop.hard_reset_done_by_fw)
			hl_fw_ask_hard_reset_without_linux(hdev);
		else
			hl_fw_ask_halt_machine_without_linux(hdev);
	}

	if (driver_performs_reset) {

		/* Configure the reset registers. Must be done as early as
		 * possible in case we fail during H/W initialization
		 */
		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
						(CFG_RST_H_DMA_MASK |
						CFG_RST_H_MME_MASK |
						CFG_RST_H_SM_MASK |
						CFG_RST_H_TPC_7_MASK));

		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
						(CFG_RST_H_HBM_MASK |
						CFG_RST_H_TPC_7_MASK |
						CFG_RST_H_NIC_MASK |
						CFG_RST_H_SM_MASK |
						CFG_RST_H_DMA_MASK |
						CFG_RST_H_MME_MASK |
						CFG_RST_H_CPU_MASK |
						CFG_RST_H_MMU_MASK));

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
						(CFG_RST_L_IF_MASK |
						CFG_RST_L_PSOC_MASK |
						CFG_RST_L_TPC_MASK));

		msleep(cpu_timeout_ms);

		/* Tell ASIC not to re-initialize PCIe */
		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);

		/* Restart BTL/BLR upon hard-reset */
		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);

		dev_info(hdev->dev,
			"Issued HARD reset command, going to wait %dms\n",
			reset_timeout_ms);
	} else {
		dev_info(hdev->dev,
			"Firmware performs HARD reset, going to wait %dms\n",
			reset_timeout_ms);
	}

	/*
	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
	 * itself is in reset. Need to wait until the reset is deasserted
	 */
	msleep(reset_timeout_ms);

	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
		dev_err(hdev->dev,
			"Timeout while waiting for device to reset 0x%x\n",
			status);

	gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
					HW_CAP_HBM | HW_CAP_PCI_DMA |
					HW_CAP_MME | HW_CAP_TPC_MASK |
					HW_CAP_HBM_DMA | HW_CAP_PLL |
					HW_CAP_NIC_MASK | HW_CAP_MMU |
					HW_CAP_SRAM_SCRAMBLER |
					HW_CAP_HBM_SCRAMBLER |
					HW_CAP_CLK_GATE);

	memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));

	hdev->device_cpu_is_halted = false;
}

static int gaudi_suspend(struct hl_device *hdev)
{
	int rc;

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
	if (rc)
		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");

	return rc;
}

static int gaudi_resume(struct hl_device *hdev)
{
	return gaudi_init_iatu(hdev);
}

static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	int rc;

	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE;

	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
				(dma_addr - HOST_PHYS_BASE), size);
	if (rc)
		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);

	return rc;
}

static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
	struct gaudi_device *gaudi = hdev->asic_specific;
	bool invalid_queue = false;
	int dma_id;

	switch (hw_queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_CPU_PQ:
		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
			db_reg_offset = mmCPU_IF_PF_PQ_PI;
		else
			invalid_queue = true;
		break;

	case GAUDI_QUEUE_ID_MME_0_0:
		db_reg_offset = mmMME2_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_MME_0_1:
		db_reg_offset = mmMME2_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_MME_0_2:
		db_reg_offset = mmMME2_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_MME_0_3:
		db_reg_offset = mmMME2_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_MME_1_0:
		db_reg_offset = mmMME0_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_MME_1_1:
		db_reg_offset = mmMME0_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_MME_1_2:
		db_reg_offset = mmMME0_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_MME_1_3:
		db_reg_offset = mmMME0_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_0_0:
		db_reg_offset = mmTPC0_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_0_1:
		db_reg_offset = mmTPC0_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_0_2:
		db_reg_offset = mmTPC0_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_0_3:
		db_reg_offset = mmTPC0_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_1_0:
		db_reg_offset = mmTPC1_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_1_1:
		db_reg_offset = mmTPC1_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_1_2:
		db_reg_offset = mmTPC1_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_1_3:
		db_reg_offset = mmTPC1_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_2_0:
		db_reg_offset = mmTPC2_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_2_1:
		db_reg_offset = mmTPC2_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_2_2:
		db_reg_offset = mmTPC2_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_2_3:
		db_reg_offset = mmTPC2_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_3_0:
		db_reg_offset = mmTPC3_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_3_1:
		db_reg_offset = mmTPC3_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_3_2:
		db_reg_offset = mmTPC3_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_3_3:
		db_reg_offset = mmTPC3_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_4_0:
		db_reg_offset = mmTPC4_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_4_1:
		db_reg_offset = mmTPC4_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_4_2:
		db_reg_offset = mmTPC4_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_4_3:
		db_reg_offset = mmTPC4_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_5_0:
		db_reg_offset = mmTPC5_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_5_1:
		db_reg_offset = mmTPC5_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_5_2:
		db_reg_offset = mmTPC5_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_5_3:
		db_reg_offset = mmTPC5_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_6_0:
		db_reg_offset = mmTPC6_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_6_1:
		db_reg_offset = mmTPC6_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_6_2:
		db_reg_offset = mmTPC6_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_6_3:
		db_reg_offset = mmTPC6_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_7_0:
		db_reg_offset = mmTPC7_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_7_1:
		db_reg_offset = mmTPC7_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_7_2:
		db_reg_offset = mmTPC7_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_7_3:
		db_reg_offset = mmTPC7_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
		break;

	default:
		invalid_queue = true;
	}

	if (invalid_queue) {
		/* Should never get here */
		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
			hw_queue_id);
		return;
	}

	db_value = pi;

	/* ring the doorbell */
	WREG32(db_reg_offset, db_value);

	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
		/* make sure device CPU will read latest data from host */
		mb();

		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);

		WREG32(irq_handler_offset,
			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
	}
}

static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
				struct hl_bd *bd)
{
	__le64 *pbd = (__le64 *) bd;

	/* The QMANs are on the host memory so a simple copy suffices */
	pqe[0] = pbd[0];
	pqe[1] = pbd[1];
}

static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle, gfp_t flags)
{
	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
						dma_handle, flags);

	/* Shift to the device's base physical address of host memory */
	if (kernel_addr)
		*dma_handle += HOST_PHYS_BASE;

	return kernel_addr;
}

static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
		void *cpu_addr, dma_addr_t dma_handle)
{
	/* Cancel the device's base physical address of host memory */
	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;

	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
}

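/* HBM scrubbing programs all DMA cores in memset mode (MEM_SET commit bit
 * with zeroed source registers) over consecutive chunks of up to 2GB each,
 * then polls every core's STS0 busy bit before starting the next stripe.
 */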
static int gaudi_hbm_scrubbing(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 cur_addr = DRAM_BASE_ADDR_USER;
	u32 chunk_size;
	u32 val;
	int rc, dma_id;

	while (cur_addr < prop->dram_end_address) {
		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
			u32 dma_offset = dma_id * DMA_CORE_OFFSET;

			chunk_size =
			min((u64)SZ_2G, prop->dram_end_address - cur_addr);

			dev_dbg(hdev->dev,
				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
				cur_addr, cur_addr + chunk_size);

			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
					lower_32_bits(cur_addr));
			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
					upper_32_bits(cur_addr));
			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
					chunk_size);
			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));

			cur_addr += chunk_size;

			if (cur_addr == prop->dram_end_address)
				break;
		}

		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
			u32 dma_offset = dma_id * DMA_CORE_OFFSET;

			rc = hl_poll_timeout(
				hdev,
				mmDMA0_CORE_STS0 + dma_offset,
				val,
				((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
				1000,
				HBM_SCRUBBING_TIMEOUT_US);

			if (rc) {
				dev_err(hdev->dev,
					"DMA Timeout during HBM scrubbing of DMA #%d\n",
					dma_id);
				return -EIO;
			}
		}
	}

	return 0;
}

static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc = 0;
	u64 val = 0;

	if (!hdev->memory_scrub)
		return 0;

	if (!addr && !size) {
		/* Wait till device is idle */
		rc = hl_poll_timeout(
				hdev,
				mmDMA0_CORE_STS0/* dummy */,
				val/* dummy */,
				(hdev->asic_funcs->is_device_idle(hdev, NULL,
						0, NULL)),
						1000,
						HBM_SCRUBBING_TIMEOUT_US);
		if (rc) {
			dev_err(hdev->dev, "waiting for idle timeout\n");
			return -EIO;
		}

		addr = prop->sram_user_base_address;
		size = hdev->pldm ? 0x10000 :
				(prop->sram_size - SRAM_USER_BASE_OFFSET);
		val = 0x7777777777777777ull;

		rc = gaudi_memset_device_memory(hdev, addr, size, val);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to clear SRAM in mem scrub all\n");
			return rc;
		}

		mutex_lock(&gaudi->clk_gate_mutex);
		hdev->asic_funcs->disable_clock_gating(hdev);

		/* Scrub HBM using all DMA channels in parallel */
		rc = gaudi_hbm_scrubbing(hdev);
		if (rc)
			dev_err(hdev->dev,
				"Failed to clear HBM in mem scrub all\n");

		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
	}

	return rc;
}

static void *gaudi_get_int_queue_base(struct hl_device *hdev,
				u32 queue_id, dma_addr_t *dma_handle,
				u16 *queue_len)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;

	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
		return NULL;
	}

	q = &gaudi->internal_qmans[queue_id];
	*dma_handle = q->pq_dma_addr;
	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;

	return q->pq_kernel_addr;
}

static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
				u16 len, u32 timeout, u64 *result)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
		if (result)
			*result = 0;
		return 0;
	}

	if (!timeout)
		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;

	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
						timeout, result);
}

static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
{
	struct packet_msg_prot *fence_pkt;
	dma_addr_t pkt_dma_addr;
	u32 fence_val, tmp, timeout_usec;
	dma_addr_t fence_dma_addr;
	u32 *fence_ptr;
	int rc;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
	else
		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;

	fence_val = GAUDI_QMAN0_FENCE_VAL;

	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate memory for H/W queue %d testing\n",
			hw_queue_id);
		return -ENOMEM;
	}

	*fence_ptr = 0;

	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
					sizeof(struct packet_msg_prot),
					GFP_KERNEL, &pkt_dma_addr);
	if (!fence_pkt) {
		dev_err(hdev->dev,
			"Failed to allocate packet for H/W queue %d testing\n",
			hw_queue_id);
		rc = -ENOMEM;
		goto free_fence_ptr;
	}

	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(fence_val);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
					sizeof(struct packet_msg_prot),
					pkt_dma_addr);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to send fence packet to H/W queue %d\n",
			hw_queue_id);
		goto free_pkt;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
					1000, timeout_usec, true);

	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev,
			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
		rc = -EIO;
	}

free_pkt:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
					pkt_dma_addr);
free_fence_ptr:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);
	return rc;
}

static int gaudi_test_cpu_queue(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	/*
	 * check capability here as send_cpu_message() won't update the result
	 * value if no capability
	 */
	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_test_cpu_queue(hdev);
}

static int gaudi_test_queues(struct hl_device *hdev)
{
	int i, rc, ret_val = 0;

	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
			rc = gaudi_test_queue(hdev, i);
			if (rc)
				ret_val = -EINVAL;
		}
	}

	rc = gaudi_test_cpu_queue(hdev);
	if (rc)
		ret_val = -EINVAL;

	return ret_val;
}

static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
			gfp_t mem_flags, dma_addr_t *dma_handle)
{
	void *kernel_addr;

	if (size > GAUDI_DMA_POOL_BLK_SIZE)
		return NULL;

	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);

	/* Shift to the device's base physical address of host memory */
	if (kernel_addr)
		*dma_handle += HOST_PHYS_BASE;

	return kernel_addr;
}

static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
			dma_addr_t dma_addr)
{
	/* Cancel the device's base physical address of host memory */
	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;

	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
}

static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
					size_t size, dma_addr_t *dma_handle)
{
	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
}

static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
						size_t size, void *vaddr)
{
	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}

static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
			int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
		return -ENOMEM;

	/* Shift to the device's base physical address of host memory */
	for_each_sg(sgl, sg, nents, i)
		sg->dma_address += HOST_PHYS_BASE;

	return 0;
}

static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
			int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	/* Cancel the device's base physical address of host memory */
	for_each_sg(sgl, sg, nents, i)
		sg->dma_address -= HOST_PHYS_BASE;

	dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
}

static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
					struct sg_table *sgt)
{
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t addr, addr_next;

	dma_desc_cnt = 0;

	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
		len = sg_dma_len(sg);
		addr = sg_dma_address(sg);

		if (len == 0)
			break;

		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((addr + len == addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		dma_desc_cnt++;
	}

	return dma_desc_cnt * sizeof(struct packet_lin_dma);
}

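/* The size returned above bounds the patched-CB space a user LIN_DMA
 * packet can consume: one packet_lin_dma per merged SG entry. For example,
 * two physically contiguous 4KB entries count as a single descriptor as
 * long as the merged length stays within DMA_MAX_TRANSFER_SIZE.
 */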
static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				u64 addr, enum dma_data_direction dir)
{
	struct hl_userptr *userptr;
	int rc;

	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
			parser->job_userptr_list, &userptr))
		goto already_pinned;

	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
	if (!userptr)
		return -ENOMEM;

	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
				userptr);
	if (rc)
		goto free_userptr;

	list_add_tail(&userptr->job_node, parser->job_userptr_list);

	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
					userptr->sgt->nents, dir);
	if (rc) {
		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
		goto unpin_memory;
	}

	userptr->dma_mapped = true;
	userptr->dir = dir;

already_pinned:
	parser->patched_cb_size +=
			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);

	return 0;

unpin_memory:
	list_del(&userptr->job_node);
	hl_unpin_host_memory(hdev, userptr);
free_userptr:
	kfree(userptr);
	return rc;
}

static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				bool src_in_host)
{
	enum dma_data_direction dir;
	bool skip_host_mem_pin = false, user_memset;
	u64 addr;
	int rc = 0;

	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	if (src_in_host) {
		if (user_memset)
			skip_host_mem_pin = true;

		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
		dir = DMA_TO_DEVICE;
		addr = le64_to_cpu(user_dma_pkt->src_addr);
	} else {
		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
		dir = DMA_FROM_DEVICE;
		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	}

	if (skip_host_mem_pin)
		parser->patched_cb_size += sizeof(*user_dma_pkt);
	else
		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
						addr, dir);

	return rc;
}

static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt)
{
	bool src_in_host = false;
	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;

	dev_dbg(hdev->dev, "DMA packet details:\n");
	dev_dbg(hdev->dev, "source == 0x%llx\n",
		le64_to_cpu(user_dma_pkt->src_addr));
	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));

	/*
	 * Special handling for DMA with size 0. Bypass all validations
	 * because no transactions will be done except for WR_COMP, which
	 * is not a security issue
	 */
	if (!le32_to_cpu(user_dma_pkt->tsize)) {
		parser->patched_cb_size += sizeof(*user_dma_pkt);
		return 0;
	}

	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
		src_in_host = true;

	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
						src_in_host);
}

static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
					struct hl_cs_parser *parser,
					struct packet_load_and_exe *user_pkt)
{
	u32 cfg;

	cfg = le32_to_cpu(user_pkt->cfg);

	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
		dev_err(hdev->dev,
			"User not allowed to use Load and Execute\n");
		return -EPERM;
	}

	parser->patched_cb_size += sizeof(struct packet_load_and_exe);

	return 0;
}

static int gaudi_validate_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser, bool is_mmu)
{
	u32 cb_parsed_length = 0;
	int rc = 0;

	parser->patched_cb_size = 0;

	/* user_cb_size is greater than 0 so the loop will always execute */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		struct gaudi_packet *user_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = gaudi_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_LOAD_AND_EXE:
			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
				(struct packet_load_and_exe *) user_pkt);
			break;

		case PACKET_LIN_DMA:
			parser->contains_dma_pkt = true;
			if (is_mmu)
				parser->patched_cb_size += pkt_size;
			else
				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			break;

		case PACKET_WREG_32:
		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_REPEAT:
		case PACKET_FENCE:
		case PACKET_NOP:
		case PACKET_ARB_POINT:
			parser->patched_cb_size += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	/*
	 * The new CB should have space at the end for two MSG_PROT packets:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate MSI-X interrupt
	 */
	if (parser->completion)
		parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;

	return rc;
}

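/* Patching expands a single user LIN_DMA packet into one packet per merged
 * SG entry of the pinned host memory. Only the first replica keeps the
 * engine-barrier (EB) bit and only the last one keeps the user's WR_COMP
 * setting, so completion is signaled once for the whole transfer.
 */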
static int gaudi_patch_dma_packet(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				struct packet_lin_dma *new_dma_pkt,
				u32 *new_dma_pkt_size)
{
	struct hl_userptr *userptr;
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
	u64 len, len_next;
	dma_addr_t dma_addr, dma_addr_next;
	u64 device_memory_addr, addr;
	enum dma_data_direction dir;
	struct sg_table *sgt;
	bool src_in_host = false;
	bool skip_host_mem_pin = false;
	bool user_memset;

	ctl = le32_to_cpu(user_dma_pkt->ctl);

	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
		src_in_host = true;

	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	if (src_in_host) {
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		dir = DMA_TO_DEVICE;
		if (user_memset)
			skip_host_mem_pin = true;
	} else {
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		dir = DMA_FROM_DEVICE;
	}

	if ((!skip_host_mem_pin) &&
		(!hl_userptr_is_pinned(hdev, addr,
					le32_to_cpu(user_dma_pkt->tsize),
					parser->job_userptr_list, &userptr))) {
		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
				addr, user_dma_pkt->tsize);
		return -EFAULT;
	}

	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
		*new_dma_pkt_size = sizeof(*user_dma_pkt);
		return 0;
	}

	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;

	sgt = userptr->sgt;
	dma_desc_cnt = 0;

	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
		len = sg_dma_len(sg);
		dma_addr = sg_dma_address(sg);

		if (len == 0)
			break;

		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			dma_addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((dma_addr + len == dma_addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		ctl = le32_to_cpu(user_dma_pkt->ctl);
		if (likely(dma_desc_cnt))
			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
		new_dma_pkt->ctl = cpu_to_le32(ctl);
		new_dma_pkt->tsize = cpu_to_le32(len);

		if (dir == DMA_TO_DEVICE) {
			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
		} else {
			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
		}

		if (!user_memset)
			device_memory_addr += len;
		dma_desc_cnt++;
		new_dma_pkt++;
	}

	if (!dma_desc_cnt) {
		dev_err(hdev->dev,
			"Error of 0 SG entries when patching DMA packet\n");
		return -EFAULT;
	}

	/* Fix the last dma packet - wrcomp must be as the user set it */
	new_dma_pkt--;
	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);

	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);

	return 0;
}

static int gaudi_patch_cb(struct hl_device *hdev,
				struct hl_cs_parser *parser)
{
	u32 cb_parsed_length = 0;
	u32 cb_patched_cur_length = 0;
	int rc = 0;

	/* user_cb_size is greater than 0 so the loop will always execute */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		u32 new_pkt_size = 0;
		struct gaudi_packet *user_pkt, *kernel_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
		kernel_pkt = parser->patched_cb->kernel_address +
					cb_patched_cur_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = gaudi_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_LIN_DMA:
			rc = gaudi_patch_dma_packet(hdev, parser,
					(struct packet_lin_dma *) user_pkt,
					(struct packet_lin_dma *) kernel_pkt,
					&new_pkt_size);
			cb_patched_cur_length += new_pkt_size;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_WREG_32:
		case PACKET_WREG_BULK:
		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_REPEAT:
		case PACKET_FENCE:
		case PACKET_NOP:
		case PACKET_ARB_POINT:
		case PACKET_LOAD_AND_EXE:
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	return rc;
}

static int gaudi_parse_cb_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 patched_cb_handle;
	u32 patched_cb_size;
	struct hl_cb *user_cb;
	int rc;

	/*
	 * The new CB should have space at the end for two MSG_PROT pkt:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate MSI interrupt
	 */
	if (parser->completion)
		parser->patched_cb_size = parser->user_cb_size +
				sizeof(struct packet_msg_prot) * 2;
	else
		parser->patched_cb_size = parser->user_cb_size;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&patched_cb_handle);

	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n",
			rc);
		return rc;
	}

	patched_cb_handle >>= PAGE_SHIFT;
	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
				(u32) patched_cb_handle);
	/* hl_cb_get should never fail */
	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
			(u32) patched_cb_handle);
		rc = -EFAULT;
		goto out;
	}

	/*
	 * The check that parser->user_cb_size <= parser->user_cb->size was done
	 * in validate_queue_index().
	 */
	memcpy(parser->patched_cb->kernel_address,
		parser->user_cb->kernel_address,
		parser->user_cb_size);

	patched_cb_size = parser->patched_cb_size;

	/* Validate patched CB instead of user CB */
	user_cb = parser->user_cb;
	parser->user_cb = parser->patched_cb;
	rc = gaudi_validate_cb(hdev, parser, true);
	parser->user_cb = user_cb;

	if (rc) {
		hl_cb_put(parser->patched_cb);
		goto out;
	}

	if (patched_cb_size != parser->patched_cb_size) {
		dev_err(hdev->dev, "user CB size mismatch\n");
		hl_cb_put(parser->patched_cb);
		rc = -EINVAL;
		goto out;
	}

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job is completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
			patched_cb_handle << PAGE_SHIFT);

	return rc;
}

static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 patched_cb_handle;
	int rc;

	rc = gaudi_validate_cb(hdev, parser, false);
	if (rc)
		goto free_userptr;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&patched_cb_handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n", rc);
		goto free_userptr;
	}

	patched_cb_handle >>= PAGE_SHIFT;
	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
				(u32) patched_cb_handle);
	/* hl_cb_get should never fail here */
	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
				(u32) patched_cb_handle);
		rc = -EFAULT;
		goto out;
	}

	rc = gaudi_patch_cb(hdev, parser);

	if (rc)
		hl_cb_put(parser->patched_cb);

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job is completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
			patched_cb_handle << PAGE_SHIFT);

free_userptr:
	if (rc)
		hl_userptr_delete_list(hdev, parser->job_userptr_list);

	return rc;
}

static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
					struct hl_cs_parser *parser)
{
	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
		((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));

	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
			(!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
		dev_err(hdev->dev, "h/w queue %d is disabled\n",
				parser->hw_queue_id);
		return -EINVAL;
	}

	/* For internal queue jobs just check if CB address is valid */
	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
					parser->user_cb_size,
					asic_prop->sram_user_base_address,
					asic_prop->sram_end_address))
		return 0;

	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
					parser->user_cb_size,
					asic_prop->dram_user_base_address,
					asic_prop->dram_end_address))
		return 0;

	/* PMMU and HPMMU addresses are equal, check only one of them */
	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
					parser->user_cb_size,
					asic_prop->pmmu.start_addr,
					asic_prop->pmmu.end_addr))
		return 0;

	dev_err(hdev->dev,
		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
		parser->user_cb, parser->user_cb_size);

	return -EFAULT;
}

static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (parser->queue_type == QUEUE_TYPE_INT)
		return gaudi_parse_cb_no_ext_queue(hdev, parser);

	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
		return gaudi_parse_cb_mmu(hdev, parser);
	else
		return gaudi_parse_cb_no_mmu(hdev, parser);
}

static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
					void *kernel_address, u32 len,
					u64 cq_addr, u32 cq_val, u32 msi_vec,
					bool eb)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct packet_msg_prot *cq_pkt;
	u32 tmp;

	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);

	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	if (eb)
		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);

	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(cq_val);
	cq_pkt->addr = cpu_to_le64(cq_addr);

	cq_pkt++;

	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(1);

	if (!gaudi->multi_msi_mode)
		msi_vec = 0;

	cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
}

static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
}

static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val)
{
	struct packet_lin_dma *lin_dma_pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl, err_cause;
	struct hl_cb *cb;
	u64 id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	lin_dma_pkt = cb->kernel_address;
	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
	cb_size = sizeof(*lin_dma_pkt);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);

	lin_dma_pkt->ctl = cpu_to_le32(ctl);
	lin_dma_pkt->src_addr = cpu_to_le64(val);
	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
	lin_dma_pkt->tsize = cpu_to_le32(size);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
	if (err_cause && !hdev->init_done) {
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
	if (err_cause) {
		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
		rc = -EIO;
		if (!hdev->init_done) {
			dev_dbg(hdev->dev,
				"Clearing DMA0 engine from errors (cause 0x%x)\n",
				err_cause);
			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
		}
	}

release_cb:
	id = cb->id;
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);

	return rc;
}

static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val)
{
	struct packet_msg_long *pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size, ctl;
	int i, rc;

	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);

	if (cb_size > SZ_2M) {
		dev_err(hdev->dev, "CB size must be smaller than %u bytes",
			SZ_2M);
		return -ENOMEM;
	}

	cb = hl_cb_kernel_create(hdev, cb_size, false);
	if (!cb)
		return -EFAULT;

	pkt = cb->kernel_address;

	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	for (i = 0; i < num_regs ; i++, pkt++) {
		pkt->ctl = cpu_to_le32(ctl);
		pkt->value = cpu_to_le32(val);
		pkt->addr = cpu_to_le64(reg_base + (i * 4));
	}

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = cb_size;

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}

5893 static int gaudi_schedule_register_memset(struct hl_device *hdev,
5894 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
5897 struct hl_pending_cb *pending_cb;
5898 struct packet_msg_long *pkt;
5903 mutex_lock(&hdev->fpriv_list_lock);
5904 ctx = hdev->compute_ctx;
5906 /* If no compute context available or context is going down
5907 * memset registers directly
5909 if (!ctx || kref_read(&ctx->refcount) == 0) {
5910 rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
5911 mutex_unlock(&hdev->fpriv_list_lock);
5915 mutex_unlock(&hdev->fpriv_list_lock);
5917 cb_size = (sizeof(*pkt) * num_regs) +
5918 sizeof(struct packet_msg_prot) * 2;
5920 if (cb_size > SZ_2M) {
5921 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
5925 pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
5929 cb = hl_cb_kernel_create(hdev, cb_size, false);
5935 pkt = cb->kernel_address;
5937 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5938 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5939 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5940 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5941 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5943 for (i = 0; i < num_regs ; i++, pkt++) {
5944 pkt->ctl = cpu_to_le32(ctl);
5945 pkt->value = cpu_to_le32(val);
5946 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5949 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5951 pending_cb->cb = cb;
5952 pending_cb->cb_size = cb_size;
5953 /* The queue ID MUST be an external queue ID. Otherwise, we will
5954 * have undefined behavior
5956 pending_cb->hw_queue_id = hw_queue_id;
5958 spin_lock(&ctx->pending_cb_lock);
5959 list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
5960 spin_unlock(&ctx->pending_cb_lock);
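/*
 * Note: the CB queued above is not submitted here. It is parked on
 * ctx->pending_cb_list and is expected to be flushed to 'hw_queue_id'
 * before the context's next command submission. A minimal sketch of such
 * a flush, assuming only the generic list API (the sending step itself
 * is elided):
 *
 *	struct hl_pending_cb *pcb, *tmp;
 *
 *	list_for_each_entry_safe(pcb, tmp, &ctx->pending_cb_list, cb_node) {
 *		... send pcb->cb (pcb->cb_size bytes) on pcb->hw_queue_id ...
 *		list_del(&pcb->cb_node);
 *		kfree(pcb);
 *	}
 */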
5965 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5971 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5972 num_regs = NUM_OF_SOB_IN_BLOCK;
5973 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5975 dev_err(hdev->dev, "failed resetting SM registers");
5979 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5980 num_regs = NUM_OF_SOB_IN_BLOCK;
5981 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5983 dev_err(hdev->dev, "failed resetting SM registers");
5987 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5988 num_regs = NUM_OF_SOB_IN_BLOCK;
5989 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5991 dev_err(hdev->dev, "failed resetting SM registers");
5995 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5996 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5997 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5999 dev_err(hdev->dev, "failed resetting SM registers");
6003 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
6004 num_regs = NUM_OF_MONITORS_IN_BLOCK;
6005 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6007 dev_err(hdev->dev, "failed resetting SM registers");
6011 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6012 num_regs = NUM_OF_MONITORS_IN_BLOCK;
6013 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6015 dev_err(hdev->dev, "failed resetting SM registers");
6019 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6020 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6021 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6022 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6024 dev_err(hdev->dev, "failed resetting SM registers");
6028 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6029 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6030 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6031 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6033 dev_err(hdev->dev, "failed resetting SM registers");
6040 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6042 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6043 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6046 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6047 u64 sob_addr = CFG_BASE +
6048 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6050 u32 dma_offset = i * DMA_CORE_OFFSET;
6052 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6053 lower_32_bits(sob_addr));
6054 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6055 upper_32_bits(sob_addr));
6056 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6058 /* For DMAs 2-7, we need to restore WR_AWUSER_31_11 as the user can
6059 * modify it for SRAM reduction
6062 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
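/*
 * The WR_COMP writes above arm each DMA core to post a completion write
 * of 0x80000001 to its own sync object when a transfer finishes. Under
 * the layout used above, DMA channel i signals:
 *
 *	sob_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
 *			(i * sob_delta);
 */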
6067 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6072 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6073 qman_offset = i * DMA_QMAN_OFFSET;
6074 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6077 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6078 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6079 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6082 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6083 qman_offset = i * TPC_QMAN_OFFSET;
6084 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6087 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6088 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6089 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6090 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
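/*
 * NIC QMANs come in pairs inside a NIC macro, hence the offset math
 * above: (i >> 1) selects the macro and (i & 0x1) selects the QMAN
 * within it. E.g. i = 5 resolves to NIC macro 2, QMAN 1, i.e.
 * mmNIC2_QM1_ARB_CFG_0 (assuming the offset macros match the register
 * map).
 */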
6094 static int gaudi_restore_user_registers(struct hl_device *hdev)
6098 rc = gaudi_restore_sm_registers(hdev);
6102 gaudi_restore_dma_registers(hdev);
6103 gaudi_restore_qm_registers(hdev);
6108 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6110 return gaudi_restore_user_registers(hdev);
6113 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6115 struct asic_fixed_properties *prop = &hdev->asic_prop;
6116 struct gaudi_device *gaudi = hdev->asic_specific;
6117 u64 addr = prop->mmu_pgt_addr;
6118 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6120 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6123 return gaudi_memset_device_memory(hdev, addr, size, 0);
6126 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6131 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6132 bool user_address, u32 *val)
6134 struct asic_fixed_properties *prop = &hdev->asic_prop;
6135 struct gaudi_device *gaudi = hdev->asic_specific;
6136 u64 hbm_bar_addr, host_phys_end;
6139 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6141 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6143 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6144 (hdev->clock_gating_mask &
6145 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6147 dev_err_ratelimited(hdev->dev,
6148 "Can't read register - clock gating is enabled!\n");
6151 *val = RREG32(addr - CFG_BASE);
6154 } else if ((addr >= SRAM_BASE_ADDR) &&
6155 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6156 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6157 (addr - SRAM_BASE_ADDR));
6158 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6159 u64 bar_base_addr = DRAM_PHYS_BASE +
6160 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6162 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6163 if (hbm_bar_addr != U64_MAX) {
6164 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6165 (addr - bar_base_addr));
6167 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6170 if (hbm_bar_addr == U64_MAX)
6172 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6173 user_address && !iommu_present(&pci_bus_type)) {
6174 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
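/*
 * HBM accesses in these debugfs helpers go through a sliding PCI BAR
 * window: move the BAR base to the window that contains 'addr' (aligned
 * down to the BAR size), access the offset within the window, then
 * restore the previous base. The aligning step, as used above:
 *
 *	bar_base_addr = DRAM_PHYS_BASE +
 *			(addr & ~(prop->dram_pci_bar_size - 0x1ull));
 *	offset_in_bar = addr - bar_base_addr;
 *
 * (offset_in_bar is an illustrative name only.)
 */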
6182 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6183 bool user_address, u32 val)
6185 struct asic_fixed_properties *prop = &hdev->asic_prop;
6186 struct gaudi_device *gaudi = hdev->asic_specific;
6187 u64 hbm_bar_addr, host_phys_end;
6190 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6192 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6194 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6195 (hdev->clock_gating_mask &
6196 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6198 dev_err_ratelimited(hdev->dev,
6199 "Can't write register - clock gating is enabled!\n");
6202 WREG32(addr - CFG_BASE, val);
6205 } else if ((addr >= SRAM_BASE_ADDR) &&
6206 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6207 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6208 (addr - SRAM_BASE_ADDR));
6209 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6210 u64 bar_base_addr = DRAM_PHYS_BASE +
6211 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6213 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6214 if (hbm_bar_addr != U64_MAX) {
6215 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6216 (addr - bar_base_addr));
6218 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6221 if (hbm_bar_addr == U64_MAX)
6223 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6224 user_address && !iommu_present(&pci_bus_type)) {
6225 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6233 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6234 bool user_address, u64 *val)
6236 struct asic_fixed_properties *prop = &hdev->asic_prop;
6237 struct gaudi_device *gaudi = hdev->asic_specific;
6238 u64 hbm_bar_addr, host_phys_end;
6241 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6243 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6245 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6246 (hdev->clock_gating_mask &
6247 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6249 dev_err_ratelimited(hdev->dev,
6250 "Can't read register - clock gating is enabled!\n");
6253 u32 val_l = RREG32(addr - CFG_BASE);
6254 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6256 *val = (((u64) val_h) << 32) | val_l;
6259 } else if ((addr >= SRAM_BASE_ADDR) &&
6260 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6261 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6262 (addr - SRAM_BASE_ADDR));
6264 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6265 u64 bar_base_addr = DRAM_PHYS_BASE +
6266 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6268 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6269 if (hbm_bar_addr != U64_MAX) {
6270 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6271 (addr - bar_base_addr));
6273 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6276 if (hbm_bar_addr == U64_MAX)
6278 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6279 user_address && !iommu_present(&pci_bus_type)) {
6280 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6288 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6289 bool user_address, u64 val)
6291 struct asic_fixed_properties *prop = &hdev->asic_prop;
6292 struct gaudi_device *gaudi = hdev->asic_specific;
6293 u64 hbm_bar_addr, host_phys_end;
6296 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6298 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6300 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6301 (hdev->clock_gating_mask &
6302 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6304 dev_err_ratelimited(hdev->dev,
6305 "Can't write register - clock gating is enabled!\n");
6308 WREG32(addr - CFG_BASE, lower_32_bits(val));
6309 WREG32(addr + sizeof(u32) - CFG_BASE,
6310 upper_32_bits(val));
6313 } else if ((addr >= SRAM_BASE_ADDR) &&
6314 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6315 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6316 (addr - SRAM_BASE_ADDR));
6318 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6319 u64 bar_base_addr = DRAM_PHYS_BASE +
6320 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6322 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6323 if (hbm_bar_addr != U64_MAX) {
6324 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6325 (addr - bar_base_addr));
6327 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6330 if (hbm_bar_addr == U64_MAX)
6332 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6333 user_address && !iommu_present(&pci_bus_type)) {
6334 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6342 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6343 u32 size_to_dma, dma_addr_t dma_addr)
6349 dma_offset = dma_id * DMA_CORE_OFFSET;
6351 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6352 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6353 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6354 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6355 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6356 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6357 (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6359 rc = hl_poll_timeout(
6361 mmDMA0_CORE_STS0 + dma_offset,
6363 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6369 "DMA %d timed-out during reading of 0x%llx\n",
6374 /* Verify DMA is OK */
6375 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6377 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6379 "Clearing DMA0 engine from errors (cause 0x%x)\n",
6381 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
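/*
 * The sequence above is the raw linear-DMA programming model of the DMA
 * core: program the SRC/DST base and the transfer size, kick COMMIT with
 * the LIN bit, then poll STS0 until the busy bit clears. An illustrative
 * call that reads 4KB from device address 'addr' into a host buffer
 * mapped at 'dma_addr':
 *
 *	rc = gaudi_dma_core_transfer(hdev, dma_id, addr, SZ_4K, dma_addr);
 */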
6389 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6392 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6393 struct gaudi_device *gaudi = hdev->asic_specific;
6394 u64 dma_offset, qm_offset;
6395 dma_addr_t dma_addr;
6400 kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6403 GFP_KERNEL | __GFP_ZERO);
6408 mutex_lock(&gaudi->clk_gate_mutex);
6410 hdev->asic_funcs->disable_clock_gating(hdev);
6412 hdev->asic_funcs->hw_queues_lock(hdev);
6414 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6415 dma_offset = dma_id * DMA_CORE_OFFSET;
6416 qm_offset = dma_id * DMA_QMAN_OFFSET;
6417 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6418 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6421 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6422 dma_offset = dma_id * DMA_CORE_OFFSET;
6423 qm_offset = dma_id * DMA_QMAN_OFFSET;
6424 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6425 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6428 dev_err_ratelimited(hdev->dev,
6429 "Can't read via DMA because it is BUSY\n");
6435 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6436 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6437 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6439 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6440 * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6443 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6445 /* Verify DMA is OK */
6446 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6449 "Clearing DMA0 engine from errors (cause 0x%x)\n",
6451 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6456 size_to_dma = SZ_2M;
6458 while (size_left > 0) {
6460 if (size_left < SZ_2M)
6461 size_to_dma = size_left;
6463 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6468 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6470 if (size_left <= SZ_2M)
6478 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6479 * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6482 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6483 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6485 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6488 hdev->asic_funcs->hw_queues_unlock(hdev);
6490 hdev->asic_funcs->set_clock_gating(hdev);
6492 mutex_unlock(&gaudi->clk_gate_mutex);
6494 hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6500 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6502 struct gaudi_device *gaudi = hdev->asic_specific;
6504 if (hdev->hard_reset_pending)
6507 return readq(hdev->pcie_bar[HBM_BAR_ID] +
6508 (addr - gaudi->hbm_bar_cur_addr));
6511 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6513 struct gaudi_device *gaudi = hdev->asic_specific;
6515 if (hdev->hard_reset_pending)
6518 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6519 (addr - gaudi->hbm_bar_cur_addr));
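/*
 * Both PTE accessors rely on the MMU page tables residing in HBM and on
 * the HBM BAR already covering the page-table region, so the offset is
 * computed against gaudi->hbm_bar_cur_addr instead of moving the BAR.
 * E.g. writing a 64-bit PTE (pte_offset, phys and flags are illustrative
 * names only):
 *
 *	gaudi_write_pte(hdev, prop->mmu_pgt_addr + pte_offset, phys | flags);
 */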
6522 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6524 /* mask to zero the MMBP and ASID bits */
6525 WREG32_AND(reg, ~0x7FF);
6526 WREG32_OR(reg, asid);
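/*
 * The read-modify-write above clears bits [10:0] of the register (MMBP
 * plus the ASID field, assuming a 10-bit ASID) and then ORs in the new
 * ASID, leaving MMBP at 0 so the engine's transactions go through the
 * MMU. E.g. for asid = 5 and an old register value of 0x7FF, the
 * resulting value is 0x005.
 */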
6529 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6531 struct gaudi_device *gaudi = hdev->asic_specific;
6533 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6536 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6537 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6541 mutex_lock(&gaudi->clk_gate_mutex);
6543 hdev->asic_funcs->disable_clock_gating(hdev);
6545 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6546 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6547 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6548 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6549 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6551 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6552 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6553 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6554 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6555 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6557 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6558 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6559 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6560 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6561 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6563 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6564 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6565 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6566 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6567 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6569 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6570 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6571 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6572 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6573 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6575 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6576 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6577 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6578 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6579 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6581 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6582 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6583 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6584 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6585 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6587 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6588 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6589 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6590 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6591 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6593 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6594 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6595 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6596 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6597 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6598 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6599 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6600 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6602 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6603 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6604 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6605 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6606 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6607 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6608 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6610 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6611 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6612 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6613 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6614 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6615 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6616 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6618 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6619 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6620 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6621 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6622 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6623 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6624 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6626 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6627 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6628 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6629 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6630 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6631 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6632 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6634 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6635 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6636 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6637 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6638 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6639 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6640 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6642 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6643 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6644 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6645 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6646 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6647 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6648 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6650 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6651 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6652 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6653 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6654 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6655 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6656 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6658 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6659 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6660 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6661 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6662 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6663 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6664 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6666 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6667 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6668 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6669 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6670 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6671 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6672 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6673 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6674 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6675 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6677 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6678 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6679 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6680 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6681 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6682 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6683 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6684 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6685 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6686 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6687 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6688 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6690 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6691 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6693 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6695 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6697 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6699 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6703 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6704 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6706 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6708 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6710 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6712 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6716 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6717 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6719 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6721 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6723 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6725 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6729 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6730 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6732 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6734 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6736 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6738 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6742 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6743 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6745 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6747 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6749 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6751 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6755 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6756 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6758 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6760 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6762 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6764 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6768 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6769 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6771 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6773 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6775 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6777 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6781 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6782 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6784 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6786 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6788 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6790 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6794 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6795 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6797 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6799 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6801 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6803 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6807 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6808 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6810 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6812 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6814 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6816 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6820 hdev->asic_funcs->set_clock_gating(hdev);
6822 mutex_unlock(&gaudi->clk_gate_mutex);
6825 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6826 struct hl_cs_job *job)
6828 struct packet_msg_prot *fence_pkt;
6830 dma_addr_t fence_dma_addr;
6832 u32 tmp, timeout, dma_offset;
6836 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6838 timeout = HL_DEVICE_TIMEOUT_USEC;
6840 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6841 dev_err_ratelimited(hdev->dev,
6842 "Can't send driver job on QMAN0 because the device is not idle\n");
6846 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6850 "Failed to allocate fence memory for QMAN0\n");
6854 cb = job->patched_cb;
6856 fence_pkt = cb->kernel_address +
6857 job->job_cb_size - sizeof(struct packet_msg_prot);
6859 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6860 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6861 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6863 fence_pkt->ctl = cpu_to_le32(tmp);
6864 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6865 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6867 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6869 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6871 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6872 job->job_cb_size, cb->bus_address);
6874 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6875 goto free_fence_ptr;
6878 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6879 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6882 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6884 if (rc == -ETIMEDOUT) {
6885 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6886 goto free_fence_ptr;
6890 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6891 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6893 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
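/*
 * Completion protocol used above: a MSG_PROT packet patched into the end
 * of the CB makes QMAN0 write GAUDI_QMAN0_FENCE_VAL to a host fence
 * buffer once the job's packets have executed, and the driver busy-polls
 * that buffer. A sketch of the poll, as invoked above (1000us sampling
 * interval):
 *
 *	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
 *			(tmp == GAUDI_QMAN0_FENCE_VAL), 1000, timeout, true);
 */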
6898 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6900 if (event_type >= GAUDI_EVENT_SIZE)
6901 goto event_not_supported;
6903 if (!gaudi_irq_map_table[event_type].valid)
6904 goto event_not_supported;
6906 snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6910 event_not_supported:
6911 snprintf(desc, size, "N/A");
6914 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6915 u32 x_y, bool is_write)
6917 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6919 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6920 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6923 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6924 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6928 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6929 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6933 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6934 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6938 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6939 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6944 goto unknown_initiator;
6947 for (i = 0 ; i < 2 ; i++) {
6948 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6949 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6953 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6954 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6955 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6957 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6960 return "DMA0 or DMA2";
6961 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6962 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6963 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6965 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6968 return "DMA1 or DMA3";
6969 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6970 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6971 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6973 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6976 return "DMA4 or DMA6";
6977 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6978 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6979 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6981 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6984 return "DMA5 or DMA7";
6988 return "unknown initiator";
6991 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
6994 u32 val, x_y, axi_id;
6996 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6997 RREG32(mmMMU_UP_RAZWI_READ_ID);
6998 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6999 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
7000 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
7001 RAZWI_INITIATOR_AXI_ID_SHIFT);
7004 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
7005 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7007 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7010 case RAZWI_INITIATOR_ID_X_Y_TPC1:
7012 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
7013 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
7015 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
7016 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
7018 case RAZWI_INITIATOR_ID_X_Y_TPC2:
7020 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
7021 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7023 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
7025 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
7027 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
7030 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7031 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7032 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7033 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7034 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7035 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7036 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7037 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7038 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
7039 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7040 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7042 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7044 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7047 case RAZWI_INITIATOR_ID_X_Y_TPC5:
7049 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7050 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7052 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7053 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7055 case RAZWI_INITIATOR_ID_X_Y_TPC6:
7057 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7058 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7060 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7062 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7070 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7072 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7073 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7074 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7075 RAZWI_INITIATOR_AXI_ID_MASK);
7077 return "unknown initiator";
7080 static void gaudi_print_razwi_info(struct hl_device *hdev)
7082 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7083 dev_err_ratelimited(hdev->dev,
7084 "RAZWI event caused by illegal write of %s\n",
7085 gaudi_get_razwi_initiator_name(hdev, true));
7086 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7089 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7090 dev_err_ratelimited(hdev->dev,
7091 "RAZWI event caused by illegal read of %s\n",
7092 gaudi_get_razwi_initiator_name(hdev, false));
7093 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7097 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7099 struct gaudi_device *gaudi = hdev->asic_specific;
7103 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7106 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7107 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7108 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7110 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7112 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7115 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7118 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7119 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7120 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7122 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7124 dev_err_ratelimited(hdev->dev,
7125 "MMU access error on va 0x%llx\n", addr);
7127 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7132 * +-------------------+------------------------------------------------------+
7133 * | Configuration Reg | Description |
7135 * +-------------------+------------------------------------------------------+
7136 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
7137 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
7138 * | |0xF34 memory wrappers 63:32 |
7139 * | |0xF38 memory wrappers 95:64 |
7140 * | |0xF3C memory wrappers 127:96 |
7141 * +-------------------+------------------------------------------------------+
7142 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
7143 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
7144 * | |0xF44 memory wrappers 63:32 |
7145 * | |0xF48 memory wrappers 95:64 |
7146 * | |0xF4C memory wrappers 127:96 |
7147 * +-------------------+------------------------------------------------------+
7149 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7150 struct ecc_info_extract_params *params, u64 *ecc_address,
7151 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7153 struct gaudi_device *gaudi = hdev->asic_specific;
7154 u32 i, num_mem_regs, reg, err_bit;
7155 u64 err_addr, err_word = 0;
7158 num_mem_regs = params->num_memories / 32 +
7159 ((params->num_memories % 32) ? 1 : 0);
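/*
 * i.e. num_mem_regs = DIV_ROUND_UP(num_memories, 32); e.g. the TPC
 * blocks with 90 memories need 3 indication registers per severity.
 */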
7161 if (params->block_address >= CFG_BASE)
7162 params->block_address -= CFG_BASE;
7165 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7167 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7169 if (params->disable_clock_gating) {
7170 mutex_lock(&gaudi->clk_gate_mutex);
7171 hdev->asic_funcs->disable_clock_gating(hdev);
7174 /* Set invalid wrapper index */
7175 *memory_wrapper_idx = 0xFF;
7177 /* Iterate through the memory wrappers; a single bit must be set */
7178 for (i = 0 ; i < num_mem_regs ; i++) {
7180 err_word = RREG32(err_addr);
7182 err_bit = __ffs(err_word);
7183 *memory_wrapper_idx = err_bit + (32 * i);
7188 if (*memory_wrapper_idx == 0xFF) {
7189 dev_err(hdev->dev, "ECC error information cannot be found\n");
7191 goto enable_clk_gate;
7194 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7195 *memory_wrapper_idx);
7198 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7200 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7202 /* Clear error indication */
7203 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7205 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7207 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7209 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7212 if (params->disable_clock_gating) {
7213 hdev->asic_funcs->set_clock_gating(hdev);
7215 mutex_unlock(&gaudi->clk_gate_mutex);
7222 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7224 * @idx: the current pi/ci value
7225 * @q_len: the queue length (power of 2)
7227 * @return the cyclically decremented index
7229 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7231 u32 mask = q_len - 1;
7234 * modular decrement is equivalent to adding (queue_size - 1);
7235 * later we take the LSBs to make sure the value is in the
7236 * range [0, queue_len - 1]
7238 return (idx + q_len - 1) & mask;
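/*
 * E.g. for q_len = 8 (mask = 7): idx 3 decrements to 2, while idx 0
 * wraps to 7, since (0 + 8 - 1) & 7 = 7.
 */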
7242 * gaudi_print_sw_config_stream_data - print SW config stream data
7244 * @hdev: pointer to the habanalabs device structure
7245 * @stream: the QMAN's stream
7246 * @qman_base: base address of QMAN registers block
7248 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7251 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7252 u32 cq_ptr_lo_off, size;
7254 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7256 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7257 stream * cq_ptr_lo_off;
7258 cq_ptr_hi = cq_ptr_lo +
7259 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7260 cq_tsize = cq_ptr_lo +
7261 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7263 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7264 size = RREG32(cq_tsize);
7265 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
7266 stream, cq_ptr, size);
7270 * gaudi_print_last_pqes_on_err - print last PQEs on error
7272 * @hdev: pointer to the habanalabs device structure
7273 * @qid_base: first QID of the QMAN (out of 4 streams)
7274 * @stream: the QMAN's stream
7275 * @qman_base: base address of QMAN registers block
7276 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7278 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7279 u32 stream, u64 qman_base,
7282 u32 ci, qm_ci_stream_off, queue_len;
7283 struct hl_hw_queue *q;
7287 q = &hdev->kernel_queues[qid_base + stream];
7289 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7290 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7291 stream * qm_ci_stream_off;
7293 queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7294 q->int_queue_len : HL_QUEUE_LENGTH;
7296 hdev->asic_funcs->hw_queues_lock(hdev);
7299 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7303 /* we should start printing from ci - 1 */
7304 ci = gaudi_queue_idx_dec(ci, queue_len);
7306 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7311 bd = q->kernel_address;
7314 len = le32_to_cpu(bd->len);
7315 /* len 0 means an uninitialized entry - break */
7319 addr = le64_to_cpu(bd->ptr);
7321 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7322 stream, ci, addr, len);
7324 /* get previous ci, wrap if needed */
7325 ci = gaudi_queue_idx_dec(ci, queue_len);
7328 hdev->asic_funcs->hw_queues_unlock(hdev);
7332 * print_qman_data_on_err - extract QMAN data on error
7334 * @hdev: pointer to the habanalabs device structure
7335 * @qid_base: first QID of the QMAN (out of 4 streams)
7336 * @stream: the QMAN's stream
7337 * @qman_base: base address of QMAN registers block
7339 * This function attempts to extract as much data as possible on a QMAN error.
7340 * On an upper CP, print the SW config stream data and the last 8 PQEs.
7341 * On the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7343 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7344 u32 stream, u64 qman_base)
7348 if (stream != QMAN_STREAMS) {
7349 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7354 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7356 for (i = 0; i < QMAN_STREAMS; i++)
7357 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7361 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7362 const char *qm_name,
7366 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7367 u64 glbl_sts_addr, arb_err_addr;
7370 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7371 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7373 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7374 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7375 glbl_sts_clr_val = 0;
7376 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7381 if (i == QMAN_STREAMS)
7382 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7384 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7386 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7387 if (glbl_sts_val & BIT(j)) {
7388 dev_err_ratelimited(hdev->dev,
7389 "%s %s. err cause: %s\n",
7391 gaudi_qman_error_cause[j]);
7392 glbl_sts_clr_val |= BIT(j);
7396 /* Write 1 to clear errors */
7397 if (!hdev->stop_on_err)
7398 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7400 print_qman_data_on_err(hdev, qid_base, i, qman_base);
7403 arb_err_val = RREG32(arb_err_addr);
7408 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7409 if (arb_err_val & BIT(j)) {
7410 dev_err_ratelimited(hdev->dev,
7411 "%s ARB_ERR. err cause: %s\n",
7413 gaudi_qman_arb_error_cause[j]);
7418 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7419 struct hl_eq_sm_sei_data *sei_data)
7421 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7423 /* Flip the bits as the enum is ordered in the opposite way */
7424 index = (index ^ 0x3) & 0x3;
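/*
 * i.e. SEI index 0 maps to sync manager 3, 1 to 2, 2 to 1 and 3 to 0,
 * matching the reversed ordering of gaudi_sync_manager_names[].
 */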
7426 switch (sei_data->sei_cause) {
7427 case SM_SEI_SO_OVERFLOW:
7428 dev_err_ratelimited(hdev->dev,
7429 "%s SEI Error: SOB Group %u overflow/underflow",
7430 gaudi_sync_manager_names[index],
7431 le32_to_cpu(sei_data->sei_log));
7433 case SM_SEI_LBW_4B_UNALIGNED:
7434 dev_err_ratelimited(hdev->dev,
7435 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7436 gaudi_sync_manager_names[index],
7437 le32_to_cpu(sei_data->sei_log));
7439 case SM_SEI_AXI_RESPONSE_ERR:
7440 dev_err_ratelimited(hdev->dev,
7441 "%s SEI Error: AXI ID %u response error",
7442 gaudi_sync_manager_names[index],
7443 le32_to_cpu(sei_data->sei_log));
7446 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7447 sei_data->sei_cause);
7452 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7453 struct hl_eq_ecc_data *ecc_data)
7455 struct ecc_info_extract_params params;
7456 u64 ecc_address = 0, ecc_syndrom = 0;
7457 u8 index, memory_wrapper_idx = 0;
7458 bool extract_info_from_fw;
7461 switch (event_type) {
7462 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7463 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7464 extract_info_from_fw = true;
7466 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7467 index = event_type - GAUDI_EVENT_TPC0_SERR;
7468 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7469 params.num_memories = 90;
7470 params.derr = false;
7471 params.disable_clock_gating = true;
7472 extract_info_from_fw = false;
7474 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7475 index = event_type - GAUDI_EVENT_TPC0_DERR;
7476 params.block_address =
7477 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7478 params.num_memories = 90;
7480 params.disable_clock_gating = true;
7481 extract_info_from_fw = false;
7483 case GAUDI_EVENT_MME0_ACC_SERR:
7484 case GAUDI_EVENT_MME1_ACC_SERR:
7485 case GAUDI_EVENT_MME2_ACC_SERR:
7486 case GAUDI_EVENT_MME3_ACC_SERR:
7487 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7488 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7489 params.num_memories = 128;
7490 params.derr = false;
7491 params.disable_clock_gating = true;
7492 extract_info_from_fw = false;
7494 case GAUDI_EVENT_MME0_ACC_DERR:
7495 case GAUDI_EVENT_MME1_ACC_DERR:
7496 case GAUDI_EVENT_MME2_ACC_DERR:
7497 case GAUDI_EVENT_MME3_ACC_DERR:
7498 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7499 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7500 params.num_memories = 128;
7502 params.disable_clock_gating = true;
7503 extract_info_from_fw = false;
7505 case GAUDI_EVENT_MME0_SBAB_SERR:
7506 case GAUDI_EVENT_MME1_SBAB_SERR:
7507 case GAUDI_EVENT_MME2_SBAB_SERR:
7508 case GAUDI_EVENT_MME3_SBAB_SERR:
7509 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7510 params.block_address =
7511 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7512 params.num_memories = 33;
7513 params.derr = false;
7514 params.disable_clock_gating = true;
7515 extract_info_from_fw = false;
7517 case GAUDI_EVENT_MME0_SBAB_DERR:
7518 case GAUDI_EVENT_MME1_SBAB_DERR:
7519 case GAUDI_EVENT_MME2_SBAB_DERR:
7520 case GAUDI_EVENT_MME3_SBAB_DERR:
7521 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7522 params.block_address =
7523 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7524 params.num_memories = 33;
7526 params.disable_clock_gating = true;
7527 extract_info_from_fw = false;
7533 if (extract_info_from_fw) {
7534 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7535 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7536 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7538 rc = gaudi_extract_ecc_info(hdev, ¶ms, &ecc_address,
7539 &ecc_syndrom, &memory_wrapper_idx);
7545 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7546 ecc_address, ecc_syndrom, memory_wrapper_idx);
7549 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7556 switch (event_type) {
7557 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7558 index = event_type - GAUDI_EVENT_TPC0_QM;
7559 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7560 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7561 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7563 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7564 index = event_type - GAUDI_EVENT_MME0_QM;
7565 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7566 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7567 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7569 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7570 index = event_type - GAUDI_EVENT_DMA0_QM;
7571 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7572 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7575 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7576 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7578 case GAUDI_EVENT_NIC0_QM0:
7579 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7580 qman_base = mmNIC0_QM0_BASE;
7581 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7583 case GAUDI_EVENT_NIC0_QM1:
7584 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7585 qman_base = mmNIC0_QM1_BASE;
7586 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7588 case GAUDI_EVENT_NIC1_QM0:
7589 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7590 qman_base = mmNIC1_QM0_BASE;
7591 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7593 case GAUDI_EVENT_NIC1_QM1:
7594 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7595 qman_base = mmNIC1_QM1_BASE;
7596 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7598 case GAUDI_EVENT_NIC2_QM0:
7599 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7600 qman_base = mmNIC2_QM0_BASE;
7601 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7603 case GAUDI_EVENT_NIC2_QM1:
7604 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7605 qman_base = mmNIC2_QM1_BASE;
7606 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7608 case GAUDI_EVENT_NIC3_QM0:
7609 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7610 qman_base = mmNIC3_QM0_BASE;
7611 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7613 case GAUDI_EVENT_NIC3_QM1:
7614 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7615 qman_base = mmNIC3_QM1_BASE;
7616 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7618 case GAUDI_EVENT_NIC4_QM0:
7619 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7620 qman_base = mmNIC4_QM0_BASE;
7621 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7623 case GAUDI_EVENT_NIC4_QM1:
7624 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7625 qman_base = mmNIC4_QM1_BASE;
7626 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7632 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7635 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7640 gaudi_get_event_desc(event_type, desc, sizeof(desc));
7641 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7645 gaudi_print_razwi_info(hdev);
7646 gaudi_print_mmu_error_info(hdev);
7650 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7651 struct cpucp_pkt_sync_err *sync_err)
7653 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7655 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7656 sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7659 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7660 struct hl_eq_fw_alive *fw_alive)
7663 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7664 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7665 "Minor" : "Critical", fw_alive->process_id,
7666 fw_alive->thread_id, fw_alive->uptime_seconds);
7669 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7671 struct gaudi_device *gaudi = hdev->asic_specific;
7673 /* Unmask all IRQs since some could have been received
7674 * during the soft reset
7676 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7679 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7680 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7682 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7685 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7686 CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7687 if (!hbm_ecc_data) {
7688 dev_err(hdev->dev, "No FW ECC data");
7692 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7693 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7694 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7695 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7696 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7697 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7698 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7699 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7700 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7701 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7702 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7703 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7704 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7705 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7708 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7709 device, ch, wr_par, rd_par, ca_par, serr, derr);
7711 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7712 device, ch, hbm_ecc_data->first_addr, type,
7713 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7714 hbm_ecc_data->dec_cnt);
7718 if (hdev->asic_prop.fw_security_enabled) {
7719 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7723 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7724 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7725 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7726 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7730 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7731 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7732 (val >> 2) & 0x1, (val >> 3) & 0x1,
7735 val2 = RREG32(base + ch * 0x1000 + 0x060);
7737 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7739 RREG32(base + ch * 0x1000 + 0x064),
7740 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7741 (val2 & 0xFF0000) >> 16,
7742 (val2 & 0xFF000000) >> 24);
7745 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7746 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7750 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7751 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7752 (val >> 2) & 0x1, (val >> 3) & 0x1,
7755 val2 = RREG32(base + ch * 0x1000 + 0x070);
7757 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7759 RREG32(base + ch * 0x1000 + 0x074),
7760 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7761 (val2 & 0xFF0000) >> 16,
7762 (val2 & 0xFF000000) >> 24);
7765 /* Clear interrupts */
7766 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7767 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7768 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7769 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7770 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7771 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7774 val = RREG32(base + 0x8F30);
7775 val2 = RREG32(base + 0x8F34);
7779 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7782 val = RREG32(base + 0x8F40);
7783 val2 = RREG32(base + 0x8F44);
7787 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7794 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7796 switch (hbm_event_type) {
7797 case GAUDI_EVENT_HBM0_SPI_0:
7798 case GAUDI_EVENT_HBM0_SPI_1:
7800 case GAUDI_EVENT_HBM1_SPI_0:
7801 case GAUDI_EVENT_HBM1_SPI_1:
7803 case GAUDI_EVENT_HBM2_SPI_0:
7804 case GAUDI_EVENT_HBM2_SPI_1:
7806 case GAUDI_EVENT_HBM3_SPI_0:
7807 case GAUDI_EVENT_HBM3_SPI_1:
7813 /* Should never happen */
7817 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7818 char *interrupt_name)
7820 struct gaudi_device *gaudi = hdev->asic_specific;
7821 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7822 bool soft_reset_required = false;
7824 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
7825 * gating, and thus cannot be done in CPU-CP and should be done instead
* by the driver.
*/
7829 mutex_lock(&gaudi->clk_gate_mutex);
7831 hdev->asic_funcs->disable_clock_gating(hdev);
7833 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7834 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7836 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7837 if (tpc_interrupts_cause & BIT(i)) {
7838 dev_err_ratelimited(hdev->dev,
7839 "TPC%d_%s interrupt cause: %s\n",
7840 tpc_id, interrupt_name,
7841 gaudi_tpc_interrupts_cause[i]);
7842 /* If this is QM error, we need to soft-reset */
7844 soft_reset_required = true;
7847 /* Clear interrupts */
7848 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7850 hdev->asic_funcs->set_clock_gating(hdev);
7852 mutex_unlock(&gaudi->clk_gate_mutex);
7854 return soft_reset_required;
static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
}

static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
}

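/*
 * A worked example of the two mappings above, assuming the event-ID
 * spacing that the arithmetic implies (DEC events two IDs apart, KRN_ERR
 * events six IDs apart): GAUDI_EVENT_TPC3_DEC - GAUDI_EVENT_TPC0_DEC == 6,
 * so 6 >> 1 == TPC 3; GAUDI_EVENT_TPC3_KRN_ERR - GAUDI_EVENT_TPC0_KRN_ERR
 * == 18, so 18 / 6 == TPC 3 as well.
 */
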
static void gaudi_print_clk_change_info(struct hl_device *hdev,
					u16 event_type)
{
	switch (event_type) {
	case GAUDI_EVENT_FIX_POWER_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}
}

static void gaudi_handle_eqe(struct hl_device *hdev,
				struct hl_eq_entry *eq_entry)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
			>> EQ_CTL_EVENT_TYPE_SHIFT);
	bool reset_required;
	u8 cause;
	int rc;

	gaudi->events_stat[event_type]++;
	gaudi->events_stat_aggregate[event_type]++;

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_DERR:
	case GAUDI_EVENT_PCIE_IF_DERR:
	case GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
		fallthrough;
	case GAUDI_EVENT_CPU_IF_ECC_DERR:
	case GAUDI_EVENT_PSOC_MEM_DERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
	case GAUDI_EVENT_MMU_DERR:
	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		goto reset_device;

	case GAUDI_EVENT_GIC500:
	case GAUDI_EVENT_AXI_ECC:
	case GAUDI_EVENT_L2_RAM_ECC:
	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
		gaudi_print_irq_info(hdev, event_type, false);
		goto reset_device;

	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_0:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		goto reset_device;

	case GAUDI_EVENT_HBM0_SPI_1:
	case GAUDI_EVENT_HBM1_SPI_1:
	case GAUDI_EVENT_HBM2_SPI_1:
	case GAUDI_EVENT_HBM3_SPI_1:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_TPC0_DEC:
	case GAUDI_EVENT_TPC1_DEC:
	case GAUDI_EVENT_TPC2_DEC:
	case GAUDI_EVENT_TPC3_DEC:
	case GAUDI_EVENT_TPC4_DEC:
	case GAUDI_EVENT_TPC5_DEC:
	case GAUDI_EVENT_TPC6_DEC:
	case GAUDI_EVENT_TPC7_DEC:
		gaudi_print_irq_info(hdev, event_type, true);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_dec_event_to_tpc_id(event_type),
					"AXI_SLV_DEC_Error");
		if (reset_required) {
			dev_err(hdev->dev, "hard reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
		}
		break;

	case GAUDI_EVENT_TPC0_KRN_ERR:
	case GAUDI_EVENT_TPC1_KRN_ERR:
	case GAUDI_EVENT_TPC2_KRN_ERR:
	case GAUDI_EVENT_TPC3_KRN_ERR:
	case GAUDI_EVENT_TPC4_KRN_ERR:
	case GAUDI_EVENT_TPC5_KRN_ERR:
	case GAUDI_EVENT_TPC6_KRN_ERR:
	case GAUDI_EVENT_TPC7_KRN_ERR:
		gaudi_print_irq_info(hdev, event_type, true);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_krn_event_to_tpc_id(event_type),
					"KRN_ERR");
		if (reset_required) {
			dev_err(hdev->dev, "hard reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
		}
		break;

	case GAUDI_EVENT_PCIE_CORE_SERR:
	case GAUDI_EVENT_PCIE_IF_SERR:
	case GAUDI_EVENT_PCIE_PHY_SERR:
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
	case GAUDI_EVENT_CPU_IF_ECC_SERR:
	case GAUDI_EVENT_PSOC_MEM_SERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
		fallthrough;
	case GAUDI_EVENT_MMU_SERR:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PCIE_DEC:
	case GAUDI_EVENT_MME0_WBC_RSP:
	case GAUDI_EVENT_MME0_SBAB0_RSP:
	case GAUDI_EVENT_MME1_WBC_RSP:
	case GAUDI_EVENT_MME1_SBAB0_RSP:
	case GAUDI_EVENT_MME2_WBC_RSP:
	case GAUDI_EVENT_MME2_SBAB0_RSP:
	case GAUDI_EVENT_MME3_WBC_RSP:
	case GAUDI_EVENT_MME3_SBAB0_RSP:
	case GAUDI_EVENT_CPU_AXI_SPLITTER:
	case GAUDI_EVENT_PSOC_AXI_DEC:
	case GAUDI_EVENT_PSOC_PRSTN_FALL:
	case GAUDI_EVENT_MMU_PAGE_FAULT:
	case GAUDI_EVENT_MMU_WR_PERM:
	case GAUDI_EVENT_RAZWI_OR_ADC:
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		fallthrough;
	case GAUDI_EVENT_NIC0_QM0:
	case GAUDI_EVENT_NIC0_QM1:
	case GAUDI_EVENT_NIC1_QM0:
	case GAUDI_EVENT_NIC1_QM1:
	case GAUDI_EVENT_NIC2_QM0:
	case GAUDI_EVENT_NIC2_QM1:
	case GAUDI_EVENT_NIC3_QM0:
	case GAUDI_EVENT_NIC3_QM1:
	case GAUDI_EVENT_NIC4_QM0:
	case GAUDI_EVENT_NIC4_QM1:
	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_qman_err(hdev, event_type);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
		gaudi_print_irq_info(hdev, event_type, true);
		goto reset_device;

	case GAUDI_EVENT_TPC0_BMON_SPMU:
	case GAUDI_EVENT_TPC1_BMON_SPMU:
	case GAUDI_EVENT_TPC2_BMON_SPMU:
	case GAUDI_EVENT_TPC3_BMON_SPMU:
	case GAUDI_EVENT_TPC4_BMON_SPMU:
	case GAUDI_EVENT_TPC5_BMON_SPMU:
	case GAUDI_EVENT_TPC6_BMON_SPMU:
	case GAUDI_EVENT_TPC7_BMON_SPMU:
	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
		gaudi_print_irq_info(hdev, event_type, false);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_print_sm_sei_info(hdev, event_type,
					&eq_entry->sm_sei_data);
		rc = hl_state_dump(hdev);
		if (rc)
			dev_err(hdev->dev,
				"Error during system state dump %d\n", rc);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
		gaudi_print_clk_change_info(hdev, event_type);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PSOC_GPIO_U16_0:
		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
		dev_err(hdev->dev,
			"Received high temp H/W interrupt %d (cause %d)\n",
			event_type, cause);
		break;

	case GAUDI_EVENT_DEV_RESET_REQ:
		gaudi_print_irq_info(hdev, event_type, false);
		goto reset_device;

	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
		goto reset_device;

	case GAUDI_EVENT_FW_ALIVE_S:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
		goto reset_device;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}

	return;

reset_device:
	if (hdev->hard_reset_on_fw_events)
		hl_device_reset(hdev, HL_RESET_HARD);
	else
		hl_fw_unmask_irq(hdev, event_type);
}

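/*
 * gaudi_handle_eqe() is installed as the ->handle_eqe callback in the
 * gaudi_funcs table at the bottom of this file; the common habanalabs code
 * is expected to call it once for every event-queue entry the firmware
 * posts. Events that jump to reset_device escalate to a hard reset only
 * when hard_reset_on_fw_events is set; otherwise the event IRQ is simply
 * unmasked again in the firmware and the device keeps running.
 */
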
static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
					u32 *size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (aggregate) {
		*size = (u32) sizeof(gaudi->events_stat_aggregate);
		return gaudi->events_stat_aggregate;
	}

	*size = (u32) sizeof(gaudi->events_stat);
	return gaudi->events_stat;
}

static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
					u32 flags)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_PS, 3);
	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
	WREG32(mmSTLB_INV_PS, 2);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_PS,
		status,
		!status,
		1000,
		timeout_usec);

	WREG32(mmSTLB_INV_SET, 0);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, HL_RESET_HARD);
	}

	return rc;
}

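/*
 * The sequence above is a producer/consumer handshake with the STLB:
 * write 3 to STLB_INV_PS, push a new producer index to STLB_CACHE_INV,
 * drop STLB_INV_PS to 2, then poll STLB_INV_PS until the block brings it
 * back to 0 (the exact PS encoding is inferred from this sequence; it is
 * not documented here). On timeout a hard reset is the only safe recovery,
 * since continuing with a potentially stale TLB could let an old mapping
 * be used for DMA.
 */
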
static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
						bool is_hard, u32 flags,
						u32 asid, u64 va, u64 size)
{
	/* Treat as invalidate all because there is no range invalidation
	 * in Gaudi
	 */
	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
}

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
						u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(MMU_ASID, asid);
	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_BUSY, 0x80000000);

	rc = hl_poll_timeout(
		hdev,
		MMU_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}

static int gaudi_send_heartbeat(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_heartbeat(hdev);
}

static int gaudi_cpucp_info_get(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1);
	if (rc)
		return rc;

	if (!strlen(prop->cpucp_info.card_name))
		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
				CARD_NAME_MAX_LEN);

	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);

	set_default_power_values(hdev);

	hdev->max_power = prop->max_power_default;

	return 0;
}

static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
					u8 mask_len, struct seq_file *s)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
	unsigned long *mask = (unsigned long *)mask_arr;
	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
	bool is_idle = true, is_eng_idle, is_slave;
	u64 offset;
	int i, dma_id, port;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	if (s)
		seq_puts(s,
			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
			"---  -------  ------------  ----------  -------------\n");

	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		offset = dma_id * DMA_QMAN_OFFSET;

		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
		if (s)
			seq_printf(s, fmt, dma_id,
				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				qm_cgm_sts, dma_core_sts0);
	}

	if (s)
		seq_puts(s,
			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
			"---  -------  ------------  ----------  ----------\n");

	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		offset = i * TPC_QMAN_OFFSET;
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
		if (s)
			seq_printf(s, fmt, i,
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
	}

	if (s)
		seq_puts(s,
			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
			"---  -------  ------------  ----------  -----------\n");

	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
		offset = i * MME_QMAN_OFFSET;
		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
		is_eng_idle = IS_MME_IDLE(mme_arch_sts);

		/* MME 1 & 3 are slaves, no need to check their QMANs */
		is_slave = i % 2;
		if (!is_slave) {
			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
		}

		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
		if (s) {
			if (!is_slave)
				seq_printf(s, fmt, i,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
			else
				seq_printf(s, mme_slave_fmt, i,
					is_eng_idle ? "Y" : "N", "-",
					"-", mme_arch_sts);
		}
	}

	if (s)
		seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
				"---  -------  ------------  ----------\n");

	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
		offset = i * NIC_MACRO_QMAN_OFFSET;
		port = 2 * i;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (s)
				seq_printf(s, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}

		port = 2 * i + 1;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (s)
				seq_printf(s, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}
	}

	if (s)
		seq_puts(s, "\n");

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);

	return is_idle;
}

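/*
 * Mask convention used above: a bit set in mask_arr marks a *busy* engine
 * (set_bit() is only called when !is_eng_idle), indexed by the
 * GAUDI_ENGINE_ID_* enumeration, so callers can report exactly which
 * engines kept the device from being considered idle.
 */
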
static void gaudi_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_lock(&gaudi->hw_queues_lock);
}

static void gaudi_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_unlock(&gaudi->hw_queues_lock);
}

static u32 gaudi_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}

static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
				size_t max_size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_eeprom_data(hdev, data, max_size);
}

/*
 * this function should be used only during initialization and/or after reset,
 * when there are no active users.
 */
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 kernel_timeout;
	u32 status, offset;
	int rc;

	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);

	if (hdev->pldm)
		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
	else
		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
			upper_32_bits(tpc_kernel));
	/* set a valid LUT pointer, content is of no significance */
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
			lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));

	WREG32(mmTPC0_CFG_TPC_CMD + offset,
			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d icache prefetch\n",
			tpc_id);
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
		1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);

	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d vector pipe\n",
			tpc_id);
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
		status,
		(status == 0),
		1000,
		kernel_timeout);

	hdev->asic_funcs->set_clock_gating(hdev);
	mutex_unlock(&gaudi->clk_gate_mutex);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d kernel to execute\n",
			tpc_id);
		return -EIO;
	}

	return 0;
}

static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int min_alloc_order, rc, collective_cb_size;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	hdev->internal_cb_pool_virt_addr =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					HOST_SPACE_INTERNAL_CB_SZ,
					&hdev->internal_cb_pool_dma_addr,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
			sizeof(struct packet_fence);
	min_alloc_order = ilog2(collective_cb_size);

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev,
			"Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool,
				(uintptr_t) hdev->internal_cb_pool_virt_addr,
				HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base) {
		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}

	mutex_lock(&ctx->mmu_lock);
	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
			hdev->internal_cb_pool_dma_addr,
			HOST_SPACE_INTERNAL_CB_SZ);

	hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
	mutex_unlock(&ctx->mmu_lock);

	if (rc)
		goto unreserve_internal_cb_pool;

	return 0;

unreserve_internal_cb_pool:
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HOST_SPACE_INTERNAL_CB_SZ,
			hdev->internal_cb_pool_virt_addr,
			hdev->internal_cb_pool_dma_addr);

	return rc;
}

static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	mutex_lock(&ctx->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
	mutex_unlock(&ctx->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HOST_SPACE_INTERNAL_CB_SZ,
			hdev->internal_cb_pool_virt_addr,
			hdev->internal_cb_pool_dma_addr);
}

static int gaudi_ctx_init(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return 0;

	gaudi_mmu_prepare(ctx->hdev, ctx->asid);
	return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
}

static void gaudi_ctx_fini(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return;

	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
}

static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return gaudi_cq_assignment[cq_idx];
}

static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) +
		sizeof(struct packet_msg_prot) * 2;
}

static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 +
			sizeof(struct packet_fence) +
			sizeof(struct packet_msg_prot) * 2;
}

static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = cb->kernel_address + size;
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}

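/*
 * The signal CB built above is a single MSG_SHORT that makes the sync
 * manager add 1 to sync object <sob_id> (mode ADD, value 1).
 * gaudi_get_signal_cb_size() reserves room for this packet plus two
 * MSG_PROT packets; the MSG_PROTs are not written here, so the assumption
 * is that they are appended later via the add_end_of_cb_packets callback
 * when the job is actually submitted.
 */
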
static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
				u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
		u16 sob_val, u16 mon_id)
{
	u64 monitor_base;
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u16 msg_addr_offset;
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev,
			"sob_base %u (mask %#x) is not valid\n",
			sob_base, sob_mask);
		return 0;
	}

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;

	memset(pkt, 0, pkt_size);

	/* Monitor config packet: bind the monitor to a sync object */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL*/
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}

static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}

static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
		struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

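/*
 * Layout of the wait CB assembled above, matching gaudi_get_wait_cb_size()
 * (four MSG_SHORTs plus a FENCE, with two MSG_PROTs reserved on top):
 * three MSG_SHORTs program the monitor payload (fence address low, high,
 * and the value 1 to write), a fourth arms the monitor on the SOB group,
 * and the FENCE packet stalls the stream until the monitor fires and the
 * CP's fence counter 2 reaches its target value of 1, which the fence
 * then consumes.
 */
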
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
	int rc;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
			CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 1, 0);
	if (rc)
		dev_err(hdev->dev, "failed resetting sob %u", hw_sob->sob_id);

	kref_init(&hw_sob->kref);
}

static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
{
	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
							HL_POWER9_HOST_MAGIC) {
		hdev->power9_64bit_dma_enable = 1;
		hdev->dma_mask = 64;
	} else {
		hdev->power9_64bit_dma_enable = 0;
		hdev->dma_mask = 48;
	}
}

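/*
 * HL_POWER9_HOST_MAGIC is presumably planted in this NON_RST_FLOPS
 * scratchpad by the firmware on POWER9 hosts (the register name suggests
 * it survives soft resets). On such hosts the device uses a full 64-bit
 * DMA mask; everywhere else the driver stays with 48 bits.
 */
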
static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

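/*
 * The device time is composed from two halves of a free-running timestamp
 * counter: CNTCVU supplies bits [63:32] and CNTCVL bits [31:0]. The two
 * reads are not atomic, so a low-word wraparound between them can skew the
 * result by one high-word increment; callers of this coarse timestamp
 * apparently tolerate that.
 */
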
static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}

static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}

static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* Reg value represents a partial address of sync object,
	 * it is used as unique identifier. For this we need to
	 * clear the cutoff cfg base bits from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= (u32)CFG_BASE;

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}

static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
		/* TPC registers must be accessed with clock gating disabled */
		mutex_lock(&gaudi->clk_gate_mutex);
		hdev->asic_funcs->disable_clock_gating(hdev);

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		/* We can reenable clock_gating */
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
			/* MME registers must be accessed with clock gating
			 * disabled
			 */
			mutex_lock(&gaudi->clk_gate_mutex);
			hdev->asic_funcs->disable_clock_gating(hdev);

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			/* We can reenable clock_gating */
			hdev->asic_funcs->set_clock_gating(hdev);
			mutex_unlock(&gaudi->clk_gate_mutex);

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}

static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}

static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)));
}

static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			 sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
			sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}

static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};

static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}

static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sg = gaudi_dma_map_sg,
	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read32 = gaudi_debugfs_read32,
	.debugfs_write32 = gaudi_debugfs_write32,
	.debugfs_read64 = gaudi_debugfs_read64,
	.debugfs_write64 = gaudi_debugfs_write64,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.set_pll_profile = gaudi_set_pll_profile,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.send_heartbeat = gaudi_send_heartbeat,
	.set_clock_gating = gaudi_set_clock_gating,
	.disable_clock_gating = gaudi_disable_clock_gating,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.soft_reset_late_init = gaudi_soft_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.get_clk_rate = gaudi_get_clk_rate,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
	.get_device_time = gaudi_get_device_time,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}