// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is always not
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */
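/*
 * Illustrative sketch of the secure-DMA flow described above (helper names
 * are hypothetical, not part of this driver):
 *
 *	if (!device_is_idle(hdev))			// hypothetical check
 *		return -EBUSY;
 *	set_dma_channel_secured(hdev, 0, true);		// hypothetical
 *	rc = gaudi_send_job_on_qman0(hdev, job);	// real helper below
 *	set_dma_channel_secured(hdev, 0, false);	// hypothetical
 */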
62 #define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
66 #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
68 #define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
73 #define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */
83 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
85 #define GAUDI_MAX_STRING_LEN 20
87 #define GAUDI_CB_POOL_CB_CNT 512
88 #define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE 16
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
98 #define GAUDI_ARB_WDT_TIMEOUT 0x1000000
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK (\
101 BIT(GAUDI_ENGINE_ID_MME_0) |\
102 BIT(GAUDI_ENGINE_ID_MME_2) |\
103 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
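/*
 * Small sketch (an assumption, not existing driver code): how a debugfs
 * handler might test whether clock gating of a given engine may be toggled,
 * using the mask defined above.
 */
static inline bool gaudi_example_clk_gate_allowed(u32 engine_id)
{
	return !!(GAUDI_CLK_GATE_DEBUGFS_MASK & BIT_ULL(engine_id));
}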
105 #define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */
107 #define GAUDI_PLL_MAX 10
109 #define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010")
111 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
112 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
113 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
114 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
118 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
119 [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
120 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
121 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
122 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
123 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
124 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
125 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};
129 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
130 [0] = GAUDI_QUEUE_ID_DMA_0_0,
131 [1] = GAUDI_QUEUE_ID_DMA_0_1,
132 [2] = GAUDI_QUEUE_ID_DMA_0_2,
133 [3] = GAUDI_QUEUE_ID_DMA_0_3,
134 [4] = GAUDI_QUEUE_ID_DMA_1_0,
135 [5] = GAUDI_QUEUE_ID_DMA_1_1,
136 [6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3
};
140 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
141 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
142 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
143 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
144 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
145 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
146 [PACKET_REPEAT] = sizeof(struct packet_repeat),
147 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
148 [PACKET_FENCE] = sizeof(struct packet_fence),
149 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
150 [PACKET_NOP] = sizeof(struct packet_nop),
151 [PACKET_STOP] = sizeof(struct packet_stop),
152 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
153 [PACKET_WAIT] = sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
};
static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}
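/*
 * Usage sketch (hypothetical helper, not in the driver): a CB parser can
 * combine validate_packet_id() with gaudi_packet_sizes[] to advance over
 * fixed-size packets; 0 signals an invalid opcode to the caller.
 */
static inline u32 gaudi_example_packet_size(enum packet_id id)
{
	return validate_packet_id(id) ? gaudi_packet_sizes[id] : 0;
}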
180 static const char * const
181 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
182 "tpc_address_exceed_slm",
184 "tpc_spu_mac_overflow",
185 "tpc_spu_addsub_overflow",
186 "tpc_spu_abs_overflow",
187 "tpc_spu_fp_dst_nan_inf",
188 "tpc_spu_fp_dst_denorm",
189 "tpc_vpu_mac_overflow",
190 "tpc_vpu_addsub_overflow",
191 "tpc_vpu_abs_overflow",
192 "tpc_vpu_fp_dst_nan_inf",
193 "tpc_vpu_fp_dst_denorm",
195 "tpc_illegal_instruction",
196 "tpc_pc_wrap_around",
204 static const char * const
205 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
209 "CP error due to undefined OPCODE",
210 "CP encountered STOP OPCODE",
212 "CP WRREG32 or WRBULK returned error",
214 "FENCE 0 inc over max value and clipped",
215 "FENCE 1 inc over max value and clipped",
216 "FENCE 2 inc over max value and clipped",
217 "FENCE 3 inc over max value and clipped",
218 "FENCE 0 dec under min value and clipped",
219 "FENCE 1 dec under min value and clipped",
220 "FENCE 2 dec under min value and clipped",
221 "FENCE 3 dec under min value and clipped"
224 static const char * const
225 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
226 "Choice push while full error",
227 "Choice Q watchdog error",
228 "MSG AXI LBW returned with error"
231 enum gaudi_sm_sei_cause {
232 GAUDI_SM_SEI_SO_OVERFLOW,
233 GAUDI_SM_SEI_LBW_4B_UNALIGNED,
	GAUDI_SM_SEI_AXI_RESPONSE_ERR
};
237 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
238 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
239 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
240 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
241 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
242 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
243 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
244 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
245 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
246 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
252 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
253 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
254 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
255 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
296 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
304 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
305 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
306 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
307 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
308 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
309 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
310 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
311 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
312 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
313 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
314 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
315 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
316 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
317 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
318 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
319 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
320 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
321 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
322 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
323 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
324 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
325 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
326 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
327 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
328 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
329 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
330 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
331 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
332 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
333 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
334 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
335 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
336 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
337 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
338 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
339 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
340 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
341 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
342 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
343 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
344 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
345 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
346 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
347 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
348 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
349 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};
353 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
354 { .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
355 { .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
356 { .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
357 { .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
358 { .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
359 { .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" },
360 { .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
361 { .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
362 { .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
363 { .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
364 { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
365 { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
366 { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
367 { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
368 { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
369 { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
370 { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
371 { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
372 { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
373 { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
374 { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
375 { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
376 { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
377 { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
378 { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
379 { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};
383 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
384 { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
386 { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
387 { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
388 { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
389 { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
390 { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
391 { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
392 { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
393 { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};
397 static s64 gaudi_state_dump_specs_props[] = {
398 [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
399 [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
400 [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
401 [SP_MON_OBJ_WR_ADDR_LOW] =
402 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
403 [SP_MON_OBJ_WR_ADDR_HIGH] =
404 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
405 [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
406 [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
407 [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
408 [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
409 [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
410 [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
411 [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
412 [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
413 [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
414 [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
415 [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
416 [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
417 [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
418 [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
419 [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
420 [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
421 [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
422 [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
423 [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
424 [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
425 [SP_FENCE0_CNT_OFFSET] =
426 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
427 [SP_FENCE0_RDATA_OFFSET] =
428 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
};
433 static const char * const gaudi_sync_manager_names[] = {
434 "SYNC_MGR_E_N", "SYNC_MGR_W_N", "SYNC_MGR_E_S", "SYNC_MGR_W_S",
struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool disable_clock_gating;
};
445 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
447 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
448 struct hl_cs_job *job);
449 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
451 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
452 u32 num_regs, u32 val);
453 static int gaudi_schedule_register_memset(struct hl_device *hdev,
454 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
455 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
457 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
458 static int gaudi_cpucp_info_get(struct hl_device *hdev);
459 static void gaudi_disable_clock_gating(struct hl_device *hdev);
460 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
461 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
463 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
464 struct hl_gen_wait_properties *prop);
static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}
static inline void set_default_power_values(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	if (hdev->card_type == cpucp_card_type_pmc) {
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
		prop->dc_power_default = DC_POWER_DEFAULT_PMC;
	} else {
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
	}
}
static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;
514 for (i = 0 ; i < prop->max_queues ; i++) {
515 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
516 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
517 prop->hw_queues_props[i].driver_only = 0;
518 prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
521 num_sync_stream_queues++;
522 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
523 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
524 prop->hw_queues_props[i].driver_only = 1;
525 prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
528 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
529 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
530 prop->hw_queues_props[i].driver_only = 0;
531 prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;
		}
536 prop->hw_queues_props[i].collective_mode =
					get_collective_mode(hdev, i);
	}
540 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
541 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
542 prop->collective_first_sob = 0;
543 prop->collective_first_mon = 0;
545 /* 2 SOBs per internal queue stream are reserved for collective */
546 prop->sync_stream_first_sob =
547 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
548 * QMAN_STREAMS * HL_RSVD_SOBS;
	/* 1 monitor per internal queue stream is reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
553 prop->sync_stream_first_mon =
554 (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
555 (NUMBER_OF_EXT_HW_QUEUES * 2);
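	/*
	 * Worked example (constant values are assumptions for illustration):
	 * with NUMBER_OF_COLLECTIVE_QUEUES == 12, QMAN_STREAMS == 4 and
	 * NUMBER_OF_EXT_HW_QUEUES == 8, the first sync-stream monitor is
	 * 12 * 4 + 8 * 2 = 64.
	 */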
557 prop->dram_base_address = DRAM_PHYS_BASE;
558 prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
561 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
563 prop->sram_base_address = SRAM_BASE_ADDR;
564 prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
567 prop->sram_user_base_address = prop->sram_base_address +
568 SRAM_USER_BASE_OFFSET;
570 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
575 prop->mmu_pte_size = HL_PTE_SIZE;
576 prop->mmu_hop_table_size = HOP_TABLE_SIZE;
577 prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
578 prop->dram_page_size = PAGE_SIZE_2MB;
579 prop->dram_supports_virtual_memory = false;
581 prop->pmmu.hop0_shift = HOP0_SHIFT;
582 prop->pmmu.hop1_shift = HOP1_SHIFT;
583 prop->pmmu.hop2_shift = HOP2_SHIFT;
584 prop->pmmu.hop3_shift = HOP3_SHIFT;
585 prop->pmmu.hop4_shift = HOP4_SHIFT;
586 prop->pmmu.hop0_mask = HOP0_MASK;
587 prop->pmmu.hop1_mask = HOP1_MASK;
588 prop->pmmu.hop2_mask = HOP2_MASK;
589 prop->pmmu.hop3_mask = HOP3_MASK;
590 prop->pmmu.hop4_mask = HOP4_MASK;
591 prop->pmmu.start_addr = VA_HOST_SPACE_START;
592 prop->pmmu.end_addr =
593 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
594 prop->pmmu.page_size = PAGE_SIZE_4KB;
595 prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
	/* PMMU and HPMMU are the same except for the page size */
598 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
599 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
601 /* shifts and masks are the same in PMMU and DMMU */
602 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
603 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
604 prop->dmmu.end_addr = VA_HOST_SPACE_END;
605 prop->dmmu.page_size = PAGE_SIZE_2MB;
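	/*
	 * In other words, the host VA space is split in half: the PMMU maps
	 * [VA_HOST_SPACE_START, START + SIZE/2) and the DMMU maps
	 * [START + SIZE/2, VA_HOST_SPACE_END], so a driver-side range check
	 * (a sketch, not existing code) could be:
	 *
	 *	bool is_pmmu_va = (va >= prop->pmmu.start_addr &&
	 *			   va <= prop->pmmu.end_addr);
	 */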
607 prop->cfg_size = CFG_SIZE;
608 prop->max_asid = MAX_ASID;
609 prop->num_of_events = GAUDI_EVENT_SIZE;
610 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
612 set_default_power_values(hdev);
614 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
615 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
617 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
618 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);
623 prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
625 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
626 prop->sync_stream_first_sob +
627 (num_sync_stream_queues * HL_RSVD_SOBS);
628 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
629 prop->sync_stream_first_mon +
630 (num_sync_stream_queues * HL_RSVD_MONS);
632 prop->first_available_user_msix_interrupt = USHRT_MAX;
634 for (i = 0 ; i < HL_MAX_DCORES ; i++)
635 prop->first_available_cq[i] = USHRT_MAX;
637 prop->fw_cpu_boot_dev_sts0_valid = false;
638 prop->fw_cpu_boot_dev_sts1_valid = false;
639 prop->hard_reset_done_by_fw = false;
640 prop->gic_interrupts_enable = true;
645 static int gaudi_pci_bars_map(struct hl_device *hdev)
647 static const char * const name[] = {"SRAM", "CFG", "HBM"};
648 bool is_wc[3] = {false, false, true};
651 rc = hl_pci_bars_map(hdev, name, is_wc);
655 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
656 (CFG_BASE - SPI_FLASH_BASE_ADDR);
661 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
663 struct gaudi_device *gaudi = hdev->asic_specific;
664 struct hl_inbound_pci_region pci_region;
668 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
671 if (hdev->asic_prop.iatu_done_by_fw)
674 /* Inbound Region 2 - Bar 4 - Point to HBM */
675 pci_region.mode = PCI_BAR_MATCH_MODE;
676 pci_region.bar = HBM_BAR_ID;
677 pci_region.addr = addr;
678 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
683 old_addr = gaudi->hbm_bar_cur_addr;
684 gaudi->hbm_bar_cur_addr = addr;
690 static int gaudi_init_iatu(struct hl_device *hdev)
692 struct hl_inbound_pci_region inbound_region;
693 struct hl_outbound_pci_region outbound_region;
696 if (hdev->asic_prop.iatu_done_by_fw)
699 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
700 inbound_region.mode = PCI_BAR_MATCH_MODE;
701 inbound_region.bar = SRAM_BAR_ID;
702 inbound_region.addr = SRAM_BASE_ADDR;
703 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
707 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
708 inbound_region.mode = PCI_BAR_MATCH_MODE;
709 inbound_region.bar = CFG_BAR_ID;
710 inbound_region.addr = SPI_FLASH_BASE_ADDR;
711 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
715 /* Inbound Region 2 - Bar 4 - Point to HBM */
716 inbound_region.mode = PCI_BAR_MATCH_MODE;
717 inbound_region.bar = HBM_BAR_ID;
718 inbound_region.addr = DRAM_PHYS_BASE;
719 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
723 hdev->asic_funcs->set_dma_mask_from_fw(hdev);
725 /* Outbound Region 0 - Point to Host */
726 outbound_region.addr = HOST_PHYS_BASE;
727 outbound_region.size = HOST_PHYS_SIZE;
728 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
734 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
736 return RREG32(mmHW_STATE);
739 static int gaudi_early_init(struct hl_device *hdev)
741 struct asic_fixed_properties *prop = &hdev->asic_prop;
742 struct pci_dev *pdev = hdev->pdev;
746 rc = gaudi_set_fixed_properties(hdev);
748 dev_err(hdev->dev, "Failed setting fixed properties\n");
752 /* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_BAR_ID,
			(unsigned long long) pci_resource_len(pdev, SRAM_BAR_ID),
			SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}
	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev, CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}
775 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
777 /* If FW security is enabled at this point it means no access to ELBI */
778 if (hdev->asic_prop.fw_security_enabled) {
779 hdev->asic_prop.iatu_done_by_fw = true;
		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}
	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;
794 /* Check whether FW is configuring iATU */
795 if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
796 (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
797 hdev->asic_prop.iatu_done_by_fw = true;
pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;
804 /* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
					mmCPU_BOOT_DEV_STS0,
					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1,
					GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true);
		goto pci_fini;
	}
	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_info(hdev->dev,
			"H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true);
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}
833 static int gaudi_early_fini(struct hl_device *hdev)
835 kfree(hdev->asic_prop.hw_queues_props);
/*
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;
854 if (hdev->asic_prop.fw_security_enabled) {
		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	} else {
		/* Backward compatibility */
863 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
864 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
865 nr = RREG32(mmPSOC_CPU_PLL_NR);
866 nf = RREG32(mmPSOC_CPU_PLL_NF);
867 od = RREG32(mmPSOC_CPU_PLL_OD);
869 if (div_sel == DIV_SEL_REF_CLK ||
870 div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
875 } else if (div_sel == DIV_SEL_PLL_CLK ||
876 div_sel == DIV_SEL_DIVIDED_PLL) {
877 pll_clk = PLL_REF_CLK * (nf + 1) /
878 ((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev,
				"Received invalid div select value: %d",
				div_sel);
			freq = 0;
		}
	}
891 prop->psoc_timestamp_frequency = freq;
892 prop->psoc_pci_pll_nr = nr;
893 prop->psoc_pci_pll_nf = nf;
894 prop->psoc_pci_pll_od = od;
895 prop->psoc_pci_pll_div_factor = div_fctr;
900 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
901 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
903 struct asic_fixed_properties *prop = &hdev->asic_prop;
904 struct packet_lin_dma *init_tpc_mem_pkt;
905 struct hl_cs_job *job;
912 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
916 init_tpc_mem_pkt = cb->kernel_address;
917 cb_size = sizeof(*init_tpc_mem_pkt);
918 memset(init_tpc_mem_pkt, 0, cb_size);
920 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
922 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
923 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
924 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
925 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
927 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
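	/*
	 * Decoding sketch (an aside, not needed here): since ctl is composed
	 * with FIELD_PREP() above, the opcode can be recovered from a packet
	 * with the inverse helper from <linux/bitfield.h>:
	 *
	 *	u32 opcode = FIELD_GET(GAUDI_PKT_CTL_OPCODE_MASK,
	 *				le32_to_cpu(init_tpc_mem_pkt->ctl));
	 */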
929 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
930 dst_addr = (prop->sram_user_base_address &
931 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
932 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
933 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
935 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
937 dev_err(hdev->dev, "Failed to allocate a new job\n");
944 atomic_inc(&job->user_cb->cs_cnt);
945 job->user_cb_size = cb_size;
946 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
947 job->patched_cb = job->user_cb;
948 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
950 hl_debugfs_add_job(hdev, job);
952 rc = gaudi_send_job_on_qman0(hdev, job);
957 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
958 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
965 hl_debugfs_remove_job(hdev, job);
release_cb:
	atomic_dec(&cb->cs_cnt);
971 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
984 static int gaudi_init_tpc_mem(struct hl_device *hdev)
986 const struct firmware *fw;
989 dma_addr_t dma_handle;
again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}
1000 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1006 cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1007 &dma_handle, GFP_KERNEL | __GFP_ZERO);
1010 "Failed to allocate %zu of dma memory for TPC kernel\n",
1016 memcpy(cpu_addr, fw->data, fw_size);
1018 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1020 hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
					dma_handle);

out:
	release_firmware(fw);
	return rc;
}
1028 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1030 struct gaudi_device *gaudi = hdev->asic_specific;
1031 struct gaudi_collective_properties *prop = &gaudi->collective_props;
1032 struct hl_hw_queue *q;
1033 u32 i, sob_id, sob_group_id, queue_id;
1035 /* Iterate through SOB groups and assign a SOB for each slave queue */
1037 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1038 sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1040 queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1041 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1042 q = &hdev->kernel_queues[queue_id + (4 * i)];
1043 q->sync_stream_prop.collective_sob_id = sob_id + i;
	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
1049 queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1050 q = &hdev->kernel_queues[queue_id];
1051 q->sync_stream_prop.collective_sob_id =
1052 sob_id + NIC_NUMBER_OF_ENGINES;
1054 queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1055 q = &hdev->kernel_queues[queue_id];
1056 q->sync_stream_prop.collective_sob_id =
1057 sob_id + NIC_NUMBER_OF_ENGINES;
1060 static void gaudi_sob_group_hw_reset(struct kref *ref)
1062 struct gaudi_hw_sob_group *hw_sob_group =
1063 container_of(ref, struct gaudi_hw_sob_group, kref);
1064 struct hl_device *hdev = hw_sob_group->hdev;
1068 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1069 hw_sob_group->base_sob_id * 4;
1070 rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
1071 base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
1074 "failed resetting sob group - sob base %u, count %u",
1075 hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);
1077 kref_init(&hw_sob_group->kref);
1080 static void gaudi_sob_group_reset_error(struct kref *ref)
1082 struct gaudi_hw_sob_group *hw_sob_group =
1083 container_of(ref, struct gaudi_hw_sob_group, kref);
1084 struct hl_device *hdev = hw_sob_group->hdev;
1087 "SOB release shouldn't be called here, base_sob_id: %d\n",
1088 hw_sob_group->base_sob_id);
1091 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1093 struct gaudi_collective_properties *prop;
1096 prop = &gaudi->collective_props;
1098 memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1100 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1101 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1102 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1103 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1104 /* Set collective engine bit */
1105 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1106 BIT(i % HL_MAX_SOBS_PER_MONITOR);
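	/*
	 * Worked example (HL_MAX_SOBS_PER_MONITOR == 8 is an assumption used
	 * for illustration): NIC engine i = 9 sets BIT(9 % 8) == BIT(1) in
	 * mstr_sob_mask[9 / 8] == mstr_sob_mask[1], and the final i == 10
	 * then marks the collective engine as BIT(2) of the same word, so
	 * monitor 0 covers NICs 0-7 and monitor 1 covers NICs 8-9 plus the
	 * reduction engine.
	 */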
1109 static int gaudi_collective_init(struct hl_device *hdev)
1111 u32 i, sob_id, reserved_sobs_per_group;
1112 struct gaudi_collective_properties *prop;
1113 struct gaudi_device *gaudi;
1115 gaudi = hdev->asic_specific;
1116 prop = &gaudi->collective_props;
1117 sob_id = hdev->asic_prop.collective_first_sob;
1119 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1120 reserved_sobs_per_group =
1121 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1123 /* Init SOB groups */
1124 for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1125 prop->hw_sob_group[i].hdev = hdev;
1126 prop->hw_sob_group[i].base_sob_id = sob_id;
1127 sob_id += reserved_sobs_per_group;
1128 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1131 for (i = 0 ; i < QMAN_STREAMS; i++) {
1132 prop->next_sob_group_val[i] = 1;
1133 prop->curr_sob_group_idx[i] = 0;
1134 gaudi_collective_map_sobs(hdev, i);
1137 gaudi_collective_mstr_sob_mask_set(gaudi);
1142 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1144 struct gaudi_device *gaudi = hdev->asic_specific;
1145 struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1147 kref_put(&cprop->hw_sob_group[sob_group].kref,
1148 gaudi_sob_group_hw_reset);
1151 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1152 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1154 u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1155 struct gaudi_collective_properties *cprop;
1156 struct hl_gen_wait_properties wait_prop;
1157 struct hl_sync_stream_properties *prop;
1158 struct gaudi_device *gaudi;
1160 gaudi = hdev->asic_specific;
1161 cprop = &gaudi->collective_props;
1162 queue_id = job->hw_queue_id;
1163 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1166 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1167 master_monitor = prop->collective_mstr_mon_id[0];
1169 cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1172 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1173 master_sob_base, cprop->mstr_sob_mask[0],
1174 cprop->next_sob_group_val[stream],
1175 master_monitor, queue_id);
1177 wait_prop.data = (void *) job->patched_cb;
1178 wait_prop.sob_base = master_sob_base;
1179 wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1180 wait_prop.sob_val = cprop->next_sob_group_val[stream];
1181 wait_prop.mon_id = master_monitor;
1182 wait_prop.q_idx = queue_id;
1183 wait_prop.size = cb_size;
1184 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1186 master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1187 master_monitor = prop->collective_mstr_mon_id[1];
1190 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1191 master_sob_base, cprop->mstr_sob_mask[1],
1192 cprop->next_sob_group_val[stream],
1193 master_monitor, queue_id);
1195 wait_prop.sob_base = master_sob_base;
1196 wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1197 wait_prop.mon_id = master_monitor;
1198 wait_prop.size = cb_size;
1199 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1202 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1203 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1205 struct hl_gen_wait_properties wait_prop;
1206 struct hl_sync_stream_properties *prop;
1207 u32 queue_id, cb_size = 0;
1209 queue_id = job->hw_queue_id;
1210 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1212 /* Add to wait CBs using slave monitor */
1213 wait_prop.data = (void *) job->user_cb;
1214 wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1215 wait_prop.sob_mask = 0x1;
1216 wait_prop.sob_val = cs_cmpl->sob_val;
1217 wait_prop.mon_id = prop->collective_slave_mon_id;
1218 wait_prop.q_idx = queue_id;
1219 wait_prop.size = cb_size;
1222 "Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
1223 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1224 prop->collective_slave_mon_id, queue_id);
1226 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1229 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1230 prop->collective_sob_id, queue_id);
1232 cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1233 prop->collective_sob_id, cb_size, false);
1236 static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
1238 struct hl_cs_compl *signal_cs_cmpl =
1239 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1240 struct hl_cs_compl *cs_cmpl =
1241 container_of(cs->fence, struct hl_cs_compl, base_fence);
1242 struct gaudi_collective_properties *cprop;
1243 u32 stream, queue_id, sob_group_offset;
1244 struct gaudi_device *gaudi;
1245 struct hl_device *hdev;
1246 struct hl_cs_job *job;
1251 gaudi = hdev->asic_specific;
1252 cprop = &gaudi->collective_props;
1254 /* copy the SOB id and value of the signal CS */
1255 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1256 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1258 /* Calculate the stream from collective master queue (1st job) */
1259 job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1260 stream = job->hw_queue_id % 4;
1262 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1264 list_for_each_entry(job, &cs->job_list, cs_node) {
1265 queue_id = job->hw_queue_id;
1267 if (hdev->kernel_queues[queue_id].collective_mode ==
1268 HL_COLLECTIVE_MASTER)
1269 gaudi_collective_master_init_job(hdev, job, stream,
1272 gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1275 cs_cmpl->sob_group = sob_group_offset;
1277 /* Handle sob group kref and wraparound */
1278 kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1279 cprop->next_sob_group_val[stream]++;
1281 if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1283 * Decrement as we reached the max value.
1284 * The release function won't be called here as we've
1285 * just incremented the refcount.
1287 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1288 gaudi_sob_group_reset_error);
1289 cprop->next_sob_group_val[stream] = 1;
1290 /* only two SOBs are currently in use */
1291 cprop->curr_sob_group_idx[stream] =
1292 (cprop->curr_sob_group_idx[stream] + 1) &
1295 gaudi_collective_map_sobs(hdev, stream);
1297 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1298 cprop->curr_sob_group_idx[stream], stream);
1301 /* Increment kref since all slave queues are now waiting on it */
1302 kref_get(&cs_cmpl->hw_sob->kref);
	/*
	 * Must put the signal fence after the SOB refcnt increment so
	 * the SOB refcnt won't turn 0 and reset the SOB before the
	 * wait CS was submitted.
	 */
1309 hl_fence_put(cs->signal_fence);
1310 cs->signal_fence = NULL;
1313 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1314 struct hl_ctx *ctx, struct hl_cs *cs,
1315 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
1317 struct hw_queue_properties *hw_queue_prop;
1318 struct hl_cs_counters_atomic *cntr;
1319 struct hl_cs_job *job;
1324 cntr = &hdev->aggregated_cs_counters;
1326 if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI-X
		 */
1334 cb_size = sizeof(struct packet_msg_short) * 8 +
1335 sizeof(struct packet_fence) * 2 +
1336 sizeof(struct packet_msg_prot) * 2;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
	}
1349 hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1350 job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1352 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1353 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1354 dev_err(hdev->dev, "Failed to allocate a new job\n");
1358 /* Allocate internal mapped CB for non patched CBs */
1359 cb = hl_cb_kernel_create(hdev, cb_size,
1360 hdev->mmu_enable && !patched_cb);
1362 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1363 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1371 atomic_inc(&job->user_cb->cs_cnt);
1372 job->user_cb_size = cb_size;
1373 job->hw_queue_id = queue_id;
	/*
	 * No need for parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() out of two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;
1386 job->job_cb_size = job->user_cb_size;
1387 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1389 /* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);
1393 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1395 list_add_tail(&job->cs_node, &cs->job_list);
1397 hl_debugfs_add_job(hdev, job);
1402 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1403 struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
1404 u32 collective_engine_id)
1406 struct gaudi_device *gaudi = hdev->asic_specific;
1407 struct hw_queue_properties *hw_queue_prop;
1408 u32 queue_id, collective_queue, num_jobs;
1409 u32 stream, nic_queue, nic_idx = 0;
1413 /* Verify wait queue id is configured as master */
1414 hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}
1422 /* Verify engine id is supported */
1423 if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1424 collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1426 "Collective wait does not support engine %u\n",
1427 collective_engine_id);
1431 stream = wait_queue_id % 4;
1433 if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1434 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1436 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1438 num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1439 nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1441 /* First job goes to the collective master queue, it will wait for
1442 * the collective slave queues to finish execution.
1443 * The synchronization is done using two monitors:
1444 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1445 * reduction engine (DMA5/TPC7).
1447 * Rest of the jobs goes to the collective slave queues which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
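	/*
	 * Resulting job list for a stream, as a sketch (indices are
	 * illustrative): job[0] = master wait job on wait_queue_id;
	 * job[1..N] = one slave per enabled NIC queue; final job = slave on
	 * the reduction engine queue (DMA5 or TPC7), matching
	 * num_jobs = NUMBER_OF_SOBS_IN_GRP + 1.
	 */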
1450 for (i = 0 ; i < num_jobs ; i++) {
1452 queue_id = wait_queue_id;
1453 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1454 HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
1456 if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1457 if (gaudi->hw_cap_initialized &
1458 BIT(HW_CAP_NIC_SHIFT + nic_idx))
1463 queue_id = nic_queue;
1470 queue_id = collective_queue;
1473 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1474 HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
1484 static int gaudi_late_init(struct hl_device *hdev)
1486 struct gaudi_device *gaudi = hdev->asic_specific;
1489 rc = gaudi->cpucp_info_get(hdev);
1491 dev_err(hdev->dev, "Failed to get cpucp info\n");
1495 if ((hdev->card_type == cpucp_card_type_pci) &&
1496 (hdev->nic_ports_mask & 0x3)) {
1498 "PCI card detected, only 8 ports are enabled\n");
1499 hdev->nic_ports_mask &= ~0x3;
1501 /* Stop and disable unused NIC QMANs */
1502 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1503 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1504 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1506 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1507 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1508 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1510 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1511 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1513 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1516 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1518 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1522 rc = gaudi_fetch_psoc_frequency(hdev);
1524 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1525 goto disable_pci_access;
1528 rc = gaudi_mmu_clear_pgt_range(hdev);
1530 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1531 goto disable_pci_access;
1534 rc = gaudi_init_tpc_mem(hdev);
1536 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1537 goto disable_pci_access;
1540 rc = gaudi_collective_init(hdev);
1542 dev_err(hdev->dev, "Failed to init collective\n");
1543 goto disable_pci_access;
1549 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1554 static void gaudi_late_fini(struct hl_device *hdev)
1556 const struct hwmon_channel_info **channel_info_arr;
1559 if (!hdev->hl_chip_info->info)
1562 channel_info_arr = hdev->hl_chip_info->info;
1564 while (channel_info_arr[i]) {
1565 kfree(channel_info_arr[i]->config);
1566 kfree(channel_info_arr[i]);
1570 kfree(channel_info_arr);
1572 hdev->hl_chip_info->info = NULL;
1575 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1577 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1578 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	/*
	 * The device CPU works with 40-bit addresses, while bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical in all allocated range.
	 */
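	/*
	 * Check sketch (an assumption about the macro semantics): with
	 * GAUDI_CPU_PCI_MSB_ADDR() extracting bits 49:39, a candidate range
	 * is usable iff
	 *
	 *	GAUDI_CPU_PCI_MSB_ADDR(start) == GAUDI_CPU_PCI_MSB_ADDR(end)
	 *
	 * which is exactly what the retry loop below verifies.
	 */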
1590 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1592 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1593 HL_CPU_ACCESSIBLE_MEM_SIZE,
1595 GFP_KERNEL | __GFP_ZERO);
1596 if (!virt_addr_arr[i]) {
1598 goto free_dma_mem_arr;
1601 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1602 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1603 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1607 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1609 "MSB of CPU accessible DMA memory are not identical in all range\n");
1611 goto free_dma_mem_arr;
1614 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1615 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1616 hdev->cpu_pci_msb_addr =
1617 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1619 if (!hdev->asic_prop.fw_security_enabled)
1620 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1623 for (j = 0 ; j < i ; j++)
1624 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1625 HL_CPU_ACCESSIBLE_MEM_SIZE,
1632 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1634 struct gaudi_device *gaudi = hdev->asic_specific;
1635 struct gaudi_internal_qman_info *q;
1638 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1639 q = &gaudi->internal_qmans[i];
1640 if (!q->pq_kernel_addr)
1642 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1648 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1650 struct gaudi_device *gaudi = hdev->asic_specific;
1651 struct gaudi_internal_qman_info *q;
1654 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1655 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1658 q = &gaudi->internal_qmans[i];
1661 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1662 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1664 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1665 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1667 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1668 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1670 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1671 q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1674 dev_err(hdev->dev, "Bad internal queue index %d", i);
1676 goto free_internal_qmans_pq_mem;
1679 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1682 GFP_KERNEL | __GFP_ZERO);
1683 if (!q->pq_kernel_addr) {
1685 goto free_internal_qmans_pq_mem;
1691 free_internal_qmans_pq_mem:
1692 gaudi_free_internal_qmans_pq_mem(hdev);
1696 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1698 struct asic_fixed_properties *prop = &hdev->asic_prop;
1699 struct pci_mem_region *region;
1702 region = &hdev->pci_mem_region[PCI_REGION_CFG];
1703 region->region_base = CFG_BASE;
1704 region->region_size = CFG_SIZE;
1705 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1706 region->bar_size = CFG_BAR_SIZE;
1707 region->bar_id = CFG_BAR_ID;
1711 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1712 region->region_base = SRAM_BASE_ADDR;
1713 region->region_size = SRAM_SIZE;
1714 region->offset_in_bar = 0;
1715 region->bar_size = SRAM_BAR_SIZE;
1716 region->bar_id = SRAM_BAR_ID;
1720 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1721 region->region_base = DRAM_PHYS_BASE;
1722 region->region_size = hdev->asic_prop.dram_size;
1723 region->offset_in_bar = 0;
1724 region->bar_size = prop->dram_pci_bar_size;
1725 region->bar_id = HBM_BAR_ID;
1729 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1730 region->region_base = PSOC_SCRATCHPAD_ADDR;
1731 region->region_size = PSOC_SCRATCHPAD_SIZE;
1732 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1733 region->bar_size = CFG_BAR_SIZE;
1734 region->bar_id = CFG_BAR_ID;
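	/*
	 * Access sketch (hypothetical helper, not in the driver): the region
	 * table lets a transfer routine turn a device address into a BAR
	 * offset:
	 *
	 *	offset = (addr - region->region_base) + region->offset_in_bar;
	 */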
1738 static int gaudi_sw_init(struct hl_device *hdev)
1740 struct gaudi_device *gaudi;
1741 u32 i, event_id = 0;
1744 /* Allocate device structure */
1745 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1749 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1750 if (gaudi_irq_map_table[i].valid) {
1751 if (event_id == GAUDI_EVENT_SIZE) {
1753 "Event array exceeds the limit of %u events\n",
1756 goto free_gaudi_device;
1759 gaudi->events[event_id++] =
1760 gaudi_irq_map_table[i].fc_id;
1764 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1766 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1768 hdev->asic_specific = gaudi;
1770 /* Create DMA pool for small allocations */
1771 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1772 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1773 if (!hdev->dma_pool) {
1774 dev_err(hdev->dev, "failed to create DMA pool\n");
1776 goto free_gaudi_device;
1779 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1783 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1784 if (!hdev->cpu_accessible_dma_pool) {
1786 "Failed to create CPU accessible DMA pool\n");
1788 goto free_cpu_dma_mem;
1791 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1792 (uintptr_t) hdev->cpu_accessible_dma_mem,
1793 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1796 "Failed to add memory to CPU accessible DMA pool\n");
1798 goto free_cpu_accessible_dma_pool;
1801 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1803 goto free_cpu_accessible_dma_pool;
1805 spin_lock_init(&gaudi->hw_queues_lock);
1806 mutex_init(&gaudi->clk_gate_mutex);
1808 hdev->supports_sync_stream = true;
1809 hdev->supports_coresight = true;
1810 hdev->supports_staged_submission = true;
1812 gaudi_set_pci_memory_regions(hdev);
1816 free_cpu_accessible_dma_pool:
1817 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1819 if (!hdev->asic_prop.fw_security_enabled)
1820 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1821 hdev->cpu_pci_msb_addr);
1822 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1823 HL_CPU_ACCESSIBLE_MEM_SIZE,
1824 hdev->cpu_accessible_dma_mem,
1825 hdev->cpu_accessible_dma_address);
1827 dma_pool_destroy(hdev->dma_pool);
1833 static int gaudi_sw_fini(struct hl_device *hdev)
1835 struct gaudi_device *gaudi = hdev->asic_specific;
1837 gaudi_free_internal_qmans_pq_mem(hdev);
1839 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1841 if (!hdev->asic_prop.fw_security_enabled)
1842 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1843 hdev->cpu_pci_msb_addr);
1845 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1846 HL_CPU_ACCESSIBLE_MEM_SIZE,
1847 hdev->cpu_accessible_dma_mem,
1848 hdev->cpu_accessible_dma_address);
1850 dma_pool_destroy(hdev->dma_pool);
1852 mutex_destroy(&gaudi->clk_gate_mutex);
1859 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1861 struct hl_device *hdev = arg;
1867 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1868 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1870 hl_irq_handler_eq(irq, &hdev->event_queue);
/*
 * For backward compatibility, new MSI interrupts should be set after the
 * existing CPU and NIC interrupts.
 */
static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
				bool cpu_eq)
{
	int msi_vec;
1884 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1885 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1886 GAUDI_EVENT_QUEUE_MSI_IDX);
1888 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1889 (nr + NIC_NUMBER_OF_ENGINES + 1);
1891 return pci_irq_vector(hdev->pdev, msi_vec);
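	/*
	 * Worked example (GAUDI_EVENT_QUEUE_MSI_IDX == 8 and
	 * NIC_NUMBER_OF_ENGINES == 10 are assumptions for illustration):
	 * nr = 3 (a CQ) maps to vector 3, the CPU EQ maps to vector 8, and a
	 * new interrupt nr = 8 with !cpu_eq maps to vector 8 + 10 + 1 = 19,
	 * i.e. after the CPU and NIC vectors.
	 */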
1894 static int gaudi_enable_msi_single(struct hl_device *hdev)
1898 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1900 irq = gaudi_pci_irq_vector(hdev, 0, false);
1901 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1902 "gaudi single msi", hdev);
1905 "Failed to request single MSI IRQ\n");
1910 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1912 int cq_cnt = hdev->asic_prop.completion_queues_count;
1913 int rc, i, irq_cnt_init, irq;
1915 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1916 irq = gaudi_pci_irq_vector(hdev, i, false);
1917 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1918 &hdev->completion_queue[i]);
1920 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1925 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1926 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1927 &hdev->event_queue);
1929 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1936 for (i = 0 ; i < irq_cnt_init ; i++)
1937 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1938 &hdev->completion_queue[i]);
1942 static int gaudi_enable_msi(struct hl_device *hdev)
1944 struct gaudi_device *gaudi = hdev->asic_specific;
1947 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1950 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
1952 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1956 if (rc < NUMBER_OF_INTERRUPTS) {
1957 gaudi->multi_msi_mode = false;
1958 rc = gaudi_enable_msi_single(hdev);
1960 gaudi->multi_msi_mode = true;
1961 rc = gaudi_enable_msi_multi(hdev);
1965 goto free_pci_irq_vectors;
1967 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1971 free_pci_irq_vectors:
1972 pci_free_irq_vectors(hdev->pdev);
1976 static void gaudi_sync_irqs(struct hl_device *hdev)
1978 struct gaudi_device *gaudi = hdev->asic_specific;
1979 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1981 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1984 /* Wait for all pending IRQs to be finished */
1985 if (gaudi->multi_msi_mode) {
1986 for (i = 0 ; i < cq_cnt ; i++)
1987 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1989 synchronize_irq(gaudi_pci_irq_vector(hdev,
1990 GAUDI_EVENT_QUEUE_MSI_IDX,
1993 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1997 static void gaudi_disable_msi(struct hl_device *hdev)
1999 struct gaudi_device *gaudi = hdev->asic_specific;
2000 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2002 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2005 gaudi_sync_irqs(hdev);
2007 if (gaudi->multi_msi_mode) {
2008 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2010 free_irq(irq, &hdev->event_queue);
2012 for (i = 0 ; i < cq_cnt ; i++) {
2013 irq = gaudi_pci_irq_vector(hdev, i, false);
2014 free_irq(irq, &hdev->completion_queue[i]);
2017 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2020 pci_free_irq_vectors(hdev->pdev);
2022 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
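/* SRAM scrambler init. Skipped when the firmware owns security, when the
 * boot firmware reports it already enabled scrambling (SRAM_SCR_EN), when
 * it was already done, or when disabled via sram_scrambler_enable.
 * Otherwise, scrambling is enabled in every NIF/SIF router and in all
 * DMA_IF down channels. The HBM scrambler init below follows the same
 * pattern.
 */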
2025 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2027 struct gaudi_device *gaudi = hdev->asic_specific;
2029 if (hdev->asic_prop.fw_security_enabled)
2032 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2033 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2036 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2039 if (!hdev->sram_scrambler_enable)
2042 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2043 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2044 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2045 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2046 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2047 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2048 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2049 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2050 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2051 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2052 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2053 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2054 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2055 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2056 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2057 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2059 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2060 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2061 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2062 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2063 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2064 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2065 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2066 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2067 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2068 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2069 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2070 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2071 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2072 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2073 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2074 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2076 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2077 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2078 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2079 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2080 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2081 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2082 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2083 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2084 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2085 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2086 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2087 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2088 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2089 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2090 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2091 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2093 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2096 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2098 struct gaudi_device *gaudi = hdev->asic_specific;
2100 if (hdev->asic_prop.fw_security_enabled)
2103 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2104 CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2107 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2110 if (!hdev->dram_scrambler_enable)
2113 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2114 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2115 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2116 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2117 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2118 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2119 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2120 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2121 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2122 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2123 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2124 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2125 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2126 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2127 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2128 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2130 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2131 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2132 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2133 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2134 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2135 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2136 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2137 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2138 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2139 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2140 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2141 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2142 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2143 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2144 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2145 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2147 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2148 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2149 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2150 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2151 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2152 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2153 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2154 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2155 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2156 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2157 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2158 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2159 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2160 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2161 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2162 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2164 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
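/* End-to-end (E2E) credit configuration: per-router HBM and PCI read/write
 * credit window sizes, followed by the per-router enable bits. The HBM
 * sizes are divided by 8 (>> 3) before being written, and the NL_HBM
 * overrides apply only when HBM (DRAM) scrambling is disabled.
 */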
2167 static void gaudi_init_e2e(struct hl_device *hdev)
2169 if (hdev->asic_prop.fw_security_enabled)
2172 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2173 CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2176 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2177 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2178 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2179 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2181 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2182 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2183 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2184 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2186 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2187 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2188 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2189 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2191 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2192 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2193 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2194 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2196 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2197 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2198 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2199 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2201 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2202 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2203 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2204 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2206 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2207 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2208 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2209 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2211 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2212 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2213 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2214 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2216 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2217 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2218 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2219 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2221 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2222 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2223 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2224 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2226 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2227 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2228 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2229 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2231 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2232 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2233 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2234 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2236 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2237 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2238 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2239 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2241 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2242 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2243 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2244 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2246 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2247 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2248 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2249 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2251 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2252 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2253 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2254 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2256 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2257 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2258 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2259 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2261 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2262 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2263 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2264 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2266 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2267 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2268 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2269 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2271 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2272 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2273 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2274 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2276 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2277 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2278 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2279 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2281 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2282 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2283 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2284 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2286 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2287 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2288 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2289 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2291 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2292 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2293 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2294 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2296 if (!hdev->dram_scrambler_enable) {
2297 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2298 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2299 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2300 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2302 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2303 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2304 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2305 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2307 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2308 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2309 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2310 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2312 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2313 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2314 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2315 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2317 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2318 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2319 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2320 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2322 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2323 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2324 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2325 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2327 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2328 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2329 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2330 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2332 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2333 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2334 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2335 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2337 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2338 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2339 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2340 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2342 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2343 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2344 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2345 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2347 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2348 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2349 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2350 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2352 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2353 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2354 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2355 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2357 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2358 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2359 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2360 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2362 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2363 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2364 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2365 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2367 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2368 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2369 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2370 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2372 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2373 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2374 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2375 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2377 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2378 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2379 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2380 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2382 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2383 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2384 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2385 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2387 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2388 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2389 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2390 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2392 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2393 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2394 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2395 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2397 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2398 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2399 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2400 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2402 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2403 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2404 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2405 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2407 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2408 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2409 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2410 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2412 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2413 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2414 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2415 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2418 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2419 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2420 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2421 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2423 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2424 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2425 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2426 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2428 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2429 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2430 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2431 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2433 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2434 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2435 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2436 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2438 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2439 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2440 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2441 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2443 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2444 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2445 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2446 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2448 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2449 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2450 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2451 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2453 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2454 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2455 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2456 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2458 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2459 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2460 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2461 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2463 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2464 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2465 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2466 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2468 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2469 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2470 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2471 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2473 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2474 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2475 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2476 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2478 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2479 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2480 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2481 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2483 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2484 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2485 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2486 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2488 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2489 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2490 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2491 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2493 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2494 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2495 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2496 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2498 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2499 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2500 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2501 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2503 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2504 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2505 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2506 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2508 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2509 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2510 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2511 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2513 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2514 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2515 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2516 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2518 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2519 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2520 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2521 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2523 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2524 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2525 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2526 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2528 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2529 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2530 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2531 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2533 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2534 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2535 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2536 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
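/* HBM credit setup: per-quadrant (E_N/E_S/W_N/W_S) read/write credit
 * counts for both HBM channels, then enabling read and write credit
 * accounting. Skipped when the firmware owns security or when the boot
 * firmware already configured the credits.
 */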
2539 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2541 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2543 if (hdev->asic_prop.fw_security_enabled)
2546 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2547 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2550 hbm0_wr = 0x33333333;
2551 hbm0_rd = 0x77777777;
2552 hbm1_wr = 0x55555555;
2553 hbm1_rd = 0xDDDDDDDD;
2555 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2556 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2557 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2558 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2560 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2561 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2562 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2563 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2565 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2566 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2567 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2568 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2570 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2571 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2572 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2573 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2575 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2576 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2577 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2578 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2579 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2580 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2581 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2582 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2583 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2584 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2585 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2586 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2588 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2589 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2590 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2591 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2592 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2593 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2594 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2595 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2596 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2597 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2598 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2599 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
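/* "Golden" register initialization: E2E credits, HBM credits, masking of
 * TPC arithmetic interrupts, i-cache fetch line configuration, clearing
 * the first SRAM bytes for tensor DMA, and the MME rollup counters.
 */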
2602 static void gaudi_init_golden_registers(struct hl_device *hdev)
2607 gaudi_init_e2e(hdev);
2608 gaudi_init_hbm_cred(hdev);
2610 for (tpc_id = 0, tpc_offset = 0;
2611 tpc_id < TPC_NUMBER_OF_ENGINES;
2612 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2613 /* Mask all arithmetic interrupts from TPC */
2614 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2615 /* Set 16 cache lines */
2616 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2617 ICACHE_FETCH_LINE_NUM, 2);
2620 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2621 for (i = 0 ; i < 128 ; i += 8)
2622 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2624 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2625 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2626 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2627 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
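/* Initialize a single stream (qman_id) of a PCI DMA QMAN: PQ base/size in
 * host memory (qman_pq_addr), LDMA offsets, message base addresses that
 * point at the sync manager monitor payload and SOB registers, and, once
 * per QMAN, the RAZWI/error IRQ routing - either to the GIC or to the
 * handler address published by the dynamic firmware loader.
 */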
2630 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2631 int qman_id, dma_addr_t qman_pq_addr)
2633 struct cpu_dyn_regs *dyn_regs =
2634 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2635 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2636 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2637 u32 q_off, dma_qm_offset;
2638 u32 dma_qm_err_cfg, irq_handler_offset;
2640 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2642 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2643 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2644 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2645 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2646 so_base_en_lo = lower_32_bits(CFG_BASE +
2647 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2648 so_base_en_hi = upper_32_bits(CFG_BASE +
2649 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2650 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2651 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2652 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2653 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2654 so_base_ws_lo = lower_32_bits(CFG_BASE +
2655 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2656 so_base_ws_hi = upper_32_bits(CFG_BASE +
2657 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2659 q_off = dma_qm_offset + qman_id * 4;
2661 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2662 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2664 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2665 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2666 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2668 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2669 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2670 QMAN_LDMA_SRC_OFFSET);
2671 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2672 QMAN_LDMA_DST_OFFSET);
2674 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2675 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2676 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2677 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2678 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2679 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2680 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2681 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2683 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2685 /* The following configuration is needed only once per QMAN */
2687 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2688 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2689 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2691 /* Configure RAZWI IRQ */
2692 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2693 if (hdev->stop_on_err)
2695 dma_qm_err_cfg |= PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2697 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2699 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2700 lower_32_bits(CFG_BASE + irq_handler_offset));
2701 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2702 upper_32_bits(CFG_BASE + irq_handler_offset));
2704 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2705 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + dma_id);
2708 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2709 QM_ARB_ERR_MSG_EN_MASK);
2711 /* Increase ARB WDT to support streams architecture */
2712 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2713 GAUDI_ARB_WDT_TIMEOUT);
2715 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2716 QMAN_EXTERNAL_MAKE_TRUSTED);
2718 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
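/* DMA core (engine-level, as opposed to QMAN-level) configuration:
 * outstanding read limits, the LBW workaround, error message routing,
 * MMU bypass for the secured channel, and finally the enable bit.
 */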
2722 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2724 struct cpu_dyn_regs *dyn_regs =
2725 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2726 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2727 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2728 u32 irq_handler_offset;
2730 /* Set to maximum possible according to physical size */
2731 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2732 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2734 /* WA for H/W bug H3-2116 */
2735 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2737 /* STOP_ON bit implies that the operation gets no completion in case of RAZWI */
2738 if (hdev->stop_on_err)
2739 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2741 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2743 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2744 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2745 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2747 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2748 lower_32_bits(CFG_BASE + irq_handler_offset));
2749 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2750 upper_32_bits(CFG_BASE + irq_handler_offset));
2752 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2753 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2754 WREG32(mmDMA0_CORE_PROT + dma_offset,
2755 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2756 /* If the channel is secured, it should be in MMU bypass mode */
2757 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2758 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2759 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2762 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2765 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2767 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2770 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2772 struct gaudi_device *gaudi = hdev->asic_specific;
2773 struct hl_hw_queue *q;
2774 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2776 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2779 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2780 dma_id = gaudi_dma_assignment[i];
2782 /* For queues after the CPU Q, we need to add 1 to get the correct
2783  * queue index. In addition, we need to add the CPU EQ and NIC IRQs in
2784  * order to get the correct MSI register. */
2788 nic_skip = NIC_NUMBER_OF_ENGINES;
2794 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2795 q_idx = 4 * dma_id + j + cpu_skip;
2796 q = &hdev->kernel_queues[q_idx];
2798 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2799 gaudi_init_pci_dma_qman(hdev, dma_id, j, q->bus_address);
2803 gaudi_init_dma_core(hdev, dma_id);
2805 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2808 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
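/* HBM DMA QMAN stream init. Unlike the PCI DMA QMANs, the PQs here are
 * internal queues (HBM_DMA_QMAN_LENGTH entries). In the full driver the
 * CPDMA offsets are programmed for the four upper-CP streams
 * (qman_id < 4), while the LDMA offsets and the per-QMAN error
 * configuration belong to the lower CP (qman_id == 4).
 */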
2811 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2812 int qman_id, u64 qman_base_addr)
2814 struct cpu_dyn_regs *dyn_regs =
2815 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2816 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2817 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2818 u32 dma_qm_err_cfg, irq_handler_offset;
2819 u32 q_off, dma_qm_offset;
2821 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2823 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2824 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2825 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2826 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2827 so_base_en_lo = lower_32_bits(CFG_BASE +
2828 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2829 so_base_en_hi = upper_32_bits(CFG_BASE +
2830 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2831 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2832 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2833 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2834 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2835 so_base_ws_lo = lower_32_bits(CFG_BASE +
2836 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2837 so_base_ws_hi = upper_32_bits(CFG_BASE +
2838 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2840 q_off = dma_qm_offset + qman_id * 4;
2843 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2844 lower_32_bits(qman_base_addr));
2845 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2846 upper_32_bits(qman_base_addr));
2848 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2849 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2850 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2852 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2853 QMAN_CPDMA_SIZE_OFFSET);
2854 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2855 QMAN_CPDMA_SRC_OFFSET);
2856 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2857 QMAN_CPDMA_DST_OFFSET);
2859 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2860 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2861 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2863 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2864 QMAN_LDMA_SIZE_OFFSET);
2865 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2866 QMAN_LDMA_SRC_OFFSET);
2867 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2868 QMAN_LDMA_DST_OFFSET);
2870 /* Configure RAZWI IRQ */
2871 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2872 if (hdev->stop_on_err)
2874 dma_qm_err_cfg |= HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2876 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2878 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2879 lower_32_bits(CFG_BASE + irq_handler_offset));
2880 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2881 upper_32_bits(CFG_BASE + irq_handler_offset));
2883 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2884 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + dma_id);
2887 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2888 QM_ARB_ERR_MSG_EN_MASK);
2890 /* Increase ARB WDT to support streams architecture */
2891 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2892 GAUDI_ARB_WDT_TIMEOUT);
2894 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2895 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2896 QMAN_INTERNAL_MAKE_TRUSTED);
2899 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2900 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2901 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2902 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2904 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2905 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2906 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2908 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2910 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2912 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2917 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2919 struct gaudi_device *gaudi = hdev->asic_specific;
2920 struct gaudi_internal_qman_info *q;
2922 int i, j, dma_id, internal_q_index;
2924 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2927 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2928 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2930 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2932 /* Add the CPU queue in order to get the correct queue
2933  * number, as all internal queues are placed after it. */
2935 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2937 q = &gaudi->internal_qmans[internal_q_index];
2938 qman_base_addr = (u64) q->pq_dma_addr;
2939 gaudi_init_hbm_dma_qman(hdev, dma_id, j, qman_base_addr);
2943 /* Initializing lower CP for HBM DMA QMAN */
2944 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2946 gaudi_init_dma_core(hdev, dma_id);
2948 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2951 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2954 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2955 int qman_id, u64 qman_base_addr)
2957 struct cpu_dyn_regs *dyn_regs =
2958 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2959 u32 mtr_base_lo, mtr_base_hi;
2960 u32 so_base_lo, so_base_hi;
2961 u32 irq_handler_offset;
2965 mtr_base_lo = lower_32_bits(CFG_BASE +
2966 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2967 mtr_base_hi = upper_32_bits(CFG_BASE +
2968 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2969 so_base_lo = lower_32_bits(CFG_BASE +
2970 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2971 so_base_hi = upper_32_bits(CFG_BASE +
2972 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2974 q_off = mme_offset + qman_id * 4;
2977 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2978 lower_32_bits(qman_base_addr));
2979 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2980 upper_32_bits(qman_base_addr));
2982 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2983 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2984 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2986 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2987 QMAN_CPDMA_SIZE_OFFSET);
2988 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2989 QMAN_CPDMA_SRC_OFFSET);
2990 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2991 QMAN_CPDMA_DST_OFFSET);
2993 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2994 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2995 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2997 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2998 QMAN_LDMA_SIZE_OFFSET);
2999 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3000 QMAN_LDMA_SRC_OFFSET);
3001 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3002 QMAN_LDMA_DST_OFFSET);
3004 /* Configure RAZWI IRQ */
3005 mme_id = mme_offset /
3006 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3008 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3009 if (hdev->stop_on_err)
3011 mme_qm_err_cfg |= MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3013 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3015 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3016 lower_32_bits(CFG_BASE + irq_handler_offset));
3017 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3018 upper_32_bits(CFG_BASE + irq_handler_offset));
3020 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3021 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id + mme_id);
3024 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3025 QM_ARB_ERR_MSG_EN_MASK);
3027 /* Increase ARB WDT to support streams architecture */
3028 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3029 GAUDI_ARB_WDT_TIMEOUT);
3031 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3032 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3033 QMAN_INTERNAL_MAKE_TRUSTED);
3036 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3037 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3038 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3039 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
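/* Only the MME0 and MME2 QMANs are enabled below; the odd-numbered MMEs
 * are driven as slaves of their master engine and have no QMAN of their
 * own.
 */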
3042 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3044 struct gaudi_device *gaudi = hdev->asic_specific;
3045 struct gaudi_internal_qman_info *q;
3048 int i, internal_q_index;
3050 if (gaudi->hw_cap_initialized & HW_CAP_MME)
3054 /* map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3055  * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE) */
3058 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3060 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3061 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3062 q = &gaudi->internal_qmans[internal_q_index];
3063 qman_base_addr = (u64) q->pq_dma_addr;
3064 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3), qman_base_addr);
3070 /* Initializing lower CP for MME QMANs */
3071 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3072 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3073 gaudi_init_mme_qman(hdev, 0, 4, 0);
3075 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3076 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3078 gaudi->hw_cap_initialized |= HW_CAP_MME;
3081 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3082 int qman_id, u64 qman_base_addr)
3084 struct cpu_dyn_regs *dyn_regs =
3085 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3086 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3087 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3088 u32 tpc_qm_err_cfg, irq_handler_offset;
3091 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3092 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3093 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3094 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3095 so_base_en_lo = lower_32_bits(CFG_BASE +
3096 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3097 so_base_en_hi = upper_32_bits(CFG_BASE +
3098 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3099 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3100 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3101 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3102 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3103 so_base_ws_lo = lower_32_bits(CFG_BASE +
3104 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3105 so_base_ws_hi = upper_32_bits(CFG_BASE +
3106 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3108 q_off = tpc_offset + qman_id * 4;
3110 tpc_id = tpc_offset /
3111 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3114 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3115 lower_32_bits(qman_base_addr));
3116 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3117 upper_32_bits(qman_base_addr));
3119 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3120 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3121 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3123 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3124 QMAN_CPDMA_SIZE_OFFSET);
3125 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3126 QMAN_CPDMA_SRC_OFFSET);
3127 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3128 QMAN_CPDMA_DST_OFFSET);
3130 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3131 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3132 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3134 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3135 QMAN_LDMA_SIZE_OFFSET);
3136 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3137 QMAN_LDMA_SRC_OFFSET);
3138 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3139 QMAN_LDMA_DST_OFFSET);
3141 /* Configure RAZWI IRQ */
3142 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3143 if (hdev->stop_on_err)
3145 tpc_qm_err_cfg |= TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3147 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3149 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3150 lower_32_bits(CFG_BASE + irq_handler_offset));
3151 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3152 upper_32_bits(CFG_BASE + irq_handler_offset));
3154 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3155 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id + tpc_id);
3158 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3159 QM_ARB_ERR_MSG_EN_MASK);
3161 /* Increase ARB WDT to support streams architecture */
3162 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3163 GAUDI_ARB_WDT_TIMEOUT);
3165 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3166 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3167 QMAN_INTERNAL_MAKE_TRUSTED);
3170 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3171 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3172 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3173 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3175 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3177 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3179 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3181 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3183 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
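/* Initialize all TPC QMANs: four upper-CP streams per TPC backed by
 * internal PQs plus the lower CP, then enable the QMAN and point each
 * TPC's SM base address register at the east-north sync manager objects.
 */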
3188 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3190 struct gaudi_device *gaudi = hdev->asic_specific;
3191 struct gaudi_internal_qman_info *q;
3193 u32 so_base_hi, tpc_offset = 0;
3194 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3195 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3196 int i, tpc_id, internal_q_index;
3198 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3201 so_base_hi = upper_32_bits(CFG_BASE +
3202 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3204 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3205 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3206 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3207 tpc_id * QMAN_STREAMS + i;
3208 q = &gaudi->internal_qmans[internal_q_index];
3209 qman_base_addr = (u64) q->pq_dma_addr;
3210 gaudi_init_tpc_qman(hdev, tpc_offset, i, qman_base_addr);
3214 /* Initializing lower CP for TPC QMAN */
3215 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3217 /* Enable the QMAN and TPC channel */
3218 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, QMAN_TPC_ENABLE);
3223 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta, so_base_hi);
3226 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3228 gaudi->hw_cap_initialized |=
3229 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3233 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3234 int qman_id, u64 qman_base_addr, int nic_id)
3236 struct cpu_dyn_regs *dyn_regs =
3237 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3238 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3239 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3240 u32 nic_qm_err_cfg, irq_handler_offset;
3243 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3244 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3245 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3246 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3247 so_base_en_lo = lower_32_bits(CFG_BASE +
3248 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3249 so_base_en_hi = upper_32_bits(CFG_BASE +
3250 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3251 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3252 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3253 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3254 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3255 so_base_ws_lo = lower_32_bits(CFG_BASE +
3256 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3257 so_base_ws_hi = upper_32_bits(CFG_BASE +
3258 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3260 q_off = nic_offset + qman_id * 4;
3262 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3263 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3265 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3266 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3267 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3269 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3270 QMAN_LDMA_SIZE_OFFSET);
3271 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3272 QMAN_LDMA_SRC_OFFSET);
3273 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3274 QMAN_LDMA_DST_OFFSET);
3276 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3277 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3278 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3279 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3281 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3282 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3283 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3284 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3285 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3288 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3289 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3290 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3292 /* Configure RAZWI IRQ */
3293 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3294 if (hdev->stop_on_err)
3296 nic_qm_err_cfg |= NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3298 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3300 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3301 lower_32_bits(CFG_BASE + irq_handler_offset));
3302 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3303 upper_32_bits(CFG_BASE + irq_handler_offset));
3305 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3306 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id + nic_id);
3309 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3310 QM_ARB_ERR_MSG_EN_MASK);
3312 /* Increase ARB WDT to support streams architecture */
3313 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3314 GAUDI_ARB_WDT_TIMEOUT);
3316 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3317 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3318 QMAN_INTERNAL_MAKE_TRUSTED);
3322 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3324 struct gaudi_device *gaudi = hdev->asic_specific;
3325 struct gaudi_internal_qman_info *q;
3328 u32 nic_delta_between_qmans =
3329 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3330 u32 nic_delta_between_nics =
3331 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3332 int i, nic_id, internal_q_index;
3334 if (!hdev->nic_ports_mask)
3337 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3340 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
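/* Each NIC macro hosts two QMANs (QM0/QM1) nic_delta_between_qmans apart,
 * and consecutive macros sit nic_delta_between_nics apart. Hence, after
 * every odd port the offset is rewound by two QMAN deltas and advanced by
 * one NIC delta to land on the next macro.
 */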
3342 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3343 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3344 nic_offset += nic_delta_between_qmans;
if (nic_id & 1) {
3346 nic_offset -= (nic_delta_between_qmans * 2);
3347 nic_offset += nic_delta_between_nics;
}
continue;
}
3352 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3353 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3354 nic_id * QMAN_STREAMS + i;
3355 q = &gaudi->internal_qmans[internal_q_index];
3356 qman_base_addr = (u64) q->pq_dma_addr;
3357 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3358 qman_base_addr, nic_id);
3361 /* Enable the QMAN */
3362 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3364 nic_offset += nic_delta_between_qmans;
if (nic_id & 1) {
3366 nic_offset -= (nic_delta_between_qmans * 2);
3367 nic_offset += nic_delta_between_nics;
}
3370 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
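/* The disable helpers below clear the QMAN enable bits in GLBL_CFG0,
 * while the stop helpers further down assert the CP (and, for the NICs,
 * also the PQF/CQF) stop bits in GLBL_CFG1, halting the control planes
 * without turning the queues off.
 */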
3374 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3376 struct gaudi_device *gaudi = hdev->asic_specific;
3378 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3381 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3382 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3383 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3386 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3388 struct gaudi_device *gaudi = hdev->asic_specific;
3390 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3393 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3394 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3395 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3396 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3397 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3400 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3402 struct gaudi_device *gaudi = hdev->asic_specific;
3404 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3407 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3408 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3411 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3413 struct gaudi_device *gaudi = hdev->asic_specific;
3417 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3420 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3421 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3422 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3426 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3428 struct gaudi_device *gaudi = hdev->asic_specific;
3429 u32 nic_mask, nic_offset = 0;
3430 u32 nic_delta_between_qmans =
3431 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3432 u32 nic_delta_between_nics =
3433 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3436 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3437 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3439 if (gaudi->hw_cap_initialized & nic_mask)
3440 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3442 nic_offset += nic_delta_between_qmans;
3444 nic_offset -= (nic_delta_between_qmans * 2);
3445 nic_offset += nic_delta_between_nics;
3450 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3452 struct gaudi_device *gaudi = hdev->asic_specific;
3454 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3457 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3458 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3459 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3460 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3463 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3465 struct gaudi_device *gaudi = hdev->asic_specific;
3467 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3470 /* Stop CPs of HBM DMA QMANs */
3472 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3473 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3474 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3475 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3476 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3479 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3481 struct gaudi_device *gaudi = hdev->asic_specific;
3483 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3486 /* Stop CPs of MME QMANs */
3487 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3488 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3491 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3493 struct gaudi_device *gaudi = hdev->asic_specific;
3495 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3498 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3499 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3500 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3501 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3502 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3503 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3504 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3505 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3508 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3510 struct gaudi_device *gaudi = hdev->asic_specific;
3512 /* Stop upper CPs of QMANs */
3514 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3515 WREG32(mmNIC0_QM0_GLBL_CFG1,
3516 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3517 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3518 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3520 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3521 WREG32(mmNIC0_QM1_GLBL_CFG1,
3522 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3523 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3524 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3526 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3527 WREG32(mmNIC1_QM0_GLBL_CFG1,
3528 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3529 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3530 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3532 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3533 WREG32(mmNIC1_QM1_GLBL_CFG1,
3534 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3535 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3536 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3538 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3539 WREG32(mmNIC2_QM0_GLBL_CFG1,
3540 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3541 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3542 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3544 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3545 WREG32(mmNIC2_QM1_GLBL_CFG1,
3546 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3547 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3548 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3550 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3551 WREG32(mmNIC3_QM0_GLBL_CFG1,
3552 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3553 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3554 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3556 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3557 WREG32(mmNIC3_QM1_GLBL_CFG1,
3558 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3559 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3560 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3562 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3563 WREG32(mmNIC4_QM0_GLBL_CFG1,
3564 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3565 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3566 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3568 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3569 WREG32(mmNIC4_QM1_GLBL_CFG1,
3570 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3571 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3572 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3575 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3577 struct gaudi_device *gaudi = hdev->asic_specific;
3579 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3582 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3583 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3584 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3587 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3589 struct gaudi_device *gaudi = hdev->asic_specific;
3591 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3594 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3595 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3596 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3597 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3598 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3601 static void gaudi_mme_stall(struct hl_device *hdev)
3603 struct gaudi_device *gaudi = hdev->asic_specific;
3605 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3608 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3609 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3610 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3611 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3612 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3613 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3614 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3615 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3616 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3617 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3618 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3619 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3620 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3621 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3622 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3623 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3624 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3627 static void gaudi_tpc_stall(struct hl_device *hdev)
3629 struct gaudi_device *gaudi = hdev->asic_specific;
3631 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3634 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3635 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3636 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3637 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3638 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3639 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3640 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3641 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3644 static void gaudi_set_clock_gating(struct hl_device *hdev)
3646 struct gaudi_device *gaudi = hdev->asic_specific;
3651 /* If a debug session is in progress, don't enable clock gating as
3652 * it may interfere
3657 if (hdev->asic_prop.fw_security_enabled)
3660 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3661 enable = !!(hdev->clock_gating_mask &
3662 (BIT_ULL(gaudi_dma_assignment[i])));
3664 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3665 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3666 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3667 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3668 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3671 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3672 enable = !!(hdev->clock_gating_mask &
3673 (BIT_ULL(gaudi_dma_assignment[i])));
3675 /* The GC sends work to the DMA engine through the upper CP of DMA5,
3676 * so clock gating must not be enabled for that DMA
3678 if (i == GAUDI_HBM_DMA_4)
3681 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3682 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3683 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3684 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3685 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3688 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3689 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3690 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3692 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3693 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3694 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3696 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3697 enable = !!(hdev->clock_gating_mask &
3698 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3700 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3701 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3702 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3703 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3705 qman_offset += TPC_QMAN_OFFSET;
3708 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3711 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3713 struct gaudi_device *gaudi = hdev->asic_specific;
3717 if (hdev->asic_prop.fw_security_enabled)
3720 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3721 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3722 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3724 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3727 WREG32(mmMME0_QM_CGM_CFG, 0);
3728 WREG32(mmMME0_QM_CGM_CFG1, 0);
3729 WREG32(mmMME2_QM_CGM_CFG, 0);
3730 WREG32(mmMME2_QM_CGM_CFG1, 0);
3732 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3733 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3734 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3736 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3739 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3742 static void gaudi_enable_timestamp(struct hl_device *hdev)
3744 /* Disable the timestamp counter */
3745 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3747 /* Zero the lower/upper parts of the 64-bit counter */
3748 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3749 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3751 /* Enable the counter */
3752 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3755 static void gaudi_disable_timestamp(struct hl_device *hdev)
3757 /* Disable the timestamp counter */
3758 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3761 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3763 u32 wait_timeout_ms;
3766 "Halting compute engines and disabling interrupts\n");
3769 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3771 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
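	/* Teardown order: first stop the QMANs so no new work is fetched,
	 * then stall the engines themselves, and only then disable the
	 * QMANs, with a settle delay between the stages.
	 */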
3773 gaudi_stop_nic_qmans(hdev);
3774 gaudi_stop_mme_qmans(hdev);
3775 gaudi_stop_tpc_qmans(hdev);
3776 gaudi_stop_hbm_dma_qmans(hdev);
3777 gaudi_stop_pci_dma_qmans(hdev);
3779 hdev->asic_funcs->disable_clock_gating(hdev);
3781 msleep(wait_timeout_ms);
3783 gaudi_pci_dma_stall(hdev);
3784 gaudi_hbm_dma_stall(hdev);
3785 gaudi_tpc_stall(hdev);
3786 gaudi_mme_stall(hdev);
3788 msleep(wait_timeout_ms);
3790 gaudi_disable_nic_qmans(hdev);
3791 gaudi_disable_mme_qmans(hdev);
3792 gaudi_disable_tpc_qmans(hdev);
3793 gaudi_disable_hbm_dma_qmans(hdev);
3794 gaudi_disable_pci_dma_qmans(hdev);
3796 gaudi_disable_timestamp(hdev);
3798 gaudi_disable_msi(hdev);
3801 static int gaudi_mmu_init(struct hl_device *hdev)
3803 struct asic_fixed_properties *prop = &hdev->asic_prop;
3804 struct gaudi_device *gaudi = hdev->asic_specific;
3808 if (!hdev->mmu_enable)
3811 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3814 for (i = 0 ; i < prop->max_asid ; i++) {
3815 hop0_addr = prop->mmu_pgt_addr +
3816 (i * prop->mmu_hop_table_size);
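		/* Illustrative example (the real sizes come from the ASIC
		 * properties): with a hop table size of 0x1000 bytes, the
		 * hop0 table of ASID 3 would sit at mmu_pgt_addr + 0x3000.
		 */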
3818 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3821 "failed to set hop0 addr for asid %d\n", i);
3826 /* Init the MMU cache management page */
3827 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3828 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3830 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3832 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3833 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3835 WREG32(mmSTLB_HOP_CONFIGURATION,
3836 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3839 * The H/W expects the first PI after init to be 1. After wraparound
3842 gaudi->mmu_cache_inv_pi = 1;
3844 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3852 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3856 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3858 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3861 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3865 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3867 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3870 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3872 struct dynamic_fw_load_mgr *dynamic_loader;
3873 struct cpu_dyn_regs *dyn_regs;
3875 dynamic_loader = &hdev->fw_loader.dynamic_loader;
3878 * Here we update initial values for a few specific dynamic regs (before
3879 * reading the first descriptor from the FW, those values have to be
3880 * hard-coded). In later stages of the protocol, those values will be
3881 * updated automatically by reading the FW descriptor, so the data there
3882 * will always be up-to-date
3884 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3885 dyn_regs->kmd_msg_to_cpu =
3886 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3887 dyn_regs->cpu_cmd_status_to_host =
3888 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3890 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3893 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3895 struct static_fw_load_mgr *static_loader;
3897 static_loader = &hdev->fw_loader.static_loader;
3899 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3900 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3901 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3902 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3903 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3904 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3905 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3906 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3907 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3908 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3909 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3910 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3911 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3912 GAUDI_PLDM_RESET_WAIT_MSEC :
3913 GAUDI_CPU_RESET_WAIT_MSEC;
3916 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3918 struct asic_fixed_properties *prop = &hdev->asic_prop;
3919 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3921 /* fill common fields */
3922 fw_loader->linux_loaded = false;
3923 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3924 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3925 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3926 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3927 fw_loader->skip_bmc = !hdev->bmc_enable;
3928 fw_loader->sram_bar_id = SRAM_BAR_ID;
3929 fw_loader->dram_bar_id = HBM_BAR_ID;
3931 if (prop->dynamic_fw_load)
3932 gaudi_init_dynamic_firmware_loader(hdev);
3934 gaudi_init_static_firmware_loader(hdev);
3937 static int gaudi_init_cpu(struct hl_device *hdev)
3939 struct gaudi_device *gaudi = hdev->asic_specific;
3942 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3945 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3949 * The device CPU works with 40-bit addresses.
3950 * This register sets the extension to 50 bits.
3952 if (!hdev->asic_prop.fw_security_enabled)
3953 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3955 rc = hl_fw_init_cpu(hdev);
3960 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3965 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3967 struct cpu_dyn_regs *dyn_regs =
3968 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3969 struct asic_fixed_properties *prop = &hdev->asic_prop;
3970 struct gaudi_device *gaudi = hdev->asic_specific;
3971 u32 status, irq_handler_offset;
3973 struct hl_hw_queue *cpu_pq =
3974 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3977 if (!hdev->cpu_queues_enable)
3980 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3983 eq = &hdev->event_queue;
3985 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3986 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3988 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3989 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3991 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3992 lower_32_bits(hdev->cpu_accessible_dma_address));
3993 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3994 upper_32_bits(hdev->cpu_accessible_dma_address));
3996 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3997 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3998 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4000 /* Used for EQ CI */
4001 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4003 WREG32(mmCPU_IF_PF_PQ_PI, 0);
4005 if (gaudi->multi_msi_mode)
4006 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4008 WREG32(mmCPU_IF_QUEUE_INIT,
4009 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4011 irq_handler_offset = prop->gic_interrupts_enable ?
4012 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4013 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4015 WREG32(irq_handler_offset,
4016 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4018 err = hl_poll_timeout(
4020 mmCPU_IF_QUEUE_INIT,
4022 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4028 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4032 /* update FW application security bits */
4033 if (prop->fw_cpu_boot_dev_sts0_valid)
4034 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4035 if (prop->fw_cpu_boot_dev_sts1_valid)
4036 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4038 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4042 static void gaudi_pre_hw_init(struct hl_device *hdev)
4044 /* Perform read from the device to make sure device is up */
4047 if (!hdev->asic_prop.fw_security_enabled) {
4048 /* Set the access through PCI bars (Linux driver only) as
4049 * secured
4051 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4052 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4053 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4055 /* Perform read to flush the waiting writes to ensure
4056 * configuration was set in the device
4058 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4062 * Let's mark in the H/W that we have reached this point. We check
4063 * this value in the reset_before_init function to understand whether
4064 * we need to reset the chip before doing H/W init. This register is
4065 * cleared by the H/W upon H/W reset
4067 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4070 static int gaudi_hw_init(struct hl_device *hdev)
4072 struct gaudi_device *gaudi = hdev->asic_specific;
4075 gaudi_pre_hw_init(hdev);
4077 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
4078 * So we set it here and if anyone tries to move it later to
4079 * a different address, there will be an error
4081 if (hdev->asic_prop.iatu_done_by_fw)
4082 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4085 * Before pushing u-boot/Linux to the device, the HBM BAR needs to be
4086 * set to the DRAM base address
4088 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4090 "failed to map HBM bar to DRAM base address\n");
4094 rc = gaudi_init_cpu(hdev);
4096 dev_err(hdev->dev, "failed to initialize CPU\n");
4100 /* In case the clock gating was enabled in preboot we need to disable
4101 * it here before touching the MME/TPC registers.
4102 * There is no need to take clk gating mutex because when this function
4103 * runs, no other relevant code can run
4105 hdev->asic_funcs->disable_clock_gating(hdev);
4107 /* SRAM scrambler must be initialized after CPU is running from HBM */
4108 gaudi_init_scrambler_sram(hdev);
4110 /* This is here just in case we are working without CPU */
4111 gaudi_init_scrambler_hbm(hdev);
4113 gaudi_init_golden_registers(hdev);
4115 rc = gaudi_mmu_init(hdev);
4119 gaudi_init_security(hdev);
4121 gaudi_init_pci_dma_qmans(hdev);
4123 gaudi_init_hbm_dma_qmans(hdev);
4125 gaudi_init_mme_qmans(hdev);
4127 gaudi_init_tpc_qmans(hdev);
4129 gaudi_init_nic_qmans(hdev);
4131 hdev->asic_funcs->set_clock_gating(hdev);
4133 gaudi_enable_timestamp(hdev);
4135 /* MSI must be enabled before CPU queues and NIC are initialized */
4136 rc = gaudi_enable_msi(hdev);
4138 goto disable_queues;
4140 /* must be called after MSI was enabled */
4141 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4143 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4148 /* Perform read from the device to flush all configuration */
4154 gaudi_disable_msi(hdev);
4156 gaudi_disable_mme_qmans(hdev);
4157 gaudi_disable_pci_dma_qmans(hdev);
4162 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
4164 struct cpu_dyn_regs *dyn_regs =
4165 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4166 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4167 struct gaudi_device *gaudi = hdev->asic_specific;
4168 bool driver_performs_reset;
4171 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4176 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4177 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4179 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4180 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4183 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4184 !hdev->asic_prop.hard_reset_done_by_fw);
4186 /* Set device to handle FLR by H/W as we will put the device CPU to
4187 * halt mode
4189 if (driver_performs_reset)
4190 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4191 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4193 /* If linux is loaded in the device CPU we need to communicate with it
4194 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4195 * registers in case of old F/Ws
4197 if (hdev->fw_loader.linux_loaded) {
4198 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4199 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4200 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4202 WREG32(irq_handler_offset,
4203 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4205 if (hdev->asic_prop.hard_reset_done_by_fw)
4206 hl_fw_ask_hard_reset_without_linux(hdev);
4208 hl_fw_ask_halt_machine_without_linux(hdev);
4211 if (driver_performs_reset) {
4213 /* Configure the reset registers. Must be done as early as
4214 * possible in case we fail during H/W initialization
4216 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4217 (CFG_RST_H_DMA_MASK |
4218 CFG_RST_H_MME_MASK |
4220 CFG_RST_H_TPC_7_MASK));
4222 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4224 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4225 (CFG_RST_H_HBM_MASK |
4226 CFG_RST_H_TPC_7_MASK |
4227 CFG_RST_H_NIC_MASK |
4229 CFG_RST_H_DMA_MASK |
4230 CFG_RST_H_MME_MASK |
4231 CFG_RST_H_CPU_MASK |
4232 CFG_RST_H_MMU_MASK));
4234 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4235 (CFG_RST_L_IF_MASK |
4236 CFG_RST_L_PSOC_MASK |
4237 CFG_RST_L_TPC_MASK));
4239 msleep(cpu_timeout_ms);
4241 /* Tell ASIC not to re-initialize PCIe */
4242 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4244 /* Restart BTL/BLR upon hard-reset */
4245 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4247 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4248 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4251 "Issued HARD reset command, going to wait %dms\n",
4255 "Firmware performs HARD reset, going to wait %dms\n",
4260 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4261 * itself is in reset. Need to wait until the reset is deasserted
4263 msleep(reset_timeout_ms);
4265 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4266 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4268 "Timeout while waiting for device to reset 0x%x\n",
4272 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4273 HW_CAP_HBM | HW_CAP_PCI_DMA |
4274 HW_CAP_MME | HW_CAP_TPC_MASK |
4275 HW_CAP_HBM_DMA | HW_CAP_PLL |
4276 HW_CAP_NIC_MASK | HW_CAP_MMU |
4277 HW_CAP_SRAM_SCRAMBLER |
4278 HW_CAP_HBM_SCRAMBLER |
4281 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4283 hdev->device_cpu_is_halted = false;
4287 static int gaudi_suspend(struct hl_device *hdev)
4291 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4293 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4298 static int gaudi_resume(struct hl_device *hdev)
4300 return gaudi_init_iatu(hdev);
4303 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4304 void *cpu_addr, dma_addr_t dma_addr, size_t size)
4308 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4309 VM_DONTCOPY | VM_NORESERVE;
4311 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4312 (dma_addr - HOST_PHYS_BASE), size);
4314 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4319 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4321 struct cpu_dyn_regs *dyn_regs =
4322 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4323 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4324 struct gaudi_device *gaudi = hdev->asic_specific;
4325 bool invalid_queue = false;
4328 switch (hw_queue_id) {
4329 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4330 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4331 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4332 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4333 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4336 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4337 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4338 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4339 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4340 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
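	/* From here on the per-queue index is (hw_queue_id - 1) & 0x3 rather
	 * than hw_queue_id & 0x3, because GAUDI_QUEUE_ID_CPU_PQ sits between
	 * the DMA_1 and DMA_2 queue IDs and shifts all later IDs by one. The
	 * same correction applies to the NIC queues further down.
	 */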
4343 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4344 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4345 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4346 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4347 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4350 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4351 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4352 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4353 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4354 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4357 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4358 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4359 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4360 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4361 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4364 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4365 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4366 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4367 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4368 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4371 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4372 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4373 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4374 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4375 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4378 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4379 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4380 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4381 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4382 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4385 case GAUDI_QUEUE_ID_CPU_PQ:
4386 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4387 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4389 invalid_queue = true;
4392 case GAUDI_QUEUE_ID_MME_0_0:
4393 db_reg_offset = mmMME2_QM_PQ_PI_0;
4396 case GAUDI_QUEUE_ID_MME_0_1:
4397 db_reg_offset = mmMME2_QM_PQ_PI_1;
4400 case GAUDI_QUEUE_ID_MME_0_2:
4401 db_reg_offset = mmMME2_QM_PQ_PI_2;
4404 case GAUDI_QUEUE_ID_MME_0_3:
4405 db_reg_offset = mmMME2_QM_PQ_PI_3;
4408 case GAUDI_QUEUE_ID_MME_1_0:
4409 db_reg_offset = mmMME0_QM_PQ_PI_0;
4412 case GAUDI_QUEUE_ID_MME_1_1:
4413 db_reg_offset = mmMME0_QM_PQ_PI_1;
4416 case GAUDI_QUEUE_ID_MME_1_2:
4417 db_reg_offset = mmMME0_QM_PQ_PI_2;
4420 case GAUDI_QUEUE_ID_MME_1_3:
4421 db_reg_offset = mmMME0_QM_PQ_PI_3;
4424 case GAUDI_QUEUE_ID_TPC_0_0:
4425 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4428 case GAUDI_QUEUE_ID_TPC_0_1:
4429 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4432 case GAUDI_QUEUE_ID_TPC_0_2:
4433 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4436 case GAUDI_QUEUE_ID_TPC_0_3:
4437 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4440 case GAUDI_QUEUE_ID_TPC_1_0:
4441 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4444 case GAUDI_QUEUE_ID_TPC_1_1:
4445 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4448 case GAUDI_QUEUE_ID_TPC_1_2:
4449 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4452 case GAUDI_QUEUE_ID_TPC_1_3:
4453 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4456 case GAUDI_QUEUE_ID_TPC_2_0:
4457 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4460 case GAUDI_QUEUE_ID_TPC_2_1:
4461 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4464 case GAUDI_QUEUE_ID_TPC_2_2:
4465 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4468 case GAUDI_QUEUE_ID_TPC_2_3:
4469 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4472 case GAUDI_QUEUE_ID_TPC_3_0:
4473 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4476 case GAUDI_QUEUE_ID_TPC_3_1:
4477 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4480 case GAUDI_QUEUE_ID_TPC_3_2:
4481 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4484 case GAUDI_QUEUE_ID_TPC_3_3:
4485 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4488 case GAUDI_QUEUE_ID_TPC_4_0:
4489 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4492 case GAUDI_QUEUE_ID_TPC_4_1:
4493 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4496 case GAUDI_QUEUE_ID_TPC_4_2:
4497 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4500 case GAUDI_QUEUE_ID_TPC_4_3:
4501 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4504 case GAUDI_QUEUE_ID_TPC_5_0:
4505 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4508 case GAUDI_QUEUE_ID_TPC_5_1:
4509 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4512 case GAUDI_QUEUE_ID_TPC_5_2:
4513 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4516 case GAUDI_QUEUE_ID_TPC_5_3:
4517 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4520 case GAUDI_QUEUE_ID_TPC_6_0:
4521 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4524 case GAUDI_QUEUE_ID_TPC_6_1:
4525 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4528 case GAUDI_QUEUE_ID_TPC_6_2:
4529 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4532 case GAUDI_QUEUE_ID_TPC_6_3:
4533 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4536 case GAUDI_QUEUE_ID_TPC_7_0:
4537 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4540 case GAUDI_QUEUE_ID_TPC_7_1:
4541 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4544 case GAUDI_QUEUE_ID_TPC_7_2:
4545 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4548 case GAUDI_QUEUE_ID_TPC_7_3:
4549 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4552 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4553 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4554 invalid_queue = true;
4556 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4557 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4560 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4561 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4562 invalid_queue = true;
4564 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4565 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4568 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4569 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4570 invalid_queue = true;
4572 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4573 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4576 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4577 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4578 invalid_queue = true;
4580 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4581 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4584 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4585 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4586 invalid_queue = true;
4588 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4589 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4592 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4593 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4594 invalid_queue = true;
4596 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4597 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4600 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4601 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4602 invalid_queue = true;
4604 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4605 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4608 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4609 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4610 invalid_queue = true;
4612 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4613 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4616 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4617 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4618 invalid_queue = true;
4620 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4621 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4624 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4625 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4626 invalid_queue = true;
4628 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4629 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4633 invalid_queue = true;
4636 if (invalid_queue) {
4637 /* Should never get here */
4638 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4645 /* ring the doorbell */
4646 WREG32(db_reg_offset, db_value);
4648 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4649 /* make sure device CPU will read latest data from host */
4652 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4653 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4654 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4656 WREG32(irq_handler_offset,
4657 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4661 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4664 __le64 *pbd = (__le64 *) bd;
4666 /* The PQs reside in host memory, so a simple copy suffices */
4671 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4672 dma_addr_t *dma_handle, gfp_t flags)
4674 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4677 /* Shift to the device's base physical address of host memory */
4679 *dma_handle += HOST_PHYS_BASE;
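	/* Example of the convention: a CPU-obtained DMA address of 0x1000 is
	 * handed to the device as HOST_PHYS_BASE + 0x1000, the device-side
	 * window to host memory. gaudi_dma_free_coherent() below subtracts
	 * the base again before freeing.
	 */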
4684 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4685 void *cpu_addr, dma_addr_t dma_handle)
4687 /* Cancel the device's base physical address of host memory */
4688 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4690 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4693 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4695 struct asic_fixed_properties *prop = &hdev->asic_prop;
4696 u64 cur_addr = DRAM_BASE_ADDR_USER;
4701 while (cur_addr < prop->dram_end_address) {
4702 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4703 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4706 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4709 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4710 cur_addr, cur_addr + chunk_size);
4712 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4713 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4714 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4715 lower_32_bits(cur_addr));
4716 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4717 upper_32_bits(cur_addr));
4718 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4720 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4721 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4722 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
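			/* In MEM_SET mode the DMA core replicates the 64-bit
			 * pattern programmed via SRC_BASE_{LO,HI} (zero here)
			 * across the destination range, so no source buffer
			 * is read.
			 */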
4724 cur_addr += chunk_size;
4726 if (cur_addr == prop->dram_end_address)
4730 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4731 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4733 rc = hl_poll_timeout(
4735 mmDMA0_CORE_STS0 + dma_offset,
4737 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4739 HBM_SCRUBBING_TIMEOUT_US);
4743 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4753 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4755 struct asic_fixed_properties *prop = &hdev->asic_prop;
4756 struct gaudi_device *gaudi = hdev->asic_specific;
4760 if (!hdev->memory_scrub)
4763 if (!addr && !size) {
4764 /* Wait till device is idle */
4765 rc = hl_poll_timeout(
4767 mmDMA0_CORE_STS0/* dummy */,
4769 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4772 HBM_SCRUBBING_TIMEOUT_US);
4774 dev_err(hdev->dev, "waiting for idle timeout\n");
4779 addr = prop->sram_user_base_address;
4780 size = hdev->pldm ? 0x10000 :
4781 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4782 val = 0x7777777777777777ull;
4784 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4787 "Failed to clear SRAM in mem scrub all\n");
4791 mutex_lock(&gaudi->clk_gate_mutex);
4792 hdev->asic_funcs->disable_clock_gating(hdev);
4794 /* Scrub HBM using all DMA channels in parallel */
4795 rc = gaudi_hbm_scrubbing(hdev);
4798 "Failed to clear HBM in mem scrub all\n");
4800 hdev->asic_funcs->set_clock_gating(hdev);
4801 mutex_unlock(&gaudi->clk_gate_mutex);
4807 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4808 u32 queue_id, dma_addr_t *dma_handle,
4811 struct gaudi_device *gaudi = hdev->asic_specific;
4812 struct gaudi_internal_qman_info *q;
4814 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4815 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4816 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4820 q = &gaudi->internal_qmans[queue_id];
4821 *dma_handle = q->pq_dma_addr;
4822 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4824 return q->pq_kernel_addr;
4827 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4828 u16 len, u32 timeout, u64 *result)
4830 struct gaudi_device *gaudi = hdev->asic_specific;
4832 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4839 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4841 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4845 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4847 struct packet_msg_prot *fence_pkt;
4848 dma_addr_t pkt_dma_addr;
4849 u32 fence_val, tmp, timeout_usec;
4850 dma_addr_t fence_dma_addr;
4855 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4857 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4859 fence_val = GAUDI_QMAN0_FENCE_VAL;
4861 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4865 "Failed to allocate memory for H/W queue %d testing\n",
4872 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4873 sizeof(struct packet_msg_prot),
4874 GFP_KERNEL, &pkt_dma_addr);
4877 "Failed to allocate packet for H/W queue %d testing\n",
4880 goto free_fence_ptr;
4883 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4884 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4885 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4887 fence_pkt->ctl = cpu_to_le32(tmp);
4888 fence_pkt->value = cpu_to_le32(fence_val);
4889 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
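	/* The test is a single MSG_PROT that writes GAUDI_QMAN0_FENCE_VAL
	 * into fence_ptr; polling that host buffer below proves the QMAN
	 * fetched and executed the packet.
	 */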
4891 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4892 sizeof(struct packet_msg_prot),
4896 "Failed to send fence packet to H/W queue %d\n",
4901 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4902 1000, timeout_usec, true);
4904 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4906 if (rc == -ETIMEDOUT) {
4908 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4909 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4914 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4917 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4922 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4924 struct gaudi_device *gaudi = hdev->asic_specific;
4927 * Check the capability here, as send_cpu_message() won't update the
4928 * result value if the capability is missing
4930 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4933 return hl_fw_test_cpu_queue(hdev);
4936 static int gaudi_test_queues(struct hl_device *hdev)
4938 int i, rc, ret_val = 0;
4940 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4941 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4942 rc = gaudi_test_queue(hdev, i);
4948 rc = gaudi_test_cpu_queue(hdev);
4955 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4956 gfp_t mem_flags, dma_addr_t *dma_handle)
4960 if (size > GAUDI_DMA_POOL_BLK_SIZE)
4963 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4965 /* Shift to the device's base physical address of host memory */
4967 *dma_handle += HOST_PHYS_BASE;
4972 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4973 dma_addr_t dma_addr)
4975 /* Cancel the device's base physical address of host memory */
4976 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4978 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4981 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4982 size_t size, dma_addr_t *dma_handle)
4984 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4987 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4988 size_t size, void *vaddr)
4990 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4993 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
4994 int nents, enum dma_data_direction dir)
4996 struct scatterlist *sg;
4999 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5002 /* Shift to the device's base physical address of host memory */
5003 for_each_sg(sgl, sg, nents, i)
5004 sg->dma_address += HOST_PHYS_BASE;
5009 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5010 int nents, enum dma_data_direction dir)
5012 struct scatterlist *sg;
5015 /* Cancel the device's base physical address of host memory */
5016 for_each_sg(sgl, sg, nents, i)
5017 sg->dma_address -= HOST_PHYS_BASE;
5019 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5022 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5023 struct sg_table *sgt)
5025 struct scatterlist *sg, *sg_next_iter;
5026 u32 count, dma_desc_cnt;
5028 dma_addr_t addr, addr_next;
5032 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5034 len = sg_dma_len(sg);
5035 addr = sg_dma_address(sg);
5040 while ((count + 1) < sgt->nents) {
5041 sg_next_iter = sg_next(sg);
5042 len_next = sg_dma_len(sg_next_iter);
5043 addr_next = sg_dma_address(sg_next_iter);
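			/* Merge physically contiguous SG entries as long as
			 * the combined length still fits in a single LIN_DMA
			 * transfer; each merged run costs one descriptor.
			 */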
5048 if ((addr + len == addr_next) &&
5049 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5061 return dma_desc_cnt * sizeof(struct packet_lin_dma);
5064 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5065 struct hl_cs_parser *parser,
5066 struct packet_lin_dma *user_dma_pkt,
5067 u64 addr, enum dma_data_direction dir)
5069 struct hl_userptr *userptr;
5072 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5073 parser->job_userptr_list, &userptr))
5074 goto already_pinned;
5076 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5080 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5085 list_add_tail(&userptr->job_node, parser->job_userptr_list);
5087 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5088 userptr->sgt->nents, dir);
5090 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5094 userptr->dma_mapped = true;
5098 parser->patched_cb_size +=
5099 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5104 list_del(&userptr->job_node);
5105 hl_unpin_host_memory(hdev, userptr);
5111 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5112 struct hl_cs_parser *parser,
5113 struct packet_lin_dma *user_dma_pkt,
5116 enum dma_data_direction dir;
5117 bool skip_host_mem_pin = false, user_memset;
5121 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5122 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5123 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5127 skip_host_mem_pin = true;
5129 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5130 dir = DMA_TO_DEVICE;
5131 addr = le64_to_cpu(user_dma_pkt->src_addr);
5133 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5134 dir = DMA_FROM_DEVICE;
5135 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5136 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5137 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5140 if (skip_host_mem_pin)
5141 parser->patched_cb_size += sizeof(*user_dma_pkt);
5143 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5149 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5150 struct hl_cs_parser *parser,
5151 struct packet_lin_dma *user_dma_pkt)
5153 bool src_in_host = false;
5154 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5155 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5156 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5158 dev_dbg(hdev->dev, "DMA packet details:\n");
5159 dev_dbg(hdev->dev, "source == 0x%llx\n",
5160 le64_to_cpu(user_dma_pkt->src_addr));
5161 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5162 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5165 * Special handling for DMA with size 0. Bypass all validations
5166 * because no transactions will be done except for WR_COMP, which
5167 * is not a security issue
5169 if (!le32_to_cpu(user_dma_pkt->tsize)) {
5170 parser->patched_cb_size += sizeof(*user_dma_pkt);
5174 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5177 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5181 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5182 struct hl_cs_parser *parser,
5183 struct packet_load_and_exe *user_pkt)
5187 cfg = le32_to_cpu(user_pkt->cfg);
5189 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5191 "User not allowed to use Load and Execute\n");
5195 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5200 static int gaudi_validate_cb(struct hl_device *hdev,
5201 struct hl_cs_parser *parser, bool is_mmu)
5203 u32 cb_parsed_length = 0;
5206 parser->patched_cb_size = 0;
5208 /* user_cb_size is greater than 0, so the loop will always execute */
5209 while (cb_parsed_length < parser->user_cb_size) {
5210 enum packet_id pkt_id;
5212 struct gaudi_packet *user_pkt;
5214 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5216 pkt_id = (enum packet_id) (
5217 (le64_to_cpu(user_pkt->header) &
5218 PACKET_HEADER_PACKET_ID_MASK) >>
5219 PACKET_HEADER_PACKET_ID_SHIFT);
5221 if (!validate_packet_id(pkt_id)) {
5222 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5227 pkt_size = gaudi_packet_sizes[pkt_id];
5228 cb_parsed_length += pkt_size;
5229 if (cb_parsed_length > parser->user_cb_size) {
5231 "packet 0x%x is out of CB boundary\n", pkt_id);
5237 case PACKET_MSG_PROT:
5239 "User not allowed to use MSG_PROT\n");
5244 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5249 dev_err(hdev->dev, "User not allowed to use STOP\n");
5253 case PACKET_WREG_BULK:
5255 "User not allowed to use WREG_BULK\n");
5259 case PACKET_LOAD_AND_EXE:
5260 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5261 (struct packet_load_and_exe *) user_pkt);
5264 case PACKET_LIN_DMA:
5265 parser->contains_dma_pkt = true;
5267 parser->patched_cb_size += pkt_size;
5269 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5270 (struct packet_lin_dma *) user_pkt);
5273 case PACKET_WREG_32:
5274 case PACKET_MSG_LONG:
5275 case PACKET_MSG_SHORT:
5279 case PACKET_ARB_POINT:
5280 parser->patched_cb_size += pkt_size;
5284 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5295 * The new CB should have space at the end for two MSG_PROT packets:
5296 * 1. A packet that will act as a completion packet
5297 * 2. A packet that will generate MSI interrupt
5299 if (parser->completion)
5300 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5305 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5306 struct hl_cs_parser *parser,
5307 struct packet_lin_dma *user_dma_pkt,
5308 struct packet_lin_dma *new_dma_pkt,
5309 u32 *new_dma_pkt_size)
5311 struct hl_userptr *userptr;
5312 struct scatterlist *sg, *sg_next_iter;
5313 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5315 dma_addr_t dma_addr, dma_addr_next;
5316 u64 device_memory_addr, addr;
5317 enum dma_data_direction dir;
5318 struct sg_table *sgt;
5319 bool src_in_host = false;
5320 bool skip_host_mem_pin = false;
5323 ctl = le32_to_cpu(user_dma_pkt->ctl);
5325 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5328 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5329 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5332 addr = le64_to_cpu(user_dma_pkt->src_addr);
5333 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5334 dir = DMA_TO_DEVICE;
5336 skip_host_mem_pin = true;
5338 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5339 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5340 dir = DMA_FROM_DEVICE;
5343 if ((!skip_host_mem_pin) &&
5344 (!hl_userptr_is_pinned(hdev, addr,
5345 le32_to_cpu(user_dma_pkt->tsize),
5346 parser->job_userptr_list, &userptr))) {
5347 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5348 addr, le32_to_cpu(user_dma_pkt->tsize));
5352 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5353 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5354 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5358 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5363 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5364 len = sg_dma_len(sg);
5365 dma_addr = sg_dma_address(sg);
5370 while ((count + 1) < sgt->nents) {
5371 sg_next_iter = sg_next(sg);
5372 len_next = sg_dma_len(sg_next_iter);
5373 dma_addr_next = sg_dma_address(sg_next_iter);
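			/* This merge must mirror the one in
			 * gaudi_get_dma_desc_list_size() exactly, otherwise
			 * the patched CB size computed during validation
			 * won't match the number of packets written here.
			 */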
5378 if ((dma_addr + len == dma_addr_next) &&
5379 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5388 ctl = le32_to_cpu(user_dma_pkt->ctl);
5389 if (likely(dma_desc_cnt))
5390 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5391 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5392 new_dma_pkt->ctl = cpu_to_le32(ctl);
5393 new_dma_pkt->tsize = cpu_to_le32(len);
5395 if (dir == DMA_TO_DEVICE) {
5396 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5397 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5399 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5400 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5404 device_memory_addr += len;
5409 if (!dma_desc_cnt) {
5411 "Error of 0 SG entries when patching DMA packet\n");
5415 /* Fix the last DMA packet - WR_COMP must be as the user set it */
5417 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5419 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5424 static int gaudi_patch_cb(struct hl_device *hdev,
5425 struct hl_cs_parser *parser)
5427 u32 cb_parsed_length = 0;
5428 u32 cb_patched_cur_length = 0;
5431 /* user_cb_size is greater than 0, so the loop will always execute */
5432 while (cb_parsed_length < parser->user_cb_size) {
5433 enum packet_id pkt_id;
5435 u32 new_pkt_size = 0;
5436 struct gaudi_packet *user_pkt, *kernel_pkt;
5438 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5439 kernel_pkt = parser->patched_cb->kernel_address +
5440 cb_patched_cur_length;
5442 pkt_id = (enum packet_id) (
5443 (le64_to_cpu(user_pkt->header) &
5444 PACKET_HEADER_PACKET_ID_MASK) >>
5445 PACKET_HEADER_PACKET_ID_SHIFT);
5447 if (!validate_packet_id(pkt_id)) {
5448 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5453 pkt_size = gaudi_packet_sizes[pkt_id];
5454 cb_parsed_length += pkt_size;
5455 if (cb_parsed_length > parser->user_cb_size) {
5457 "packet 0x%x is out of CB boundary\n", pkt_id);
5463 case PACKET_LIN_DMA:
5464 rc = gaudi_patch_dma_packet(hdev, parser,
5465 (struct packet_lin_dma *) user_pkt,
5466 (struct packet_lin_dma *) kernel_pkt,
5468 cb_patched_cur_length += new_pkt_size;
5471 case PACKET_MSG_PROT:
5473 "User not allowed to use MSG_PROT\n");
5478 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5483 dev_err(hdev->dev, "User not allowed to use STOP\n");
5487 case PACKET_WREG_32:
5488 case PACKET_WREG_BULK:
5489 case PACKET_MSG_LONG:
5490 case PACKET_MSG_SHORT:
5494 case PACKET_ARB_POINT:
5495 case PACKET_LOAD_AND_EXE:
5496 memcpy(kernel_pkt, user_pkt, pkt_size);
5497 cb_patched_cur_length += pkt_size;
5501 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5514 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5515 struct hl_cs_parser *parser)
5517 u64 patched_cb_handle;
5518 u32 patched_cb_size;
5519 struct hl_cb *user_cb;
5523 * The new CB should have space at the end for two MSG_PROT packets:
5524 * 1. A packet that will act as a completion packet
5525 * 2. A packet that will generate MSI interrupt
5527 if (parser->completion)
5528 parser->patched_cb_size = parser->user_cb_size +
5529 sizeof(struct packet_msg_prot) * 2;
5531 parser->patched_cb_size = parser->user_cb_size;
5533 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5534 parser->patched_cb_size, false, false,
5535 &patched_cb_handle);
5539 "Failed to allocate patched CB for DMA CS %d\n",
5544 patched_cb_handle >>= PAGE_SHIFT;
5545 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5546 (u32) patched_cb_handle);
5547 /* hl_cb_get should never fail */
5548 if (!parser->patched_cb) {
5549 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5550 (u32) patched_cb_handle);
5556 * The check that parser->user_cb_size <= parser->user_cb->size was done
5557 * in validate_queue_index().
5559 memcpy(parser->patched_cb->kernel_address,
5560 parser->user_cb->kernel_address,
5561 parser->user_cb_size);
5563 patched_cb_size = parser->patched_cb_size;
5565 /* Validate patched CB instead of user CB */
5566 user_cb = parser->user_cb;
5567 parser->user_cb = parser->patched_cb;
5568 rc = gaudi_validate_cb(hdev, parser, true);
5569 parser->user_cb = user_cb;
5572 hl_cb_put(parser->patched_cb);
5576 if (patched_cb_size != parser->patched_cb_size) {
5577 dev_err(hdev->dev, "user CB size mismatch\n");
5578 hl_cb_put(parser->patched_cb);
5585 * Always call cb destroy here because we still have one reference
5586 * to it from the earlier cb_get. After the job completes, cb_put
5587 * will release it, but here we want to remove it from the idr
5590 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5591 patched_cb_handle << PAGE_SHIFT);
5596 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5597 struct hl_cs_parser *parser)
5599 u64 patched_cb_handle;
5602 rc = gaudi_validate_cb(hdev, parser, false);
5607 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5608 parser->patched_cb_size, false, false,
5609 &patched_cb_handle);
5612 "Failed to allocate patched CB for DMA CS %d\n", rc);
5616 patched_cb_handle >>= PAGE_SHIFT;
5617 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5618 (u32) patched_cb_handle);
5619 /* hl_cb_get should never fail here */
5620 if (!parser->patched_cb) {
5621 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5622 (u32) patched_cb_handle);
5627 rc = gaudi_patch_cb(hdev, parser);
5630 hl_cb_put(parser->patched_cb);
5634 * Always call cb destroy here because we still have one reference
5635 * to it from the earlier cb_get. After the job completes, cb_put
5636 * will release it, but here we want to remove it from the idr
5639 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5640 patched_cb_handle << PAGE_SHIFT);
5644 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5648 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5649 struct hl_cs_parser *parser)
5651 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5652 struct gaudi_device *gaudi = hdev->asic_specific;
5653 u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5654 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5656 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5657 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5658 (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5659 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5660 parser->hw_queue_id);
5664 /* For internal queue jobs just check if CB address is valid */
5665 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5666 parser->user_cb_size,
5667 asic_prop->sram_user_base_address,
5668 asic_prop->sram_end_address))
5671 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5672 parser->user_cb_size,
5673 asic_prop->dram_user_base_address,
5674 asic_prop->dram_end_address))
5677 /* PMMU and HPMMU addresses are equal, check only one of them */
5678 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5679 parser->user_cb_size,
5680 asic_prop->pmmu.start_addr,
5681 asic_prop->pmmu.end_addr))
5685 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5686 parser->user_cb, parser->user_cb_size);
5691 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5693 struct gaudi_device *gaudi = hdev->asic_specific;
5695 if (parser->queue_type == QUEUE_TYPE_INT)
5696 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5698 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5699 return gaudi_parse_cb_mmu(hdev, parser);
5701 return gaudi_parse_cb_no_mmu(hdev, parser);
5704 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5705 void *kernel_address, u32 len,
5706 u64 cq_addr, u32 cq_val, u32 msi_vec,
5709 struct gaudi_device *gaudi = hdev->asic_specific;
5710 struct packet_msg_prot *cq_pkt;
5713 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
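	/* The parser reserved room for exactly two MSG_PROT packets at the
	 * tail of the patched CB (see gaudi_parse_cb_mmu()), so they can be
	 * written in place here.
	 */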
5715 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5716 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5719 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5721 cq_pkt->ctl = cpu_to_le32(tmp);
5722 cq_pkt->value = cpu_to_le32(cq_val);
5723 cq_pkt->addr = cpu_to_le64(cq_addr);
5727 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5728 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5729 cq_pkt->ctl = cpu_to_le32(tmp);
5730 cq_pkt->value = cpu_to_le32(1);
5732 if (!gaudi->multi_msi_mode)
5733 msi_vec = 0;
5735 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
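	/* Writing to mmPCIE_MSI_INTR_0 + n * 4 raises MSI vector n; in
	 * single-MSI mode only vector 0 is used, hence msi_vec is forced
	 * to 0 above.
	 */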
5738 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5740 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5743 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5746 struct packet_lin_dma *lin_dma_pkt;
5747 struct hl_cs_job *job;
5748 u32 cb_size, ctl, err_cause;
5753 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5757 lin_dma_pkt = cb->kernel_address;
5758 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5759 cb_size = sizeof(*lin_dma_pkt);
5761 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5762 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5763 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5764 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5765 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5767 lin_dma_pkt->ctl = cpu_to_le32(ctl);
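	/* In a memset LIN_DMA packet the 64-bit fill value travels in the
	 * src_addr field, while dst_addr and tsize describe the target
	 * range.
	 */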
5768 lin_dma_pkt->src_addr = cpu_to_le64(val);
5769 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5770 lin_dma_pkt->tsize = cpu_to_le32(size);
5772 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5774 dev_err(hdev->dev, "Failed to allocate a new job\n");
5779 /* Verify DMA is OK */
5780 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5781 if (err_cause && !hdev->init_done) {
5783 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5785 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5790 atomic_inc(&job->user_cb->cs_cnt);
5791 job->user_cb_size = cb_size;
5792 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5793 job->patched_cb = job->user_cb;
5794 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5796 hl_debugfs_add_job(hdev, job);
5798 rc = gaudi_send_job_on_qman0(hdev, job);
5799 hl_debugfs_remove_job(hdev, job);
5801 atomic_dec(&cb->cs_cnt);
5803 /* Verify DMA is OK */
5804 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5806 dev_err(hdev->dev, "DMA failed, cause 0x%x\n", err_cause);
5808 if (!hdev->init_done) {
5810 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5812 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5819 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5824 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5825 u32 num_regs, u32 val)
5827 struct packet_msg_long *pkt;
5828 struct hl_cs_job *job;
5833 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
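	/* The trailing MSG_PROT slot is reserved for the fence/completion
	 * packet that gaudi_send_job_on_qman0() writes at the end of the CB
	 * before submission.
	 */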
5835 if (cb_size > SZ_2M) {
5836 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M / SZ_1M);
5840 cb = hl_cb_kernel_create(hdev, cb_size, false);
5844 pkt = cb->kernel_address;
5846 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5847 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5848 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5849 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5850 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5852 for (i = 0; i < num_regs ; i++, pkt++) {
5853 pkt->ctl = cpu_to_le32(ctl);
5854 pkt->value = cpu_to_le32(val);
5855 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5858 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5860 dev_err(hdev->dev, "Failed to allocate a new job\n");
5867 atomic_inc(&job->user_cb->cs_cnt);
5868 job->user_cb_size = cb_size;
5869 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5870 job->patched_cb = job->user_cb;
5871 job->job_cb_size = cb_size;
5873 hl_debugfs_add_job(hdev, job);
5875 rc = gaudi_send_job_on_qman0(hdev, job);
5876 hl_debugfs_remove_job(hdev, job);
5878 atomic_dec(&cb->cs_cnt);
5882 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5887 static int gaudi_schedule_register_memset(struct hl_device *hdev,
5888 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
5891 struct hl_pending_cb *pending_cb;
5892 struct packet_msg_long *pkt;
5897 mutex_lock(&hdev->fpriv_list_lock);
5898 ctx = hdev->compute_ctx;
5900 /* If no compute context is available, or the context is going down,
5901 * memset the registers directly
5903 if (!ctx || kref_read(&ctx->refcount) == 0) {
5904 rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
5905 mutex_unlock(&hdev->fpriv_list_lock);
5909 mutex_unlock(&hdev->fpriv_list_lock);
5911 cb_size = (sizeof(*pkt) * num_regs) +
5912 sizeof(struct packet_msg_prot) * 2;
5914 if (cb_size > SZ_2M) {
5915 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M / SZ_1M);
5919 pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
5923 cb = hl_cb_kernel_create(hdev, cb_size, false);
5929 pkt = cb->kernel_address;
5931 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5932 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5933 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5934 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5935 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5937 for (i = 0; i < num_regs ; i++, pkt++) {
5938 pkt->ctl = cpu_to_le32(ctl);
5939 pkt->value = cpu_to_le32(val);
		pkt->addr = cpu_to_le64(reg_base + (i * 4));
	}
5943 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5945 pending_cb->cb = cb;
5946 pending_cb->cb_size = cb_size;
5947 /* The queue ID MUST be an external queue ID. Otherwise, we will
	 * have undefined behavior
	 */
5950 pending_cb->hw_queue_id = hw_queue_id;
5952 spin_lock(&ctx->pending_cb_lock);
5953 list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
	spin_unlock(&ctx->pending_cb_lock);

	return 0;
}
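
/*
 * Design note: rather than submitting immediately, the CB above is
 * parked on the context's pending list; it is expected to be dispatched
 * on the given external queue together with a subsequent CS submission,
 * keeping the register memset ordered with the user's work.
 */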
static int gaudi_restore_sm_registers(struct hl_device *hdev)
{
	u64 base_addr;
	u32 num_regs;
	int rc;
5965 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5966 num_regs = NUM_OF_SOB_IN_BLOCK;
5967 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}
5973 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5974 num_regs = NUM_OF_SOB_IN_BLOCK;
5975 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}
5981 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5982 num_regs = NUM_OF_SOB_IN_BLOCK;
5983 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}
5989 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5990 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5991 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}
5997 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5998 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5999 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}
6005 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6006 num_regs = NUM_OF_MONITORS_IN_BLOCK;
6007 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}
6013 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6014 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6015 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6016 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}
6022 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6023 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6024 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6025 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}

	return 0;
}
static void gaudi_restore_dma_registers(struct hl_device *hdev)
{
	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
	int i;
6040 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
		u64 sob_addr = CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
				(i * sob_delta);
		u32 dma_offset = i * DMA_CORE_OFFSET;
6046 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6047 lower_32_bits(sob_addr));
6048 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6049 upper_32_bits(sob_addr));
6050 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
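		/* WDATA 0x80000001: bit 31 presumably selects the SOB
		 * "increment" operation and the low bits hold the value,
		 * i.e. every write-completion bumps the sync object by one
		 */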
		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
		 * modified by the user for SRAM reduction
		 */
		if (i > 1)
			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
					0x00000001);
	}
}
static void gaudi_restore_qm_registers(struct hl_device *hdev)
{
	u32 qman_offset;
	int i;
6066 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6067 qman_offset = i * DMA_QMAN_OFFSET;
		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
	}
6071 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6072 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
	}
6076 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6077 qman_offset = i * TPC_QMAN_OFFSET;
		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
	}
6081 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6082 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6083 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
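		/* e.g. i = 5 resolves to QM1 of NIC macro 2 (NIC2_QM1) */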
		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
	}
}
static int gaudi_restore_user_registers(struct hl_device *hdev)
{
	int rc;

	rc = gaudi_restore_sm_registers(hdev);
	if (rc)
		return rc;

	gaudi_restore_dma_registers(hdev);
	gaudi_restore_qm_registers(hdev);

	return 0;
}

static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
{
	return gaudi_restore_user_registers(hdev);
}
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
{
6109 struct asic_fixed_properties *prop = &hdev->asic_prop;
6110 struct gaudi_device *gaudi = hdev->asic_specific;
6111 u64 addr = prop->mmu_pgt_addr;
6112 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return gaudi_memset_device_memory(hdev, addr, size, 0);
}

static void gaudi_restore_phase_topology(struct hl_device *hdev)
{

}
6125 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
		bool user_address, u32 *val)
{
6128 struct asic_fixed_properties *prop = &hdev->asic_prop;
6129 struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hbm_bar_addr, host_phys_end;
	int rc = 0;
6133 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6135 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6137 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6138 (hdev->clock_gating_mask &
6139 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6141 dev_err_ratelimited(hdev->dev,
6142 "Can't read register - clock gating is enabled!\n");
6145 *val = RREG32(addr - CFG_BASE);
6148 } else if ((addr >= SRAM_BASE_ADDR) &&
6149 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6150 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6151 (addr - SRAM_BASE_ADDR));
6152 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6153 u64 bar_base_addr = DRAM_PHYS_BASE +
6154 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
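		/* bar_base_addr is addr rounded down to the BAR-sized
		 * window containing it; gaudi_set_hbm_bar_base() slides
		 * the HBM BAR so that window becomes host-visible
		 */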
6156 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6157 if (hbm_bar_addr != U64_MAX) {
6158 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6159 (addr - bar_base_addr));
			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
						hbm_bar_addr);
		}
		if (hbm_bar_addr == U64_MAX)
			rc = -EIO;
6166 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6167 user_address && !iommu_present(&pci_bus_type)) {
		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
	} else {
		rc = -EFAULT;
	}

	return rc;
}
6176 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
		bool user_address, u32 val)
{
6179 struct asic_fixed_properties *prop = &hdev->asic_prop;
6180 struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hbm_bar_addr, host_phys_end;
	int rc = 0;
6184 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6186 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6188 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6189 (hdev->clock_gating_mask &
6190 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6192 dev_err_ratelimited(hdev->dev,
6193 "Can't write register - clock gating is enabled!\n");
6196 WREG32(addr - CFG_BASE, val);
6199 } else if ((addr >= SRAM_BASE_ADDR) &&
6200 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6201 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6202 (addr - SRAM_BASE_ADDR));
6203 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6204 u64 bar_base_addr = DRAM_PHYS_BASE +
6205 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6207 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6208 if (hbm_bar_addr != U64_MAX) {
6209 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6210 (addr - bar_base_addr));
			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
						hbm_bar_addr);
		}
		if (hbm_bar_addr == U64_MAX)
			rc = -EIO;
6217 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6218 user_address && !iommu_present(&pci_bus_type)) {
		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
	} else {
		rc = -EFAULT;
	}

	return rc;
}
6227 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
		bool user_address, u64 *val)
{
6230 struct asic_fixed_properties *prop = &hdev->asic_prop;
6231 struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hbm_bar_addr, host_phys_end;
	int rc = 0;
6235 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6237 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6239 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6240 (hdev->clock_gating_mask &
6241 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6243 dev_err_ratelimited(hdev->dev,
6244 "Can't read register - clock gating is enabled!\n");
6247 u32 val_l = RREG32(addr - CFG_BASE);
6248 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6250 *val = (((u64) val_h) << 32) | val_l;
6253 } else if ((addr >= SRAM_BASE_ADDR) &&
6254 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6255 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6256 (addr - SRAM_BASE_ADDR));
	} else if (addr <=
		DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6259 u64 bar_base_addr = DRAM_PHYS_BASE +
6260 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6262 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6263 if (hbm_bar_addr != U64_MAX) {
6264 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6265 (addr - bar_base_addr));
			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
						hbm_bar_addr);
		}
		if (hbm_bar_addr == U64_MAX)
			rc = -EIO;
6272 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6273 user_address && !iommu_present(&pci_bus_type)) {
		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
	} else {
		rc = -EFAULT;
	}

	return rc;
}
6282 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
		bool user_address, u64 val)
{
6285 struct asic_fixed_properties *prop = &hdev->asic_prop;
6286 struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hbm_bar_addr, host_phys_end;
	int rc = 0;
6290 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6292 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6294 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6295 (hdev->clock_gating_mask &
6296 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6298 dev_err_ratelimited(hdev->dev,
6299 "Can't write register - clock gating is enabled!\n");
6302 WREG32(addr - CFG_BASE, lower_32_bits(val));
6303 WREG32(addr + sizeof(u32) - CFG_BASE,
6304 upper_32_bits(val));
6307 } else if ((addr >= SRAM_BASE_ADDR) &&
6308 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6309 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6310 (addr - SRAM_BASE_ADDR));
	} else if (addr <=
		DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6313 u64 bar_base_addr = DRAM_PHYS_BASE +
6314 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6316 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6317 if (hbm_bar_addr != U64_MAX) {
6318 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6319 (addr - bar_base_addr));
			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
						hbm_bar_addr);
		}
		if (hbm_bar_addr == U64_MAX)
			rc = -EIO;
6326 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6327 user_address && !iommu_present(&pci_bus_type)) {
		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
	} else {
		rc = -EFAULT;
	}

	return rc;
}
6336 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
		u32 size_to_dma, dma_addr_t dma_addr)
{
	u32 err_cause, val;
	u64 dma_offset;
	int rc;

	dma_offset = dma_id * DMA_CORE_OFFSET;
6345 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6346 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6347 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6348 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6349 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6350 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6351 (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
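	/* The LIN bit selects linear (flat) mode rather than
	 * descriptor-list mode; writing the COMMIT register kicks off
	 * the transfer programmed above.
	 */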
	rc = hl_poll_timeout(
		hdev,
		mmDMA0_CORE_STS0 + dma_offset,
		val,
		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
		0,
		1000000);

	if (rc) {
		dev_err(hdev->dev,
			"DMA %d timed-out during reading of 0x%llx\n",
			dma_id, addr);
		return -EIO;
	}
6368 /* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);

		return -EIO;
	}

	return 0;
}
static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
				void *blob_addr)
{
6386 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6387 struct gaudi_device *gaudi = hdev->asic_specific;
6388 u64 dma_offset, qm_offset;
	dma_addr_t dma_addr;
	void *kernel_addr;
	bool is_eng_idle;
	int rc = 0, dma_id;

	kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
						hdev, SZ_2M,
						&dma_addr,
						GFP_KERNEL | __GFP_ZERO);

	if (!kernel_addr)
		return -ENOMEM;
6402 mutex_lock(&gaudi->clk_gate_mutex);
6404 hdev->asic_funcs->disable_clock_gating(hdev);
6406 hdev->asic_funcs->hw_queues_lock(hdev);
6408 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6409 dma_offset = dma_id * DMA_CORE_OFFSET;
6410 qm_offset = dma_id * DMA_QMAN_OFFSET;
6411 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
	is_eng_idle = IS_DMA_IDLE(dma_core_sts0);

	if (!is_eng_idle) {
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
		dma_offset = dma_id * DMA_CORE_OFFSET;
		qm_offset = dma_id * DMA_QMAN_OFFSET;
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
		is_eng_idle = IS_DMA_IDLE(dma_core_sts0);

		if (!is_eng_idle) {
			dev_err_ratelimited(hdev->dev,
				"Can't read via DMA because it is BUSY\n");
			rc = -EAGAIN;
			goto out;
		}
	}
6429 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6430 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6431 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6433 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
	 * ASID
	 */
6437 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6439 /* Verify DMA is OK */
6440 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6443 "Clearing DMA0 engine from errors (cause 0x%x)\n",
6445 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
	pos = 0;
	size_left = size;
	size_to_dma = SZ_2M;
6452 while (size_left > 0) {
6454 if (size_left < SZ_2M)
6455 size_to_dma = size_left;
		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
						dma_addr);
		if (rc)
			break;

		memcpy(blob_addr + pos, kernel_addr, size_to_dma);

		if (size_left <= SZ_2M)
			break;

		pos += SZ_2M;
		addr += SZ_2M;
		size_left -= SZ_2M;
	}
6472 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
	 * ASID
	 */
6476 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6477 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);

out:
6482 hdev->asic_funcs->hw_queues_unlock(hdev);
6484 hdev->asic_funcs->set_clock_gating(hdev);
6486 mutex_unlock(&gaudi->clk_gate_mutex);
	hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
						dma_addr);

	return rc;
}
static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->hard_reset_pending)
		return U64_MAX;

	return readq(hdev->pcie_bar[HBM_BAR_ID] +
			(addr - gaudi->hbm_bar_cur_addr));
}
static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->hard_reset_pending)
		return;

	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
			(addr - gaudi->hbm_bar_cur_addr));
}
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}
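
/*
 * Illustration (assumed register layout): the ASID lives in the low
 * bits of these registers with the MMU-bypass (MMBP) flag just above,
 * so the two RMW operations above behave like:
 *
 *	u32 props = RREG32(reg);
 *
 *	props &= ~0x7FF;	// drop old ASID + MMBP
 *	props |= asid;		// route the engine through the MMU
 *	WREG32(reg, props);
 */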
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
		dev_crit(hdev->dev, "asid %u is too big\n", asid);
		return;
	}
6535 mutex_lock(&gaudi->clk_gate_mutex);
6537 hdev->asic_funcs->disable_clock_gating(hdev);
6539 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6540 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6541 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6542 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6543 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6545 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6546 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6547 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6548 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6549 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6551 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6552 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6553 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6554 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6555 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6557 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6558 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6559 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6560 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6561 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6563 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6564 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6565 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6566 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6567 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6569 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6570 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6571 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6572 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6573 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6575 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6576 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6577 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6578 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6579 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6581 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6582 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6583 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6584 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6585 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6587 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6588 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6589 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6590 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6591 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6592 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6593 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6594 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6596 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6597 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6598 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6599 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6600 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6601 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6602 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6604 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6605 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6606 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6607 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6608 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6609 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6610 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6612 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6613 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6614 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6615 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6616 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6617 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6618 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6620 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6621 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6622 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6623 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6624 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6625 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6626 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6628 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6629 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6630 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6631 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6632 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6633 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6634 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6636 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6637 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6638 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6639 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6640 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6641 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6642 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6644 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6645 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6646 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6647 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6648 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6649 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6650 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6652 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6653 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6654 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6655 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6656 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6657 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6658 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6660 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6661 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6662 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6663 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6664 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6665 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6666 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6667 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6668 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6669 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6671 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6672 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6673 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6674 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6675 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6676 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6677 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6678 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6679 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6680 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6681 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6682 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6684 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4, asid);
	}
6697 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4, asid);
	}
6710 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4, asid);
	}
6723 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4, asid);
	}
6736 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4, asid);
	}
6749 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4, asid);
	}
6762 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4, asid);
	}
6775 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4, asid);
	}
6788 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4, asid);
	}
6801 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3, asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4, asid);
	}
6814 hdev->asic_funcs->set_clock_gating(hdev);
	mutex_unlock(&gaudi->clk_gate_mutex);
}
6819 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
		struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout, dma_offset;
	int rc;

	if (hdev->pldm)
		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;
6834 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6835 dev_err_ratelimited(hdev->dev,
6836 "Can't send driver job on QMAN0 because the device is not idle\n");
	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}
6848 cb = job->patched_cb;
6850 fence_pkt = cb->kernel_address +
6851 job->job_cb_size - sizeof(struct packet_msg_prot);
6853 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6854 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6855 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6857 fence_pkt->ctl = cpu_to_le32(tmp);
6858 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6859 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
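
	/*
	 * Fence protocol: the MSG_PROT packet at the end of the CB makes
	 * QMAN0 write GAUDI_QMAN0_FENCE_VAL to fence_dma_addr on
	 * completion; the poll below waits for that value to appear in
	 * the host-side fence_ptr.
	 */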
6861 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6863 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6865 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6866 job->job_cb_size, cb->bus_address);
6868 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6869 goto free_fence_ptr;
	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
				timeout, true);
6876 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6878 if (rc == -ETIMEDOUT) {
6879 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

free_fence_ptr:
6884 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6885 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);
	return rc;
}
static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
{
6894 if (event_type >= GAUDI_EVENT_SIZE)
6895 goto event_not_supported;
6897 if (!gaudi_irq_map_table[event_type].valid)
6898 goto event_not_supported;
	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);

	return;

event_not_supported:
	snprintf(desc, size, "N/A");
}
6908 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
		u32 x_y, bool is_write)
{
	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6913 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
			DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;

	switch (x_y) {
6917 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		dma_id[0] = 0;
		dma_id[1] = 2;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		dma_id[0] = 1;
		dma_id[1] = 3;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		dma_id[0] = 4;
		dma_id[1] = 6;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		dma_id[0] = 5;
		dma_id[1] = 7;
		break;
	default:
		goto unknown_initiator;
	}
6941 for (i = 0 ; i < 2 ; i++) {
6942 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	}

	switch (x_y) {
6947 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6948 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA0";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA2";
		else
			return "DMA0 or DMA2";
6955 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6956 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA1";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA3";
		else
			return "DMA1 or DMA3";
6963 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6964 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA4";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA6";
		else
			return "DMA4 or DMA6";
6971 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6972 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA5";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA7";
		else
			return "DMA5 or DMA7";
	default:
		break;
	}

unknown_initiator:
	return "unknown initiator";
}
static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
						bool is_write)
{
6988 u32 val, x_y, axi_id;
6990 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6991 RREG32(mmMMU_UP_RAZWI_READ_ID);
6992 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6993 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6994 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
			RAZWI_INITIATOR_AXI_ID_SHIFT);

	switch (x_y) {
6998 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC0";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
			return "NIC0";
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC1:
		return "TPC1";
	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
		return "MME0";
	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
		return "MME1";
	case RAZWI_INITIATOR_ID_X_Y_TPC2:
		return "TPC2";
7014 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC3";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
			return "PCI";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
			return "CPU";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
			return "PSOC";
		break;
7024 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7025 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7026 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7027 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7028 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7029 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7030 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7031 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7032 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
7033 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC4";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
			return "NIC1";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
			return "NIC2";
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC5:
		return "TPC5";
	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
		return "MME2";
	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
		return "MME3";
	case RAZWI_INITIATOR_ID_X_Y_TPC6:
		return "TPC6";
7051 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC7";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
			return "NIC4";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
			return "NIC5";
		break;
	default:
		break;
	}

	dev_err_ratelimited(hdev->dev,
		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
		val,
7066 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7067 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7068 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7069 RAZWI_INITIATOR_AXI_ID_MASK);
7071 return "unknown initiator";
static void gaudi_print_razwi_info(struct hl_device *hdev)
{
7076 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7077 dev_err_ratelimited(hdev->dev,
7078 "RAZWI event caused by illegal write of %s\n",
7079 gaudi_get_razwi_initiator_name(hdev, true));
		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
	}
7083 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7084 dev_err_ratelimited(hdev->dev,
7085 "RAZWI event caused by illegal read of %s\n",
7086 gaudi_get_razwi_initiator_name(hdev, false));
		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
	}
}
static void gaudi_print_mmu_error_info(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 addr;
	u32 val;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;
7100 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7101 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		addr <<= 32;
		addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
				addr);

		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
	}
7112 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7113 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
		addr <<= 32;
		addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev,
			"MMU access error on va 0x%llx\n", addr);

		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
	}
}
/*
 * +-------------------+------------------------------------------------------+
7127 * | Configuration Reg | Description |
7129 * +-------------------+------------------------------------------------------+
7130 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
7131 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
7132 * | |0xF34 memory wrappers 63:32 |
7133 * | |0xF38 memory wrappers 95:64 |
7134 * | |0xF3C memory wrappers 127:96 |
7135 * +-------------------+------------------------------------------------------+
7136 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
7137 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
7138 * | |0xF44 memory wrappers 63:32 |
7139 * | |0xF48 memory wrappers 95:64 |
7140 * | |0xF4C memory wrappers 127:96 |
 * +-------------------+------------------------------------------------------+
 */
7143 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7144 struct ecc_info_extract_params *params, u64 *ecc_address,
		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
{
7147 struct gaudi_device *gaudi = hdev->asic_specific;
7148 u32 i, num_mem_regs, reg, err_bit;
	u64 err_addr, err_word = 0;
	int rc = 0;
7152 num_mem_regs = params->num_memories / 32 +
7153 ((params->num_memories % 32) ? 1 : 0);
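	/* i.e. DIV_ROUND_UP(params->num_memories, 32); e.g. the 90 TPC
	 * memories are covered by 3 indication registers
	 */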
7155 if (params->block_address >= CFG_BASE)
7156 params->block_address -= CFG_BASE;
	if (params->derr)
		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
	else
		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7163 if (params->disable_clock_gating) {
7164 mutex_lock(&gaudi->clk_gate_mutex);
		hdev->asic_funcs->disable_clock_gating(hdev);
	}
7168 /* Set invalid wrapper index */
7169 *memory_wrapper_idx = 0xFF;
7171 /* Iterate through memory wrappers, a single bit must be set */
7172 for (i = 0 ; i < num_mem_regs ; i++) {
		err_addr += i * 4;
		err_word = RREG32(err_addr);
		if (err_word) {
			err_bit = __ffs(err_word);
			*memory_wrapper_idx = err_bit + (32 * i);
			break;
		}
	}
7182 if (*memory_wrapper_idx == 0xFF) {
7183 dev_err(hdev->dev, "ECC error information cannot be found\n");
		rc = -EINVAL;
		goto enable_clk_gate;
	}
7188 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7189 *memory_wrapper_idx);
	*ecc_address =
		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
	*ecc_syndrom =
		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7196 /* Clear error indication */
7197 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
	if (params->derr)
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
	else
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7203 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
enable_clk_gate:
	if (params->disable_clock_gating) {
7207 hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
	}

	return rc;
}
/**
 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
 *
 * @idx: the current pi/ci value
 * @q_len: the queue length (power of 2)
 *
 * @return the cyclically decremented index
 */
static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
{
	u32 mask = q_len - 1;
	/*
	 * modular decrement is equivalent to adding (queue_size - 1)
	 * later we take LSBs to make sure the value is in the
	 * range [0, queue_len - 1]
	 */
	return (idx + q_len - 1) & mask;
}
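
/*
 * Worked example: for a queue of length 8 (mask 0x7), idx 0 wraps to
 * (0 + 8 - 1) & 0x7 = 7 and idx 3 decrements to (3 + 8 - 1) & 0x7 = 2.
 */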
/**
 * gaudi_print_sw_config_stream_data - print SW config stream data
 *
 * @hdev: pointer to the habanalabs device structure
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 */
static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
					u64 qman_base)
{
7245 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7246 u32 cq_ptr_lo_off, size;
7248 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7250 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7251 stream * cq_ptr_lo_off;
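	/* e.g. stream 2 of a TPC QMAN resolves to that QMAN's
	 * mmTPC0_QM_CQ_PTR_LO_2-equivalent register
	 */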
7252 cq_ptr_hi = cq_ptr_lo +
7253 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7254 cq_tsize = cq_ptr_lo +
7255 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7257 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7258 size = RREG32(cq_tsize);
7259 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
			stream, cq_ptr, size);
}
/**
 * gaudi_print_last_pqes_on_err - print last PQEs on error
 *
 * @hdev: pointer to the habanalabs device structure
 * @qid_base: first QID of the QMAN (out of 4 streams)
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
 */
static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
						u32 stream, u64 qman_base,
						bool pr_sw_conf)
{
	u32 ci, qm_ci_stream_off, queue_len;
	struct hl_hw_queue *q;
	u64 pq_ci, addr;
	int i;
7281 q = &hdev->kernel_queues[qid_base + stream];
7283 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7284 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7285 stream * qm_ci_stream_off;
7287 queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7288 q->int_queue_len : HL_QUEUE_LENGTH;
7290 hdev->asic_funcs->hw_queues_lock(hdev);
	if (pr_sw_conf)
		gaudi_print_sw_config_stream_data(hdev, stream, qman_base);

	ci = RREG32(pq_ci);

	/* we should start printing from ci - 1 */
	ci = gaudi_queue_idx_dec(ci, queue_len);
7300 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
		struct hl_bd *bd;
		u32 len;

		bd = q->kernel_address;
		bd += ci;

		len = le32_to_cpu(bd->len);
		/* len 0 means uninitialized entry - break */
		if (!len)
			break;

		addr = le64_to_cpu(bd->ptr);
7315 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7316 stream, ci, addr, len);
7318 /* get previous ci, wrap if needed */
		ci = gaudi_queue_idx_dec(ci, queue_len);
	}

	hdev->asic_funcs->hw_queues_unlock(hdev);
}
/**
 * print_qman_data_on_err - extract QMAN data on error
 *
 * @hdev: pointer to the habanalabs device structure
 * @qid_base: first QID of the QMAN (out of 4 streams)
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 *
 * This function attempts to extract as much data as possible on QMAN error.
 * On upper CP print the SW config stream data and last 8 PQEs.
 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs.
 */
static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
					u32 stream, u64 qman_base)
{
	u32 i;
7342 if (stream != QMAN_STREAMS) {
		gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
						true);
		return;
	}
7348 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7350 for (i = 0; i < QMAN_STREAMS; i++)
		gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
						false);
}
7355 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
					const char *qm_name,
					u64 qman_base,
					u32 qid_base)
{
	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
	u64 glbl_sts_addr, arb_err_addr;
	char reg_desc[32];
7364 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7365 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7367 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7368 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7369 glbl_sts_clr_val = 0;
7370 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7375 if (i == QMAN_STREAMS)
7376 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7378 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7380 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7381 if (glbl_sts_val & BIT(j)) {
7382 dev_err_ratelimited(hdev->dev,
7383 "%s %s. err cause: %s\n",
					qm_name, reg_desc,
					gaudi_qman_error_cause[j]);
7386 glbl_sts_clr_val |= BIT(j);
7390 /* Write 1 clear errors */
7391 if (!hdev->stop_on_err)
7392 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
		print_qman_data_on_err(hdev, qid_base, i, qman_base);
	}
	arb_err_val = RREG32(arb_err_addr);

	if (!arb_err_val)
		return;

7402 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7403 if (arb_err_val & BIT(j)) {
7404 dev_err_ratelimited(hdev->dev,
7405 "%s ARB_ERR. err cause: %s\n",
					qm_name,
					gaudi_qman_arb_error_cause[j]);
		}
	}
}
7412 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
		struct hl_eq_sm_sei_data *sei_data)
{
7415 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7417 switch (sei_data->sei_cause) {
7418 case SM_SEI_SO_OVERFLOW:
7420 "SM %u SEI Error: SO %u overflow/underflow",
7421 index, le32_to_cpu(sei_data->sei_log));
7423 case SM_SEI_LBW_4B_UNALIGNED:
7425 "SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7426 index, le32_to_cpu(sei_data->sei_log));
7428 case SM_SEI_AXI_RESPONSE_ERR:
7430 "SM %u SEI Error: AXI ID %u response error",
7431 index, le32_to_cpu(sei_data->sei_log));
7434 dev_err(hdev->dev, "Unknown SM SEI cause %u",
			le32_to_cpu(sei_data->sei_log));
		break;
	}
}
7440 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
		struct hl_eq_ecc_data *ecc_data)
{
7443 struct ecc_info_extract_params params;
7444 u64 ecc_address = 0, ecc_syndrom = 0;
7445 u8 index, memory_wrapper_idx = 0;
	bool extract_info_from_fw;
	int rc;
7449 switch (event_type) {
7450 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7451 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
		extract_info_from_fw = true;
		break;
7454 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7455 index = event_type - GAUDI_EVENT_TPC0_SERR;
7456 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7457 params.num_memories = 90;
7458 params.derr = false;
7459 params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
7462 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7463 index = event_type - GAUDI_EVENT_TPC0_DERR;
7464 params.block_address =
7465 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
7471 case GAUDI_EVENT_MME0_ACC_SERR:
7472 case GAUDI_EVENT_MME1_ACC_SERR:
7473 case GAUDI_EVENT_MME2_ACC_SERR:
7474 case GAUDI_EVENT_MME3_ACC_SERR:
7475 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7476 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7477 params.num_memories = 128;
7478 params.derr = false;
7479 params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
7482 case GAUDI_EVENT_MME0_ACC_DERR:
7483 case GAUDI_EVENT_MME1_ACC_DERR:
7484 case GAUDI_EVENT_MME2_ACC_DERR:
7485 case GAUDI_EVENT_MME3_ACC_DERR:
7486 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7487 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
7493 case GAUDI_EVENT_MME0_SBAB_SERR:
7494 case GAUDI_EVENT_MME1_SBAB_SERR:
7495 case GAUDI_EVENT_MME2_SBAB_SERR:
7496 case GAUDI_EVENT_MME3_SBAB_SERR:
7497 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7498 params.block_address =
7499 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7500 params.num_memories = 33;
7501 params.derr = false;
7502 params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
7505 case GAUDI_EVENT_MME0_SBAB_DERR:
7506 case GAUDI_EVENT_MME1_SBAB_DERR:
7507 case GAUDI_EVENT_MME2_SBAB_DERR:
7508 case GAUDI_EVENT_MME3_SBAB_DERR:
7509 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7510 params.block_address =
7511 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	default:
		return;
	}
7521 if (extract_info_from_fw) {
7522 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7523 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
	} else {
		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
				&ecc_syndrom, &memory_wrapper_idx);
		if (rc)
			return;
	}

	dev_err(hdev->dev,
7533 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
		ecc_address, ecc_syndrom, memory_wrapper_idx);
}
static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
{
	u64 qman_base;
	char desc[32];
	u32 qid_base;
	u8 index;
7544 switch (event_type) {
7545 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7546 index = event_type - GAUDI_EVENT_TPC0_QM;
7547 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7548 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7549 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7551 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7552 index = event_type - GAUDI_EVENT_MME0_QM;
7553 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7554 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
		break;
7557 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7558 index = event_type - GAUDI_EVENT_DMA0_QM;
7559 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
		if (index > 1)
			qid_base++;
		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
		break;
7566 case GAUDI_EVENT_NIC0_QM0:
7567 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7568 qman_base = mmNIC0_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
		break;
7571 case GAUDI_EVENT_NIC0_QM1:
7572 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7573 qman_base = mmNIC0_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
		break;
7576 case GAUDI_EVENT_NIC1_QM0:
7577 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7578 qman_base = mmNIC1_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
		break;
7581 case GAUDI_EVENT_NIC1_QM1:
7582 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7583 qman_base = mmNIC1_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
		break;
7586 case GAUDI_EVENT_NIC2_QM0:
7587 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7588 qman_base = mmNIC2_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
		break;
7591 case GAUDI_EVENT_NIC2_QM1:
7592 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7593 qman_base = mmNIC2_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
		break;
7596 case GAUDI_EVENT_NIC3_QM0:
7597 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7598 qman_base = mmNIC3_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
		break;
7601 case GAUDI_EVENT_NIC3_QM1:
7602 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7603 qman_base = mmNIC3_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
		break;
7606 case GAUDI_EVENT_NIC4_QM0:
7607 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7608 qman_base = mmNIC4_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
		break;
7611 case GAUDI_EVENT_NIC4_QM1:
7612 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7613 qman_base = mmNIC4_QM1_BASE;
7614 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7620 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
					bool razwi)
{
	char desc[64] = "";

7628 gaudi_get_event_desc(event_type, desc, sizeof(desc));
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);

	if (razwi) {
		gaudi_print_razwi_info(hdev);
		gaudi_print_mmu_error_info(hdev);
	}
}
7638 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
		struct cpucp_pkt_sync_err *sync_err)
{
7641 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7643 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
		sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
}
7647 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
		struct hl_eq_fw_alive *fw_alive)
{
	dev_err(hdev->dev,
7651 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7652 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7653 "Minor" : "Critical", fw_alive->process_id,
		fw_alive->thread_id, fw_alive->uptime_seconds);
}
static int gaudi_soft_reset_late_init(struct hl_device *hdev)
{
7659 struct gaudi_device *gaudi = hdev->asic_specific;
7661 /* Unmask all IRQs since some could have been received
	 * during the soft reset
	 */
	return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
}
7667 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
		struct hl_eq_hbm_ecc_data *hbm_ecc_data)
{
	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
	int rc = 0;
7673 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7674 CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7675 if (!hbm_ecc_data) {
7676 dev_err(hdev->dev, "No FW ECC data");
7680 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7681 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7682 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7683 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7684 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7685 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7686 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7687 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7688 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7689 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7690 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7691 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7692 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7693 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7696 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7697 device, ch, wr_par, rd_par, ca_par, serr, derr);
7699 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7700 device, ch, hbm_ecc_data->first_addr, type,
7701 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
			hbm_ecc_data->dec_cnt);

		return 0;
	}
7706 if (hdev->asic_prop.fw_security_enabled) {
		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
		return 0;
	}
7711 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7712 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7713 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7714 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7718 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7719 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x060);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
				device, ch * 2,
				RREG32(base + ch * 0x1000 + 0x064),
7728 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7729 (val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}
7733 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			rc = -EIO;
			dev_err(hdev->dev,
7738 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7739 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x070);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
				device, ch * 2 + 1,
				RREG32(base + ch * 0x1000 + 0x074),
7748 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7749 (val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}
7753 /* Clear interrupts */
7754 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7755 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7756 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7757 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7758 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7759 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
	}

	val = RREG32(base + 0x8F30);
7763 val2 = RREG32(base + 0x8F34);
7767 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7770 val = RREG32(base + 0x8F40);
7771 val2 = RREG32(base + 0x8F44);
7775 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
{
	switch (hbm_event_type) {
	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM0_SPI_1:
		return 0;
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_1:
		return 1;
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_1:
		return 2;
	case GAUDI_EVENT_HBM3_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_1:
		return 3;
	default:
		break;
	}

	/* Should never happen */
	return 0;
}
7805 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
		char *interrupt_name)
{
7808 struct gaudi_device *gaudi = hdev->asic_specific;
7809 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7810 bool soft_reset_required = false;
7812 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
	 * gating, and thus cannot be done in CPU-CP and should be done instead
	 * by the driver.
	 */
7817 mutex_lock(&gaudi->clk_gate_mutex);
7819 hdev->asic_funcs->disable_clock_gating(hdev);
7821 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7822 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7824 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7825 if (tpc_interrupts_cause & BIT(i)) {
7826 dev_err_ratelimited(hdev->dev,
7827 "TPC%d_%s interrupt cause: %s\n",
7828 tpc_id, interrupt_name,
7829 gaudi_tpc_interrupts_cause[i]);
7830 /* If this is QM error, we need to soft-reset */
			if (i == 15)
				soft_reset_required = true;
		}
7835 /* Clear interrupts */
7836 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7838 hdev->asic_funcs->set_clock_gating(hdev);
7840 mutex_unlock(&gaudi->clk_gate_mutex);
7842 return soft_reset_required;
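/*
 * The divisors in the two helpers below mirror the async event table layout,
 * where consecutive TPCs' DEC events appear to be spaced two IDs apart and
 * their KRN_ERR events six IDs apart.
 */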
static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
}

static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
}

static void gaudi_print_clk_change_info(struct hl_device *hdev,
					u16 event_type)
{
	switch (event_type) {
	case GAUDI_EVENT_FIX_POWER_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}
}
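/*
 * Summary of the dispatch below: fatal events (double-bit/DERR ECC errors,
 * GIC/AXI/PLL errors, queue out-of-sync, FW-requested resets) jump to the
 * reset_device label, while recoverable ones (single-bit/SERR ECC errors,
 * QMAN errors, throttling notifications) are logged and the event is
 * unmasked in the FW so it can be reported again.
 */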
static void gaudi_handle_eqe(struct hl_device *hdev,
				struct hl_eq_entry *eq_entry)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
			>> EQ_CTL_EVENT_TYPE_SHIFT);
	u8 cause;
	bool reset_required;

	gaudi->events_stat[event_type]++;
	gaudi->events_stat_aggregate[event_type]++;

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_DERR:
	case GAUDI_EVENT_PCIE_IF_DERR:
	case GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
		fallthrough;
	case GAUDI_EVENT_CPU_IF_ECC_DERR:
	case GAUDI_EVENT_PSOC_MEM_DERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
	case GAUDI_EVENT_MMU_DERR:
	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		goto reset_device;

	case GAUDI_EVENT_GIC500:
	case GAUDI_EVENT_AXI_ECC:
	case GAUDI_EVENT_L2_RAM_ECC:
	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
		gaudi_print_irq_info(hdev, event_type, false);
		goto reset_device;

	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_0:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		goto reset_device;

	case GAUDI_EVENT_HBM0_SPI_1:
	case GAUDI_EVENT_HBM1_SPI_1:
	case GAUDI_EVENT_HBM2_SPI_1:
	case GAUDI_EVENT_HBM3_SPI_1:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_TPC0_DEC:
	case GAUDI_EVENT_TPC1_DEC:
	case GAUDI_EVENT_TPC2_DEC:
	case GAUDI_EVENT_TPC3_DEC:
	case GAUDI_EVENT_TPC4_DEC:
	case GAUDI_EVENT_TPC5_DEC:
	case GAUDI_EVENT_TPC6_DEC:
	case GAUDI_EVENT_TPC7_DEC:
		gaudi_print_irq_info(hdev, event_type, true);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_dec_event_to_tpc_id(event_type),
					"AXI_SLV_DEC_Error");
		if (reset_required) {
			dev_err(hdev->dev, "hard reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
		}
		break;

	case GAUDI_EVENT_TPC0_KRN_ERR:
	case GAUDI_EVENT_TPC1_KRN_ERR:
	case GAUDI_EVENT_TPC2_KRN_ERR:
	case GAUDI_EVENT_TPC3_KRN_ERR:
	case GAUDI_EVENT_TPC4_KRN_ERR:
	case GAUDI_EVENT_TPC5_KRN_ERR:
	case GAUDI_EVENT_TPC6_KRN_ERR:
	case GAUDI_EVENT_TPC7_KRN_ERR:
		gaudi_print_irq_info(hdev, event_type, true);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_krn_event_to_tpc_id(event_type),
					"KRN_ERR");
		if (reset_required) {
			dev_err(hdev->dev, "hard reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
		}
		break;

	case GAUDI_EVENT_PCIE_CORE_SERR:
	case GAUDI_EVENT_PCIE_IF_SERR:
	case GAUDI_EVENT_PCIE_PHY_SERR:
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
	case GAUDI_EVENT_CPU_IF_ECC_SERR:
	case GAUDI_EVENT_PSOC_MEM_SERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
		fallthrough;
	case GAUDI_EVENT_MMU_SERR:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PCIE_DEC:
	case GAUDI_EVENT_MME0_WBC_RSP:
	case GAUDI_EVENT_MME0_SBAB0_RSP:
	case GAUDI_EVENT_MME1_WBC_RSP:
	case GAUDI_EVENT_MME1_SBAB0_RSP:
	case GAUDI_EVENT_MME2_WBC_RSP:
	case GAUDI_EVENT_MME2_SBAB0_RSP:
	case GAUDI_EVENT_MME3_WBC_RSP:
	case GAUDI_EVENT_MME3_SBAB0_RSP:
	case GAUDI_EVENT_CPU_AXI_SPLITTER:
	case GAUDI_EVENT_PSOC_AXI_DEC:
	case GAUDI_EVENT_PSOC_PRSTN_FALL:
	case GAUDI_EVENT_MMU_PAGE_FAULT:
	case GAUDI_EVENT_MMU_WR_PERM:
	case GAUDI_EVENT_RAZWI_OR_ADC:
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		fallthrough;
	case GAUDI_EVENT_NIC0_QM0:
	case GAUDI_EVENT_NIC0_QM1:
	case GAUDI_EVENT_NIC1_QM0:
	case GAUDI_EVENT_NIC1_QM1:
	case GAUDI_EVENT_NIC2_QM0:
	case GAUDI_EVENT_NIC2_QM1:
	case GAUDI_EVENT_NIC3_QM0:
	case GAUDI_EVENT_NIC3_QM1:
	case GAUDI_EVENT_NIC4_QM0:
	case GAUDI_EVENT_NIC4_QM1:
	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_qman_err(hdev, event_type);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
		gaudi_print_irq_info(hdev, event_type, true);
		goto reset_device;

	case GAUDI_EVENT_TPC0_BMON_SPMU:
	case GAUDI_EVENT_TPC1_BMON_SPMU:
	case GAUDI_EVENT_TPC2_BMON_SPMU:
	case GAUDI_EVENT_TPC3_BMON_SPMU:
	case GAUDI_EVENT_TPC4_BMON_SPMU:
	case GAUDI_EVENT_TPC5_BMON_SPMU:
	case GAUDI_EVENT_TPC6_BMON_SPMU:
	case GAUDI_EVENT_TPC7_BMON_SPMU:
	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
		gaudi_print_irq_info(hdev, event_type, false);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_print_sm_sei_info(hdev, event_type,
					&eq_entry->sm_sei_data);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
		gaudi_print_clk_change_info(hdev, event_type);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PSOC_GPIO_U16_0:
		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
		dev_err(hdev->dev,
			"Received high temp H/W interrupt %d (cause %d)\n",
			event_type, cause);
		break;

	case GAUDI_EVENT_DEV_RESET_REQ:
		gaudi_print_irq_info(hdev, event_type, false);
		goto reset_device;

	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
		goto reset_device;

	case GAUDI_EVENT_FW_ALIVE_S:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
		goto reset_device;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}

	return;

reset_device:
	if (hdev->hard_reset_on_fw_events)
		hl_device_reset(hdev, HL_RESET_HARD);
	else
		hl_fw_unmask_irq(hdev, event_type);
}
static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
					u32 *size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (aggregate) {
		*size = (u32) sizeof(gaudi->events_stat_aggregate);
		return gaudi->events_stat_aggregate;
	}

	*size = (u32) sizeof(gaudi->events_stat);
	return gaudi->events_stat;
}
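/*
 * The invalidation sequence below writes 3 and then 2 to STLB_INV_PS around
 * a producer-index bump of STLB_CACHE_INV, then polls STLB_INV_PS until it
 * clears. A timeout leaves the MMU caches in an unknown state, which is why
 * the driver escalates to a hard reset.
 */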
static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
					u32 flags)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_PS, 3);
	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
	WREG32(mmSTLB_INV_PS, 2);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_PS,
		status,
		!status,
		1000,
		timeout_usec);

	WREG32(mmSTLB_INV_SET, 0);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, HL_RESET_HARD);
	}

	return rc;
}
static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
				bool is_hard, u32 flags,
				u32 asid, u64 va, u64 size)
{
	/* Treat as invalidate all because there is no range invalidation
	 * in Gaudi
	 */
	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
}
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
					u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(MMU_ASID, asid);
	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_BUSY, 0x80000000);

	rc = hl_poll_timeout(
		hdev,
		MMU_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}
static int gaudi_send_heartbeat(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_heartbeat(hdev);
}

static int gaudi_cpucp_info_get(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1);
	if (rc)
		return rc;

	if (!strlen(prop->cpucp_info.card_name))
		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
				CARD_NAME_MAX_LEN);

	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);

	set_default_power_values(hdev);

	hdev->max_power = prop->max_power_default;

	return 0;
}
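/*
 * Idleness is computed below with clock gating disabled so the QMAN/core
 * status registers can be sampled. When mask_arr is non-NULL, a bit is set
 * for every busy engine, indexed by its GAUDI_ENGINE_ID_*; when a seq_file
 * is provided, a human-readable status table is emitted as well.
 */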
static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
					u8 mask_len, struct seq_file *s)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
	unsigned long *mask = (unsigned long *)mask_arr;
	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
	bool is_idle = true, is_eng_idle, is_slave;
	u64 offset;
	int i, dma_id, port;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	if (s)
		seq_puts(s,
			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
			"---  -------  ------------  ----------  -------------\n");

	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		offset = dma_id * DMA_QMAN_OFFSET;

		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
		if (s)
			seq_printf(s, fmt, dma_id,
				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				qm_cgm_sts, dma_core_sts0);
	}

	if (s)
		seq_puts(s,
			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
			"---  -------  ------------  ----------  ----------\n");

	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		offset = i * TPC_QMAN_OFFSET;
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
		if (s)
			seq_printf(s, fmt, i,
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
	}

	if (s)
		seq_puts(s,
			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
			"---  -------  ------------  ----------  -----------\n");

	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
		offset = i * MME_QMAN_OFFSET;
		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
		is_eng_idle = IS_MME_IDLE(mme_arch_sts);

		/* MME 1 & 3 are slaves, no need to check their QMANs */
		is_slave = i % 2;
		if (!is_slave) {
			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
		}

		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
		if (s) {
			if (!is_slave)
				seq_printf(s, fmt, i,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
			else
				seq_printf(s, mme_slave_fmt, i,
					is_eng_idle ? "Y" : "N", "-",
					"-", mme_arch_sts);
		}
	}

	if (s)
		seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
				"---  -------  ------------  ----------\n");

	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
		offset = i * NIC_MACRO_QMAN_OFFSET;
		port = 2 * i;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (s)
				seq_printf(s, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}

		port = 2 * i + 1;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (s)
				seq_printf(s, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}
	}

	if (s)
		seq_puts(s, "\n");

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);

	return is_idle;
}
static void gaudi_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_lock(&gaudi->hw_queues_lock);
}

static void gaudi_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_unlock(&gaudi->hw_queues_lock);
}

static u32 gaudi_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}
static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
				size_t max_size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_eeprom_data(hdev, data, max_size);
}
/*
 * This function should be used only during initialization and/or after reset,
 * when there are no active users.
 */
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 kernel_timeout;
	u32 status, offset;
	int rc;

	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);

	if (hdev->pldm)
		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
	else
		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
			upper_32_bits(tpc_kernel));
	/* set a valid LUT pointer, content is of no significance */
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
			lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));

	WREG32(mmTPC0_CFG_TPC_CMD + offset,
			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d icache prefetch\n",
			tpc_id);
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);

	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d vector pipe\n",
			tpc_id);
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
		status,
		(status == 0),
		1000,
		kernel_timeout);

	hdev->asic_funcs->set_clock_gating(hdev);
	mutex_unlock(&gaudi->clk_gate_mutex);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d kernel to execute\n",
			tpc_id);
		return -EIO;
	}

	return 0;
}
static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int min_alloc_order, rc, collective_cb_size;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	hdev->internal_cb_pool_virt_addr =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					HOST_SPACE_INTERNAL_CB_SZ,
					&hdev->internal_cb_pool_dma_addr,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
			sizeof(struct packet_fence);
	min_alloc_order = ilog2(collective_cb_size);

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev,
			"Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool,
				(uintptr_t) hdev->internal_cb_pool_virt_addr,
				HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base) {
		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}

	mutex_lock(&ctx->mmu_lock);
	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
			hdev->internal_cb_pool_dma_addr,
			HOST_SPACE_INTERNAL_CB_SZ);

	hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
	mutex_unlock(&ctx->mmu_lock);

	if (rc)
		goto unreserve_internal_cb_pool;

	return 0;

unreserve_internal_cb_pool:
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HOST_SPACE_INTERNAL_CB_SZ,
			hdev->internal_cb_pool_virt_addr,
			hdev->internal_cb_pool_dma_addr);

	return rc;
}
static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	mutex_lock(&ctx->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
	mutex_unlock(&ctx->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HOST_SPACE_INTERNAL_CB_SZ,
			hdev->internal_cb_pool_virt_addr,
			hdev->internal_cb_pool_dma_addr);
}

static int gaudi_ctx_init(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return 0;

	gaudi_mmu_prepare(ctx->hdev, ctx->asid);
	return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
}

static void gaudi_ctx_fini(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return;

	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
}
static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return gaudi_cq_assignment[cq_idx];
}

static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) +
			sizeof(struct packet_msg_prot) * 2;
}

static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 +
			sizeof(struct packet_fence) +
			sizeof(struct packet_msg_prot) * 2;
}
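/*
 * A signal CB consists of a single MSG_SHORT packet that increments the
 * target SOB by one (value = 1, mode = ADD), so concurrent signalers do not
 * race. Note: the two MSG_PROT packets counted in gaudi_get_signal_cb_size()
 * are not generated here; presumably they are appended by the common
 * submission path that terminates every CB.
 */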
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = cb->kernel_address + size;
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}
static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
					u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
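/*
 * Monitors observe sync objects at group granularity: eight consecutive SOBs
 * form a group, so sob_base / 8 below selects the group, and the 8-bit mask
 * prepared by hl_gen_sob_mask() controls which objects inside the group take
 * part in the >= comparison against sob_val.
 */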
static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
		u16 sob_val, u16 mon_id)
{
	u64 monitor_base;
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u16 msg_addr_offset;
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev,
				"sob_base %u (mask %#x) is not valid\n",
				sob_base, sob_mask);
		return 0;
	}

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;

	memset(pkt, 0, pkt_size);

	/* Monitor config packet: bind the monitor to a sync object */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL */
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
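/*
 * Worked example for the NIC cases below: the four streams of each NIC
 * engine occupy consecutive queue ids, so GAUDI_QUEUE_ID_NIC_3_0 yields
 * nic_index 3, i.e. NIC macro 1 (nic_index >> 1) and engine 1 inside that
 * macro (nic_index & 0x1), which selects NIC1's QM1 fence registers.
 */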
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when
	 * the sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}
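/*
 * A wait CB is assembled from the helpers above: three MSG_SHORT packets
 * that program the monitor (payload address low/high and payload data), one
 * MSG_SHORT that arms the monitor on the SOB group, and a FENCE packet that
 * blocks the stream until the armed monitor fires and writes the fence.
 */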
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
	int rc;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
			CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 1, 0);
	if (rc)
		dev_err(hdev->dev, "failed resetting sob %u", hw_sob->sob_id);

	kref_init(&hw_sob->kref);
}
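/*
 * The boot FW leaves HL_POWER9_HOST_MAGIC in a sticky (non-reset) scratchpad
 * register when the host is a POWER9 machine that supports 64-bit DMA
 * addressing; otherwise the driver falls back to a 48-bit DMA mask.
 */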
static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
{
	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
					HL_POWER9_HOST_MAGIC) {
		hdev->power9_64bit_dma_enable = 1;
		hdev->dma_mask = 64;
	} else {
		hdev->power9_64bit_dma_enable = 0;
		hdev->dma_mask = 48;
	}
}

static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}
static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}
static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}
static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* Reg value represents a partial address of sync object,
	 * it is used as unique identifier. For this we need to
	 * clear the cutoff cfg base bits from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= (u32)CFG_BASE;

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}
static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 reg_value;
	int i, j, rc;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
		/* TPC registers must be accessed with clock gating disabled */
		mutex_lock(&gaudi->clk_gate_mutex);
		hdev->asic_funcs->disable_clock_gating(hdev);

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		/* We can reenable clock_gating */
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
			/* MME registers must be accessed with clock gating
			 * disabled
			 */
			mutex_lock(&gaudi->clk_gate_mutex);
			hdev->asic_funcs->disable_clock_gating(hdev);

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			/* We can reenable clock_gating */
			hdev->asic_funcs->set_clock_gating(hdev);
			mutex_unlock(&gaudi->clk_gate_mutex);

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}
static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}

static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)));
}
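/*
 * Fence counter registers are laid out as a 2D array: the register for
 * (queue i, fence id f) sits sizeof(u32) * (i + f * num_of_queues) bytes
 * from the base, and the matching RDATA register lives at a fixed offset
 * from its CNT register, which is how fence_rdata is derived below.
 */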
static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}
static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};
static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sg = gaudi_dma_map_sg,
	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read32 = gaudi_debugfs_read32,
	.debugfs_write32 = gaudi_debugfs_write32,
	.debugfs_read64 = gaudi_debugfs_read64,
	.debugfs_write64 = gaudi_debugfs_write64,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.set_pll_profile = gaudi_set_pll_profile,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.send_heartbeat = gaudi_send_heartbeat,
	.set_clock_gating = gaudi_set_clock_gating,
	.disable_clock_gating = gaudi_disable_clock_gating,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.soft_reset_late_init = gaudi_soft_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.get_clk_rate = gaudi_get_clk_rate,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
	.get_device_time = gaudi_get_device_time,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init
};
/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}