1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright 2016-2022 HabanaLabs, Ltd.
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
24 * Gaudi security scheme:
26 * 1. Host is protected by:
30 * 2. DDR is protected by:
31 * - Range registers (protect the first 512MB)
33 * 3. Configuration is protected by:
37 * MMU is always enabled.
39 * QMAN DMA channels 0,1 (PCI DMAN):
40 * - DMA is not secured.
41 * - PQ and CQ are secured.
42 * - CP is secured: The driver needs to parse CB but WREG should be allowed
43 * because of TDMA (tensor DMA). Hence, WREG is always not
46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47 * channel 0 to be secured, execute the DMA and change it back to not secured.
48 * Currently, the driver doesn't use the DMA while there are compute jobs
51 * The current use cases for the driver to use the DMA are:
52 * - Clear SRAM on context switch (happens on context switch when device is
54 * - MMU page tables area clear (happens on init)
56 * QMAN DMA 2-7, TPC, MME, NIC:
57 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58 * CQ, CP and the engine are not secured
62 #define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
66 #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
68 #define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
73 #define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000 /* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */
83 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
85 #define GAUDI_MAX_STRING_LEN 20
87 #define GAUDI_CB_POOL_CB_CNT 512
88 #define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE 16
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
98 #define GAUDI_ARB_WDT_TIMEOUT 0x1000000
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK (\
101 BIT(GAUDI_ENGINE_ID_MME_0) |\
102 BIT(GAUDI_ENGINE_ID_MME_2) |\
103 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
105 #define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */
107 #define GAUDI_PLL_MAX 10
109 #define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010")
111 #define MONITOR_SOB_STRING_SIZE 256
113 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
114 GAUDI_QUEUE_ID_DMA_0_0,
115 GAUDI_QUEUE_ID_DMA_0_1,
116 GAUDI_QUEUE_ID_DMA_0_2,
117 GAUDI_QUEUE_ID_DMA_0_3,
118 GAUDI_QUEUE_ID_DMA_1_0,
119 GAUDI_QUEUE_ID_DMA_1_1,
120 GAUDI_QUEUE_ID_DMA_1_2,
121 GAUDI_QUEUE_ID_DMA_1_3
124 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
125 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
126 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
127 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
131 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
132 [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
133 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
134 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
135 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
136 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
137 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
138 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
139 [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
142 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
143 [0] = GAUDI_QUEUE_ID_DMA_0_0,
144 [1] = GAUDI_QUEUE_ID_DMA_0_1,
145 [2] = GAUDI_QUEUE_ID_DMA_0_2,
146 [3] = GAUDI_QUEUE_ID_DMA_0_3,
147 [4] = GAUDI_QUEUE_ID_DMA_1_0,
148 [5] = GAUDI_QUEUE_ID_DMA_1_1,
149 [6] = GAUDI_QUEUE_ID_DMA_1_2,
150 [7] = GAUDI_QUEUE_ID_DMA_1_3,
153 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
154 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
155 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
156 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
157 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
158 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
159 [PACKET_REPEAT] = sizeof(struct packet_repeat),
160 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
161 [PACKET_FENCE] = sizeof(struct packet_fence),
162 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
163 [PACKET_NOP] = sizeof(struct packet_nop),
164 [PACKET_STOP] = sizeof(struct packet_stop),
165 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
166 [PACKET_WAIT] = sizeof(struct packet_wait),
167 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
170 static inline bool validate_packet_id(enum packet_id id)
174 case PACKET_WREG_BULK:
175 case PACKET_MSG_LONG:
176 case PACKET_MSG_SHORT:
179 case PACKET_MSG_PROT:
184 case PACKET_ARB_POINT:
186 case PACKET_LOAD_AND_EXE:
193 static const char * const
194 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
195 "tpc_address_exceed_slm",
197 "tpc_spu_mac_overflow",
198 "tpc_spu_addsub_overflow",
199 "tpc_spu_abs_overflow",
200 "tpc_spu_fp_dst_nan_inf",
201 "tpc_spu_fp_dst_denorm",
202 "tpc_vpu_mac_overflow",
203 "tpc_vpu_addsub_overflow",
204 "tpc_vpu_abs_overflow",
205 "tpc_vpu_fp_dst_nan_inf",
206 "tpc_vpu_fp_dst_denorm",
208 "tpc_illegal_instruction",
209 "tpc_pc_wrap_around",
217 static const char * const
218 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
222 "CP error due to undefined OPCODE",
223 "CP encountered STOP OPCODE",
225 "CP WRREG32 or WRBULK returned error",
227 "FENCE 0 inc over max value and clipped",
228 "FENCE 1 inc over max value and clipped",
229 "FENCE 2 inc over max value and clipped",
230 "FENCE 3 inc over max value and clipped",
231 "FENCE 0 dec under min value and clipped",
232 "FENCE 1 dec under min value and clipped",
233 "FENCE 2 dec under min value and clipped",
234 "FENCE 3 dec under min value and clipped"
237 static const char * const
238 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
239 "Choice push while full error",
240 "Choice Q watchdog error",
241 "MSG AXI LBW returned with error"
244 enum gaudi_sm_sei_cause {
245 GAUDI_SM_SEI_SO_OVERFLOW,
246 GAUDI_SM_SEI_LBW_4B_UNALIGNED,
247 GAUDI_SM_SEI_AXI_RESPONSE_ERR
250 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
251 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
252 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
253 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
254 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
255 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
256 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
257 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
258 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
259 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
296 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
304 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
305 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
306 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
307 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
308 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
309 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
310 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
311 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
312 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
313 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
314 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
315 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
316 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
317 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
318 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
319 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
320 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
321 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
322 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
323 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
324 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
325 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
326 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
327 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
328 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
329 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
330 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
331 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
332 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
333 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
334 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
335 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
336 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
337 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
338 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
339 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
340 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
341 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
342 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
343 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
344 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
345 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
346 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
347 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
348 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
349 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
350 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
351 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
352 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
353 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
354 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
355 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
356 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
357 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
358 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
359 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
360 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
361 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
362 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
363 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
366 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
367 { .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
368 { .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
369 { .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
370 { .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
371 { .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
372 { .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" },
373 { .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
374 { .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
375 { .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
376 { .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
377 { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
378 { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
379 { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
380 { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
381 { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
382 { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
383 { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
384 { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
385 { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
386 { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
387 { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
388 { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
389 { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
390 { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
391 { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
392 { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
393 { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
396 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
397 { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
398 { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
399 { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
400 { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
401 { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
402 { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
403 { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
404 { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
405 { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
406 { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
407 { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
410 static s64 gaudi_state_dump_specs_props[] = {
411 [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
412 [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
413 [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
414 [SP_MON_OBJ_WR_ADDR_LOW] =
415 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
416 [SP_MON_OBJ_WR_ADDR_HIGH] =
417 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
418 [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
419 [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
420 [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
421 [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
422 [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
423 [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
424 [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
425 [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
426 [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
427 [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
428 [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
429 [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
430 [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
431 [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
432 [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
433 [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
434 [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
435 [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
436 [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
437 [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
438 [SP_FENCE0_CNT_OFFSET] =
439 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
440 [SP_FENCE0_RDATA_OFFSET] =
441 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
442 [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
446 /* The order here is opposite to the order of the indexing in the h/w.
447 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
449 static const char * const gaudi_sync_manager_names[] = {
457 struct ecc_info_extract_params {
463 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
465 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
466 struct hl_cs_job *job);
467 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
469 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
470 u32 num_regs, u32 val);
471 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
473 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
474 static int gaudi_cpucp_info_get(struct hl_device *hdev);
475 static void gaudi_disable_clock_gating(struct hl_device *hdev);
476 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
477 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
479 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
480 struct hl_gen_wait_properties *prop);
481 static inline enum hl_collective_mode
482 get_collective_mode(struct hl_device *hdev, u32 queue_id)
484 if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
485 return HL_COLLECTIVE_MASTER;
487 if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
488 queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
489 return HL_COLLECTIVE_SLAVE;
491 if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
492 queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
493 return HL_COLLECTIVE_SLAVE;
495 if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
496 queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
497 return HL_COLLECTIVE_SLAVE;
499 return HL_COLLECTIVE_NOT_SUPPORTED;
502 static inline void set_default_power_values(struct hl_device *hdev)
504 struct asic_fixed_properties *prop = &hdev->asic_prop;
506 if (hdev->card_type == cpucp_card_type_pmc) {
507 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
509 if (prop->fw_security_enabled)
510 prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
512 prop->dc_power_default = DC_POWER_DEFAULT_PMC;
514 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
515 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
519 static int gaudi_set_fixed_properties(struct hl_device *hdev)
521 struct asic_fixed_properties *prop = &hdev->asic_prop;
522 u32 num_sync_stream_queues = 0;
525 prop->max_queues = GAUDI_QUEUE_ID_SIZE;
526 prop->hw_queues_props = kcalloc(prop->max_queues,
527 sizeof(struct hw_queue_properties),
530 if (!prop->hw_queues_props)
533 for (i = 0 ; i < prop->max_queues ; i++) {
534 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
535 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
536 prop->hw_queues_props[i].driver_only = 0;
537 prop->hw_queues_props[i].supports_sync_stream = 1;
538 prop->hw_queues_props[i].cb_alloc_flags =
540 num_sync_stream_queues++;
541 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
542 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
543 prop->hw_queues_props[i].driver_only = 1;
544 prop->hw_queues_props[i].supports_sync_stream = 0;
545 prop->hw_queues_props[i].cb_alloc_flags =
547 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
548 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
549 prop->hw_queues_props[i].driver_only = 0;
550 prop->hw_queues_props[i].supports_sync_stream = 0;
551 prop->hw_queues_props[i].cb_alloc_flags =
555 prop->hw_queues_props[i].collective_mode =
556 get_collective_mode(hdev, i);
559 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
560 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
561 prop->collective_first_sob = 0;
562 prop->collective_first_mon = 0;
564 /* 2 SOBs per internal queue stream are reserved for collective */
565 prop->sync_stream_first_sob =
566 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
567 * QMAN_STREAMS * HL_RSVD_SOBS;
569 /* 1 monitor per internal queue stream are reserved for collective
570 * 2 monitors per external queue stream are reserved for collective
572 prop->sync_stream_first_mon =
573 (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
574 (NUMBER_OF_EXT_HW_QUEUES * 2);
576 prop->dram_base_address = DRAM_PHYS_BASE;
577 prop->dram_size = GAUDI_HBM_SIZE_32GB;
578 prop->dram_end_address = prop->dram_base_address +
580 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
582 prop->sram_base_address = SRAM_BASE_ADDR;
583 prop->sram_size = SRAM_SIZE;
584 prop->sram_end_address = prop->sram_base_address +
586 prop->sram_user_base_address = prop->sram_base_address +
587 SRAM_USER_BASE_OFFSET;
589 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
591 prop->mmu_pgt_size = 0x800000; /* 8MB */
593 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
594 prop->mmu_pte_size = HL_PTE_SIZE;
595 prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
596 prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
597 prop->dram_page_size = PAGE_SIZE_2MB;
598 prop->dram_supports_virtual_memory = false;
600 prop->pmmu.hop0_shift = MMU_V1_1_HOP0_SHIFT;
601 prop->pmmu.hop1_shift = MMU_V1_1_HOP1_SHIFT;
602 prop->pmmu.hop2_shift = MMU_V1_1_HOP2_SHIFT;
603 prop->pmmu.hop3_shift = MMU_V1_1_HOP3_SHIFT;
604 prop->pmmu.hop4_shift = MMU_V1_1_HOP4_SHIFT;
605 prop->pmmu.hop0_mask = MMU_V1_1_HOP0_MASK;
606 prop->pmmu.hop1_mask = MMU_V1_1_HOP1_MASK;
607 prop->pmmu.hop2_mask = MMU_V1_1_HOP2_MASK;
608 prop->pmmu.hop3_mask = MMU_V1_1_HOP3_MASK;
609 prop->pmmu.hop4_mask = MMU_V1_1_HOP4_MASK;
610 prop->pmmu.start_addr = VA_HOST_SPACE_START;
611 prop->pmmu.end_addr =
612 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
613 prop->pmmu.page_size = PAGE_SIZE_4KB;
614 prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
615 prop->pmmu.last_mask = LAST_MASK;
616 /* TODO: will be duplicated until implementing per-MMU props */
617 prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
618 prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
620 /* PMMU and HPMMU are the same except of page size */
621 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
622 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
624 /* shifts and masks are the same in PMMU and DMMU */
625 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
626 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
627 prop->dmmu.end_addr = VA_HOST_SPACE_END;
628 prop->dmmu.page_size = PAGE_SIZE_2MB;
630 prop->cfg_size = CFG_SIZE;
631 prop->max_asid = MAX_ASID;
632 prop->num_of_events = GAUDI_EVENT_SIZE;
633 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
635 set_default_power_values(hdev);
637 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
638 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
640 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
641 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
643 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
646 prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
648 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
649 prop->sync_stream_first_sob +
650 (num_sync_stream_queues * HL_RSVD_SOBS);
651 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
652 prop->sync_stream_first_mon +
653 (num_sync_stream_queues * HL_RSVD_MONS);
655 prop->first_available_user_msix_interrupt = USHRT_MAX;
657 for (i = 0 ; i < HL_MAX_DCORES ; i++)
658 prop->first_available_cq[i] = USHRT_MAX;
660 prop->fw_cpu_boot_dev_sts0_valid = false;
661 prop->fw_cpu_boot_dev_sts1_valid = false;
662 prop->hard_reset_done_by_fw = false;
663 prop->gic_interrupts_enable = true;
665 prop->server_type = HL_SERVER_TYPE_UNKNOWN;
667 prop->clk_pll_index = HL_GAUDI_MME_PLL;
668 prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
670 prop->use_get_power_for_reset_history = true;
675 static int gaudi_pci_bars_map(struct hl_device *hdev)
677 static const char * const name[] = {"SRAM", "CFG", "HBM"};
678 bool is_wc[3] = {false, false, true};
681 rc = hl_pci_bars_map(hdev, name, is_wc);
685 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
686 (CFG_BASE - SPI_FLASH_BASE_ADDR);
691 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
693 struct gaudi_device *gaudi = hdev->asic_specific;
694 struct hl_inbound_pci_region pci_region;
698 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
701 if (hdev->asic_prop.iatu_done_by_fw)
704 /* Inbound Region 2 - Bar 4 - Point to HBM */
705 pci_region.mode = PCI_BAR_MATCH_MODE;
706 pci_region.bar = HBM_BAR_ID;
707 pci_region.addr = addr;
708 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
713 old_addr = gaudi->hbm_bar_cur_addr;
714 gaudi->hbm_bar_cur_addr = addr;
720 static int gaudi_init_iatu(struct hl_device *hdev)
722 struct hl_inbound_pci_region inbound_region;
723 struct hl_outbound_pci_region outbound_region;
726 if (hdev->asic_prop.iatu_done_by_fw)
729 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
730 inbound_region.mode = PCI_BAR_MATCH_MODE;
731 inbound_region.bar = SRAM_BAR_ID;
732 inbound_region.addr = SRAM_BASE_ADDR;
733 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
737 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
738 inbound_region.mode = PCI_BAR_MATCH_MODE;
739 inbound_region.bar = CFG_BAR_ID;
740 inbound_region.addr = SPI_FLASH_BASE_ADDR;
741 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
745 /* Inbound Region 2 - Bar 4 - Point to HBM */
746 inbound_region.mode = PCI_BAR_MATCH_MODE;
747 inbound_region.bar = HBM_BAR_ID;
748 inbound_region.addr = DRAM_PHYS_BASE;
749 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
753 hdev->asic_funcs->set_dma_mask_from_fw(hdev);
755 /* Outbound Region 0 - Point to Host */
756 outbound_region.addr = HOST_PHYS_BASE;
757 outbound_region.size = HOST_PHYS_SIZE;
758 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
764 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
766 return RREG32(mmHW_STATE);
769 static int gaudi_early_init(struct hl_device *hdev)
771 struct asic_fixed_properties *prop = &hdev->asic_prop;
772 struct pci_dev *pdev = hdev->pdev;
776 rc = gaudi_set_fixed_properties(hdev);
778 dev_err(hdev->dev, "Failed setting fixed properties\n");
782 /* Check BAR sizes */
783 if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
785 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
787 (unsigned long long) pci_resource_len(pdev,
791 goto free_queue_props;
794 if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
796 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
798 (unsigned long long) pci_resource_len(pdev,
802 goto free_queue_props;
805 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
806 hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
808 /* If FW security is enabled at this point it means no access to ELBI */
809 if (hdev->asic_prop.fw_security_enabled) {
810 hdev->asic_prop.iatu_done_by_fw = true;
813 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
814 * decision can only be taken based on PCI ID security.
816 hdev->asic_prop.gic_interrupts_enable = false;
820 rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
823 goto free_queue_props;
825 /* Check whether FW is configuring iATU */
826 if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
827 (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
828 hdev->asic_prop.iatu_done_by_fw = true;
831 rc = hl_pci_init(hdev);
833 goto free_queue_props;
835 /* Before continuing in the initialization, we need to read the preboot
836 * version to determine whether we run with a security-enabled firmware
838 rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
840 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
842 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
844 if (hdev->reset_on_preboot_fail)
845 hdev->asic_funcs->hw_fini(hdev, true, false);
849 if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
851 "H/W state is dirty, must reset before initializing\n");
852 hdev->asic_funcs->hw_fini(hdev, true, false);
860 kfree(hdev->asic_prop.hw_queues_props);
864 static int gaudi_early_fini(struct hl_device *hdev)
866 kfree(hdev->asic_prop.hw_queues_props);
873 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
875 * @hdev: pointer to hl_device structure
878 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
880 struct asic_fixed_properties *prop = &hdev->asic_prop;
881 u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
882 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
885 if (hdev->asic_prop.fw_security_enabled) {
886 struct gaudi_device *gaudi = hdev->asic_specific;
888 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
891 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
896 freq = pll_freq_arr[2];
898 /* Backward compatibility */
899 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
900 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
901 nr = RREG32(mmPSOC_CPU_PLL_NR);
902 nf = RREG32(mmPSOC_CPU_PLL_NF);
903 od = RREG32(mmPSOC_CPU_PLL_OD);
905 if (div_sel == DIV_SEL_REF_CLK ||
906 div_sel == DIV_SEL_DIVIDED_REF) {
907 if (div_sel == DIV_SEL_REF_CLK)
910 freq = PLL_REF_CLK / (div_fctr + 1);
911 } else if (div_sel == DIV_SEL_PLL_CLK ||
912 div_sel == DIV_SEL_DIVIDED_PLL) {
913 pll_clk = PLL_REF_CLK * (nf + 1) /
914 ((nr + 1) * (od + 1));
915 if (div_sel == DIV_SEL_PLL_CLK)
918 freq = pll_clk / (div_fctr + 1);
921 "Received invalid div select value: %d",
927 prop->psoc_timestamp_frequency = freq;
928 prop->psoc_pci_pll_nr = nr;
929 prop->psoc_pci_pll_nf = nf;
930 prop->psoc_pci_pll_od = od;
931 prop->psoc_pci_pll_div_factor = div_fctr;
936 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
937 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
939 struct asic_fixed_properties *prop = &hdev->asic_prop;
940 struct packet_lin_dma *init_tpc_mem_pkt;
941 struct hl_cs_job *job;
948 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
952 init_tpc_mem_pkt = cb->kernel_address;
953 cb_size = sizeof(*init_tpc_mem_pkt);
954 memset(init_tpc_mem_pkt, 0, cb_size);
956 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
958 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
959 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
960 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
961 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
963 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
965 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
966 dst_addr = (prop->sram_user_base_address &
967 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
968 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
969 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
971 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
973 dev_err(hdev->dev, "Failed to allocate a new job\n");
980 atomic_inc(&job->user_cb->cs_cnt);
981 job->user_cb_size = cb_size;
982 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
983 job->patched_cb = job->user_cb;
984 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
986 hl_debugfs_add_job(hdev, job);
988 rc = gaudi_send_job_on_qman0(hdev, job);
993 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
994 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1000 hl_userptr_delete_list(hdev, &job->userptr_list);
1001 hl_debugfs_remove_job(hdev, job);
1003 atomic_dec(&cb->cs_cnt);
1007 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1013 * gaudi_init_tpc_mem() - Initialize TPC memories.
1014 * @hdev: Pointer to hl_device structure.
1016 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1018 * Return: 0 for success, negative value for error.
1020 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1022 const struct firmware *fw;
1025 dma_addr_t dma_handle;
1029 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1030 if (rc == -EINTR && count-- > 0) {
1036 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1042 cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1043 &dma_handle, GFP_KERNEL | __GFP_ZERO);
1046 "Failed to allocate %zu of dma memory for TPC kernel\n",
1052 memcpy(cpu_addr, fw->data, fw_size);
1054 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1056 hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1060 release_firmware(fw);
1064 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1066 struct gaudi_device *gaudi = hdev->asic_specific;
1067 struct gaudi_collective_properties *prop = &gaudi->collective_props;
1068 struct hl_hw_queue *q;
1069 u32 i, sob_id, sob_group_id, queue_id;
1071 /* Iterate through SOB groups and assign a SOB for each slave queue */
1073 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1074 sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1076 queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1077 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1078 q = &hdev->kernel_queues[queue_id + (4 * i)];
1079 q->sync_stream_prop.collective_sob_id = sob_id + i;
1082 /* Both DMA5 and TPC7 use the same resources since only a single
1083 * engine need to participate in the reduction process
1085 queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1086 q = &hdev->kernel_queues[queue_id];
1087 q->sync_stream_prop.collective_sob_id =
1088 sob_id + NIC_NUMBER_OF_ENGINES;
1090 queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1091 q = &hdev->kernel_queues[queue_id];
1092 q->sync_stream_prop.collective_sob_id =
1093 sob_id + NIC_NUMBER_OF_ENGINES;
1096 static void gaudi_sob_group_hw_reset(struct kref *ref)
1098 struct gaudi_hw_sob_group *hw_sob_group =
1099 container_of(ref, struct gaudi_hw_sob_group, kref);
1100 struct hl_device *hdev = hw_sob_group->hdev;
1103 for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1104 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1105 (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1107 kref_init(&hw_sob_group->kref);
1110 static void gaudi_sob_group_reset_error(struct kref *ref)
1112 struct gaudi_hw_sob_group *hw_sob_group =
1113 container_of(ref, struct gaudi_hw_sob_group, kref);
1114 struct hl_device *hdev = hw_sob_group->hdev;
1117 "SOB release shouldn't be called here, base_sob_id: %d\n",
1118 hw_sob_group->base_sob_id);
1121 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1123 struct gaudi_collective_properties *prop;
1126 prop = &gaudi->collective_props;
1128 memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1130 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1131 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1132 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1133 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1134 /* Set collective engine bit */
1135 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1136 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1139 static int gaudi_collective_init(struct hl_device *hdev)
1141 u32 i, sob_id, reserved_sobs_per_group;
1142 struct gaudi_collective_properties *prop;
1143 struct gaudi_device *gaudi;
1145 gaudi = hdev->asic_specific;
1146 prop = &gaudi->collective_props;
1147 sob_id = hdev->asic_prop.collective_first_sob;
1149 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1150 reserved_sobs_per_group =
1151 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1153 /* Init SOB groups */
1154 for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1155 prop->hw_sob_group[i].hdev = hdev;
1156 prop->hw_sob_group[i].base_sob_id = sob_id;
1157 sob_id += reserved_sobs_per_group;
1158 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1161 for (i = 0 ; i < QMAN_STREAMS; i++) {
1162 prop->next_sob_group_val[i] = 1;
1163 prop->curr_sob_group_idx[i] = 0;
1164 gaudi_collective_map_sobs(hdev, i);
1167 gaudi_collective_mstr_sob_mask_set(gaudi);
1172 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1174 struct gaudi_device *gaudi = hdev->asic_specific;
1175 struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1177 kref_put(&cprop->hw_sob_group[sob_group].kref,
1178 gaudi_sob_group_hw_reset);
1181 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1182 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1184 u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1185 struct gaudi_collective_properties *cprop;
1186 struct hl_gen_wait_properties wait_prop;
1187 struct hl_sync_stream_properties *prop;
1188 struct gaudi_device *gaudi;
1190 gaudi = hdev->asic_specific;
1191 cprop = &gaudi->collective_props;
1192 queue_id = job->hw_queue_id;
1193 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1196 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1197 master_monitor = prop->collective_mstr_mon_id[0];
1199 cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1202 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1203 master_sob_base, cprop->mstr_sob_mask[0],
1204 cprop->next_sob_group_val[stream],
1205 master_monitor, queue_id);
1207 wait_prop.data = (void *) job->patched_cb;
1208 wait_prop.sob_base = master_sob_base;
1209 wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1210 wait_prop.sob_val = cprop->next_sob_group_val[stream];
1211 wait_prop.mon_id = master_monitor;
1212 wait_prop.q_idx = queue_id;
1213 wait_prop.size = cb_size;
1214 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1216 master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1217 master_monitor = prop->collective_mstr_mon_id[1];
1220 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1221 master_sob_base, cprop->mstr_sob_mask[1],
1222 cprop->next_sob_group_val[stream],
1223 master_monitor, queue_id);
1225 wait_prop.sob_base = master_sob_base;
1226 wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1227 wait_prop.mon_id = master_monitor;
1228 wait_prop.size = cb_size;
1229 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1232 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1233 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1235 struct hl_gen_wait_properties wait_prop;
1236 struct hl_sync_stream_properties *prop;
1237 u32 queue_id, cb_size = 0;
1239 queue_id = job->hw_queue_id;
1240 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1242 if (job->cs->encaps_signals) {
1243 /* use the encaps signal handle store earlier in the flow
1244 * and set the SOB information from the encaps
1247 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1250 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
1252 cs_cmpl->hw_sob->sob_id,
1256 /* Add to wait CBs using slave monitor */
1257 wait_prop.data = (void *) job->user_cb;
1258 wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1259 wait_prop.sob_mask = 0x1;
1260 wait_prop.sob_val = cs_cmpl->sob_val;
1261 wait_prop.mon_id = prop->collective_slave_mon_id;
1262 wait_prop.q_idx = queue_id;
1263 wait_prop.size = cb_size;
1266 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1267 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1268 prop->collective_slave_mon_id, queue_id);
1270 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1273 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1274 prop->collective_sob_id, queue_id);
1276 cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1277 prop->collective_sob_id, cb_size, false);
1280 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1282 struct hl_cs_compl *signal_cs_cmpl =
1283 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1284 struct hl_cs_compl *cs_cmpl =
1285 container_of(cs->fence, struct hl_cs_compl, base_fence);
1286 struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1287 struct gaudi_collective_properties *cprop;
1288 u32 stream, queue_id, sob_group_offset;
1289 struct gaudi_device *gaudi;
1290 struct hl_device *hdev;
1291 struct hl_cs_job *job;
1296 gaudi = hdev->asic_specific;
1297 cprop = &gaudi->collective_props;
1299 if (cs->encaps_signals) {
1300 cs_cmpl->hw_sob = handle->hw_sob;
1301 /* at this checkpoint we only need the hw_sob pointer
1302 * for the completion check before start going over the jobs
1303 * of the master/slaves, the sob_value will be taken later on
1304 * in gaudi_collective_slave_init_job depends on each
1305 * job wait offset value.
1307 cs_cmpl->sob_val = 0;
1309 /* copy the SOB id and value of the signal CS */
1310 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1311 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1314 /* check again if the signal cs already completed.
1315 * if yes then don't send any wait cs since the hw_sob
1316 * could be in reset already. if signal is not completed
1317 * then get refcount to hw_sob to prevent resetting the sob
1318 * while wait cs is not submitted.
1319 * note that this check is protected by two locks,
1320 * hw queue lock and completion object lock,
1321 * and the same completion object lock also protects
1322 * the hw_sob reset handler function.
1323 * The hw_queue lock prevent out of sync of hw_sob
1324 * refcount value, changed by signal/wait flows.
1326 spin_lock(&signal_cs_cmpl->lock);
1328 if (completion_done(&cs->signal_fence->completion)) {
1329 spin_unlock(&signal_cs_cmpl->lock);
1332 /* Increment kref since all slave queues are now waiting on it */
1333 kref_get(&cs_cmpl->hw_sob->kref);
1335 spin_unlock(&signal_cs_cmpl->lock);
1337 /* Calculate the stream from collective master queue (1st job) */
1338 job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1339 stream = job->hw_queue_id % 4;
1341 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1343 list_for_each_entry(job, &cs->job_list, cs_node) {
1344 queue_id = job->hw_queue_id;
1346 if (hdev->kernel_queues[queue_id].collective_mode ==
1347 HL_COLLECTIVE_MASTER)
1348 gaudi_collective_master_init_job(hdev, job, stream,
1351 gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1354 cs_cmpl->sob_group = sob_group_offset;
1356 /* Handle sob group kref and wraparound */
1357 kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1358 cprop->next_sob_group_val[stream]++;
1360 if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1362 * Decrement as we reached the max value.
1363 * The release function won't be called here as we've
1364 * just incremented the refcount.
1366 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1367 gaudi_sob_group_reset_error);
1368 cprop->next_sob_group_val[stream] = 1;
1369 /* only two SOBs are currently in use */
1370 cprop->curr_sob_group_idx[stream] =
1371 (cprop->curr_sob_group_idx[stream] + 1) &
1374 gaudi_collective_map_sobs(hdev, stream);
1376 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1377 cprop->curr_sob_group_idx[stream], stream);
1381 hl_fence_put(cs->signal_fence);
1382 cs->signal_fence = NULL;
1387 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1388 struct hl_ctx *ctx, struct hl_cs *cs,
1389 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1390 u32 encaps_signal_offset)
1392 struct hw_queue_properties *hw_queue_prop;
1393 struct hl_cs_counters_atomic *cntr;
1394 struct hl_cs_job *job;
1399 cntr = &hdev->aggregated_cs_counters;
1401 if (mode == HL_COLLECTIVE_MASTER) {
1402 /* CB size of collective master queue contains
1403 * 4 msg short packets for monitor 1 configuration
1405 * 4 msg short packets for monitor 2 configuration
1407 * 2 msg prot packets for completion and MSI-X
1409 cb_size = sizeof(struct packet_msg_short) * 8 +
1410 sizeof(struct packet_fence) * 2 +
1411 sizeof(struct packet_msg_prot) * 2;
1414 /* CB size of collective slave queues contains
1415 * 4 msg short packets for monitor configuration
1417 * 1 additional msg short packet for sob signal
1419 cb_size = sizeof(struct packet_msg_short) * 5 +
1420 sizeof(struct packet_fence);
1424 hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1425 job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1427 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1428 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1429 dev_err(hdev->dev, "Failed to allocate a new job\n");
1433 /* Allocate internal mapped CB for non patched CBs */
1434 cb = hl_cb_kernel_create(hdev, cb_size,
1435 hdev->mmu_enable && !patched_cb);
1437 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1438 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1446 atomic_inc(&job->user_cb->cs_cnt);
1447 job->user_cb_size = cb_size;
1448 job->hw_queue_id = queue_id;
1450 /* since its guaranteed to have only one chunk in the collective wait
1451 * cs, we can use this chunk to set the encapsulated signal offset
1454 if (cs->encaps_signals)
1455 job->encaps_sig_wait_offset = encaps_signal_offset;
1458 * No need in parsing, user CB is the patched CB.
1459 * We call hl_cb_destroy() out of two reasons - we don't need
1460 * the CB in the CB idr anymore and to decrement its refcount as
1461 * it was incremented inside hl_cb_kernel_create().
1464 job->patched_cb = job->user_cb;
1466 job->patched_cb = NULL;
1468 job->job_cb_size = job->user_cb_size;
1469 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1471 /* increment refcount as for external queues we get completion */
1472 if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1475 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1477 list_add_tail(&job->cs_node, &cs->job_list);
1479 hl_debugfs_add_job(hdev, job);
1484 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1485 struct hl_ctx *ctx, struct hl_cs *cs,
1486 u32 wait_queue_id, u32 collective_engine_id,
1487 u32 encaps_signal_offset)
1489 struct gaudi_device *gaudi = hdev->asic_specific;
1490 struct hw_queue_properties *hw_queue_prop;
1491 u32 queue_id, collective_queue, num_jobs;
1492 u32 stream, nic_queue, nic_idx = 0;
1496 /* Verify wait queue id is configured as master */
1497 hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1498 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1500 "Queue %d is not configured as collective master\n",
1505 /* Verify engine id is supported */
1506 if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1507 collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1509 "Collective wait does not support engine %u\n",
1510 collective_engine_id);
1514 stream = wait_queue_id % 4;
1516 if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1517 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1519 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1521 num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1522 nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1524 /* First job goes to the collective master queue, it will wait for
1525 * the collective slave queues to finish execution.
1526 * The synchronization is done using two monitors:
1527 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1528 * reduction engine (DMA5/TPC7).
1530 * Rest of the jobs goes to the collective slave queues which will
1531 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1533 for (i = 0 ; i < num_jobs ; i++) {
1535 queue_id = wait_queue_id;
1536 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1537 HL_COLLECTIVE_MASTER, queue_id,
1538 wait_queue_id, encaps_signal_offset);
1540 if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1541 if (gaudi->hw_cap_initialized &
1542 BIT(HW_CAP_NIC_SHIFT + nic_idx))
1547 queue_id = nic_queue;
1554 queue_id = collective_queue;
1557 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1558 HL_COLLECTIVE_SLAVE, queue_id,
1559 wait_queue_id, encaps_signal_offset);
1569 static int gaudi_late_init(struct hl_device *hdev)
1571 struct gaudi_device *gaudi = hdev->asic_specific;
1574 rc = gaudi->cpucp_info_get(hdev);
1576 dev_err(hdev->dev, "Failed to get cpucp info\n");
1580 if ((hdev->card_type == cpucp_card_type_pci) &&
1581 (hdev->nic_ports_mask & 0x3)) {
1583 "PCI card detected, only 8 ports are enabled\n");
1584 hdev->nic_ports_mask &= ~0x3;
1586 /* Stop and disable unused NIC QMANs */
1587 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1588 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1589 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1591 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1592 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1593 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1595 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1596 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1598 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1601 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1603 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1607 /* Scrub both SRAM and DRAM */
1608 rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
1610 goto disable_pci_access;
1612 rc = gaudi_fetch_psoc_frequency(hdev);
1614 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1615 goto disable_pci_access;
1618 rc = gaudi_mmu_clear_pgt_range(hdev);
1620 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1621 goto disable_pci_access;
1624 rc = gaudi_init_tpc_mem(hdev);
1626 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1627 goto disable_pci_access;
1630 rc = gaudi_collective_init(hdev);
1632 dev_err(hdev->dev, "Failed to init collective\n");
1633 goto disable_pci_access;
1636 /* We only support a single ASID for the user, so for the sake of optimization, just
1637 * initialize the ASID one time during device initialization with the fixed value of 1
1639 gaudi_mmu_prepare(hdev, 1);
1641 hl_fw_set_pll_profile(hdev);
1646 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1651 static void gaudi_late_fini(struct hl_device *hdev)
1653 const struct hwmon_channel_info **channel_info_arr;
1656 if (!hdev->hl_chip_info->info)
1659 channel_info_arr = hdev->hl_chip_info->info;
1661 while (channel_info_arr[i]) {
1662 kfree(channel_info_arr[i]->config);
1663 kfree(channel_info_arr[i]);
1667 kfree(channel_info_arr);
1669 hdev->hl_chip_info->info = NULL;
1672 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1674 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1675 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1679 * The device CPU works with 40-bits addresses, while bit 39 must be set
1680 * to '1' when accessing the host.
1681 * Bits 49:39 of the full host address are saved for a later
1682 * configuration of the HW to perform extension to 50 bits.
1683 * Because there is a single HW register that holds the extension bits,
1684 * these bits must be identical in all allocated range.
1687 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1689 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1690 HL_CPU_ACCESSIBLE_MEM_SIZE,
1692 GFP_KERNEL | __GFP_ZERO);
1693 if (!virt_addr_arr[i]) {
1695 goto free_dma_mem_arr;
1698 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1699 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1700 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1704 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1706 "MSB of CPU accessible DMA memory are not identical in all range\n");
1708 goto free_dma_mem_arr;
1711 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1712 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1713 hdev->cpu_pci_msb_addr =
1714 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1716 if (!hdev->asic_prop.fw_security_enabled)
1717 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1720 for (j = 0 ; j < i ; j++)
1721 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1722 HL_CPU_ACCESSIBLE_MEM_SIZE,
1729 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1731 struct gaudi_device *gaudi = hdev->asic_specific;
1732 struct gaudi_internal_qman_info *q;
1735 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1736 q = &gaudi->internal_qmans[i];
1737 if (!q->pq_kernel_addr)
1739 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1745 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1747 struct gaudi_device *gaudi = hdev->asic_specific;
1748 struct gaudi_internal_qman_info *q;
1751 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1752 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1755 q = &gaudi->internal_qmans[i];
1758 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1759 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1761 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1762 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1764 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1765 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1767 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1768 q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1771 dev_err(hdev->dev, "Bad internal queue index %d", i);
1773 goto free_internal_qmans_pq_mem;
1776 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1779 GFP_KERNEL | __GFP_ZERO);
1780 if (!q->pq_kernel_addr) {
1782 goto free_internal_qmans_pq_mem;
1788 free_internal_qmans_pq_mem:
1789 gaudi_free_internal_qmans_pq_mem(hdev);
1793 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1795 struct asic_fixed_properties *prop = &hdev->asic_prop;
1796 struct pci_mem_region *region;
1799 region = &hdev->pci_mem_region[PCI_REGION_CFG];
1800 region->region_base = CFG_BASE;
1801 region->region_size = CFG_SIZE;
1802 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1803 region->bar_size = CFG_BAR_SIZE;
1804 region->bar_id = CFG_BAR_ID;
1808 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1809 region->region_base = SRAM_BASE_ADDR;
1810 region->region_size = SRAM_SIZE;
1811 region->offset_in_bar = 0;
1812 region->bar_size = SRAM_BAR_SIZE;
1813 region->bar_id = SRAM_BAR_ID;
1817 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1818 region->region_base = DRAM_PHYS_BASE;
1819 region->region_size = hdev->asic_prop.dram_size;
1820 region->offset_in_bar = 0;
1821 region->bar_size = prop->dram_pci_bar_size;
1822 region->bar_id = HBM_BAR_ID;
1826 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1827 region->region_base = PSOC_SCRATCHPAD_ADDR;
1828 region->region_size = PSOC_SCRATCHPAD_SIZE;
1829 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1830 region->bar_size = CFG_BAR_SIZE;
1831 region->bar_id = CFG_BAR_ID;
1835 static int gaudi_sw_init(struct hl_device *hdev)
1837 struct gaudi_device *gaudi;
1838 u32 i, event_id = 0;
1841 /* Allocate device structure */
1842 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1846 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1847 if (gaudi_irq_map_table[i].valid) {
1848 if (event_id == GAUDI_EVENT_SIZE) {
1850 "Event array exceeds the limit of %u events\n",
1853 goto free_gaudi_device;
1856 gaudi->events[event_id++] =
1857 gaudi_irq_map_table[i].fc_id;
1861 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1863 hdev->asic_specific = gaudi;
1865 /* Create DMA pool for small allocations */
1866 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1867 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1868 if (!hdev->dma_pool) {
1869 dev_err(hdev->dev, "failed to create DMA pool\n");
1871 goto free_gaudi_device;
1874 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1878 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1879 if (!hdev->cpu_accessible_dma_pool) {
1881 "Failed to create CPU accessible DMA pool\n");
1883 goto free_cpu_dma_mem;
1886 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1887 (uintptr_t) hdev->cpu_accessible_dma_mem,
1888 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1891 "Failed to add memory to CPU accessible DMA pool\n");
1893 goto free_cpu_accessible_dma_pool;
1896 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1898 goto free_cpu_accessible_dma_pool;
1900 spin_lock_init(&gaudi->hw_queues_lock);
1902 hdev->supports_sync_stream = true;
1903 hdev->supports_coresight = true;
1904 hdev->supports_staged_submission = true;
1905 hdev->supports_wait_for_multi_cs = true;
1907 hdev->asic_funcs->set_pci_memory_regions(hdev);
1908 hdev->stream_master_qid_arr =
1909 hdev->asic_funcs->get_stream_master_qid_arr();
1910 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1914 free_cpu_accessible_dma_pool:
1915 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1917 if (!hdev->asic_prop.fw_security_enabled)
1918 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1919 hdev->cpu_pci_msb_addr);
1920 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1921 HL_CPU_ACCESSIBLE_MEM_SIZE,
1922 hdev->cpu_accessible_dma_mem,
1923 hdev->cpu_accessible_dma_address);
1925 dma_pool_destroy(hdev->dma_pool);
1931 static int gaudi_sw_fini(struct hl_device *hdev)
1933 struct gaudi_device *gaudi = hdev->asic_specific;
1935 gaudi_free_internal_qmans_pq_mem(hdev);
1937 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1939 if (!hdev->asic_prop.fw_security_enabled)
1940 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1941 hdev->cpu_pci_msb_addr);
1943 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1944 HL_CPU_ACCESSIBLE_MEM_SIZE,
1945 hdev->cpu_accessible_dma_mem,
1946 hdev->cpu_accessible_dma_address);
1948 dma_pool_destroy(hdev->dma_pool);
1955 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1957 struct hl_device *hdev = arg;
1963 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1964 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1966 hl_irq_handler_eq(irq, &hdev->event_queue);
1972 * For backward compatibility, new MSI interrupts should be set after the
1973 * existing CPU and NIC interrupts.
1975 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1980 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1981 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1982 GAUDI_EVENT_QUEUE_MSI_IDX);
1984 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1985 (nr + NIC_NUMBER_OF_ENGINES + 1);
1987 return pci_irq_vector(hdev->pdev, msi_vec);
1990 static int gaudi_enable_msi_single(struct hl_device *hdev)
1994 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1996 irq = gaudi_pci_irq_vector(hdev, 0, false);
1997 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1998 "gaudi single msi", hdev);
2001 "Failed to request single MSI IRQ\n");
2006 static int gaudi_enable_msi_multi(struct hl_device *hdev)
2008 int cq_cnt = hdev->asic_prop.completion_queues_count;
2009 int rc, i, irq_cnt_init, irq;
2011 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2012 irq = gaudi_pci_irq_vector(hdev, i, false);
2013 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
2014 &hdev->completion_queue[i]);
2016 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2021 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2022 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2023 &hdev->event_queue);
2025 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2032 for (i = 0 ; i < irq_cnt_init ; i++)
2033 free_irq(gaudi_pci_irq_vector(hdev, i, false),
2034 &hdev->completion_queue[i]);
2038 static int gaudi_enable_msi(struct hl_device *hdev)
2040 struct gaudi_device *gaudi = hdev->asic_specific;
2043 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2046 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2048 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2052 if (rc < NUMBER_OF_INTERRUPTS) {
2053 gaudi->multi_msi_mode = false;
2054 rc = gaudi_enable_msi_single(hdev);
2056 gaudi->multi_msi_mode = true;
2057 rc = gaudi_enable_msi_multi(hdev);
2061 goto free_pci_irq_vectors;
2063 gaudi->hw_cap_initialized |= HW_CAP_MSI;
2067 free_pci_irq_vectors:
2068 pci_free_irq_vectors(hdev->pdev);
2072 static void gaudi_sync_irqs(struct hl_device *hdev)
2074 struct gaudi_device *gaudi = hdev->asic_specific;
2075 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2077 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2080 /* Wait for all pending IRQs to be finished */
2081 if (gaudi->multi_msi_mode) {
2082 for (i = 0 ; i < cq_cnt ; i++)
2083 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2085 synchronize_irq(gaudi_pci_irq_vector(hdev,
2086 GAUDI_EVENT_QUEUE_MSI_IDX,
2089 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2093 static void gaudi_disable_msi(struct hl_device *hdev)
2095 struct gaudi_device *gaudi = hdev->asic_specific;
2096 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2098 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2101 gaudi_sync_irqs(hdev);
2103 if (gaudi->multi_msi_mode) {
2104 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2106 free_irq(irq, &hdev->event_queue);
2108 for (i = 0 ; i < cq_cnt ; i++) {
2109 irq = gaudi_pci_irq_vector(hdev, i, false);
2110 free_irq(irq, &hdev->completion_queue[i]);
2113 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2116 pci_free_irq_vectors(hdev->pdev);
2118 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2121 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2123 struct gaudi_device *gaudi = hdev->asic_specific;
2125 if (hdev->asic_prop.fw_security_enabled)
2128 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2129 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2132 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2135 if (!hdev->sram_scrambler_enable)
2138 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2139 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2140 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2141 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2142 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2143 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2144 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2145 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2146 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2147 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2148 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2149 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2150 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2151 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2152 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2153 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2155 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2156 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2157 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2158 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2159 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2160 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2161 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2162 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2163 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2164 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2165 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2166 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2167 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2168 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2169 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2170 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2172 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2173 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2174 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2175 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2176 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2177 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2178 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2179 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2180 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2181 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2182 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2183 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2184 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2185 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2186 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2187 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2189 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2192 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2194 struct gaudi_device *gaudi = hdev->asic_specific;
2196 if (hdev->asic_prop.fw_security_enabled)
2199 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2200 CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2203 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2206 if (!hdev->dram_scrambler_enable)
2209 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2210 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2211 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2212 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2213 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2214 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2215 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2216 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2217 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2218 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2219 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2220 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2221 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2222 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2223 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2224 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2226 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2227 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2228 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2229 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2230 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2231 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2232 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2233 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2234 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2235 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2236 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2237 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2238 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2239 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2240 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2241 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2243 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2244 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2245 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2246 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2247 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2248 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2249 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2250 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2251 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2252 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2253 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2254 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2255 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2256 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2257 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2258 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2260 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2263 static void gaudi_init_e2e(struct hl_device *hdev)
2265 if (hdev->asic_prop.fw_security_enabled)
2268 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2269 CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2272 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2273 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2274 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2275 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2277 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2278 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2279 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2280 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2282 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2283 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2284 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2285 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2287 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2288 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2289 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2290 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2292 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2293 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2294 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2295 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2297 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2298 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2299 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2300 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2302 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2303 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2304 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2305 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2307 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2308 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2309 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2310 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2312 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2313 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2314 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2315 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2317 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2318 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2319 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2320 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2322 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2323 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2324 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2325 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2327 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2328 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2329 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2330 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2332 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2333 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2334 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2335 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2337 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2338 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2339 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2340 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2342 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2343 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2344 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2345 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2347 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2348 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2349 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2350 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2352 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2353 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2354 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2355 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2357 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2358 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2359 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2360 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2362 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2363 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2364 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2365 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2367 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2368 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2369 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2370 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2372 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2373 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2374 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2375 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2377 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2378 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2379 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2380 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2382 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2383 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2384 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2385 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2387 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2388 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2389 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2390 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2392 if (!hdev->dram_scrambler_enable) {
2393 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2394 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2395 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2396 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2398 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2399 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2400 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2401 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2403 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2404 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2405 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2406 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2408 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2409 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2410 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2411 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2413 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2414 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2415 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2416 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2418 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2419 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2420 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2421 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2423 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2424 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2425 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2426 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2428 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2429 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2430 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2431 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2433 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2434 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2435 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2436 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2438 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2439 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2440 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2441 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2443 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2444 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2445 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2446 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2448 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2449 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2450 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2451 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2453 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2454 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2455 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2456 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2458 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2459 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2460 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2461 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2463 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2464 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2465 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2466 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2468 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2469 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2470 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2471 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2473 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2474 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2475 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2476 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2478 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2479 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2480 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2481 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2483 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2484 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2485 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2486 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2488 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2489 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2490 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2491 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2493 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2494 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2495 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2496 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2498 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2499 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2500 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2501 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2503 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2504 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2505 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2506 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2508 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2509 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2510 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2511 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2514 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2515 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2516 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2517 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2519 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2520 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2521 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2522 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2524 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2525 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2526 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2527 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2529 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2530 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2531 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2532 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2534 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2535 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2536 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2537 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2539 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2540 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2541 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2542 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2544 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2545 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2546 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2547 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2549 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2550 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2551 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2552 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2554 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2555 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2556 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2557 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2559 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2560 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2561 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2562 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2564 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2565 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2566 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2567 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2569 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2570 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2571 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2572 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2574 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2575 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2576 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2577 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2579 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2580 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2581 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2582 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2584 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2585 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2586 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2587 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2589 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2590 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2591 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2592 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2594 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2595 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2596 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2597 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2599 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2600 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2601 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2602 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2604 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2605 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2606 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2607 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2609 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2610 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2611 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2612 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2614 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2615 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2616 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2617 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2619 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2620 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2621 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2622 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2624 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2625 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2626 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2627 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2629 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2630 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2631 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2632 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2635 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2637 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2639 if (hdev->asic_prop.fw_security_enabled)
2642 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2643 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2646 hbm0_wr = 0x33333333;
2647 hbm0_rd = 0x77777777;
2648 hbm1_wr = 0x55555555;
2649 hbm1_rd = 0xDDDDDDDD;
2651 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2652 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2653 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2654 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2656 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2657 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2658 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2659 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2661 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2662 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2663 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2664 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2666 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2667 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2668 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2669 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2671 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2672 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2673 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2674 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2675 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2676 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2677 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2678 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2679 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2680 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2681 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2682 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2684 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2685 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2686 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2687 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2688 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2689 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2690 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2691 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2692 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2693 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2694 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2695 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2698 static void gaudi_init_golden_registers(struct hl_device *hdev)
2703 gaudi_init_e2e(hdev);
2704 gaudi_init_hbm_cred(hdev);
2706 for (tpc_id = 0, tpc_offset = 0;
2707 tpc_id < TPC_NUMBER_OF_ENGINES;
2708 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2709 /* Mask all arithmetic interrupts from TPC */
2710 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2711 /* Set 16 cache lines */
2712 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2713 ICACHE_FETCH_LINE_NUM, 2);
2716 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2717 for (i = 0 ; i < 128 ; i += 8)
2718 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2720 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2721 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2722 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2723 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2726 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2727 int qman_id, dma_addr_t qman_pq_addr)
2729 struct cpu_dyn_regs *dyn_regs =
2730 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2731 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2732 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2733 u32 q_off, dma_qm_offset;
2734 u32 dma_qm_err_cfg, irq_handler_offset;
2736 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2738 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2739 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2740 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2741 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2742 so_base_en_lo = lower_32_bits(CFG_BASE +
2743 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2744 so_base_en_hi = upper_32_bits(CFG_BASE +
2745 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2746 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2747 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2748 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2749 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2750 so_base_ws_lo = lower_32_bits(CFG_BASE +
2751 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2752 so_base_ws_hi = upper_32_bits(CFG_BASE +
2753 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2755 q_off = dma_qm_offset + qman_id * 4;
2757 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2758 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2760 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2761 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2762 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2764 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2765 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2766 QMAN_LDMA_SRC_OFFSET);
2767 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2768 QMAN_LDMA_DST_OFFSET);
2770 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2771 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2772 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2773 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2774 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2775 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2776 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2777 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2779 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2781 /* The following configuration is needed only once per QMAN */
2783 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2784 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2785 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2787 /* Configure RAZWI IRQ */
2788 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2789 if (hdev->stop_on_err)
2791 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2793 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2795 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2796 lower_32_bits(CFG_BASE + irq_handler_offset));
2797 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2798 upper_32_bits(CFG_BASE + irq_handler_offset));
2800 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2801 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2804 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2805 QM_ARB_ERR_MSG_EN_MASK);
2807 /* Increase ARB WDT to support streams architecture */
2808 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2809 GAUDI_ARB_WDT_TIMEOUT);
2811 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2812 QMAN_EXTERNAL_MAKE_TRUSTED);
2814 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2818 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2820 struct cpu_dyn_regs *dyn_regs =
2821 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2822 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2823 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2824 u32 irq_handler_offset;
2826 /* Set to maximum possible according to physical size */
2827 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2828 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2830 /* WA for H/W bug H3-2116 */
2831 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2833 /* STOP_ON bit implies no completion to operation in case of RAZWI */
2834 if (hdev->stop_on_err)
2835 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2837 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2839 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2840 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2841 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2843 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2844 lower_32_bits(CFG_BASE + irq_handler_offset));
2845 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2846 upper_32_bits(CFG_BASE + irq_handler_offset));
2848 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2849 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2850 WREG32(mmDMA0_CORE_PROT + dma_offset,
2851 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2852 /* If the channel is secured, it should be in MMU bypass mode */
2853 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2854 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2855 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2858 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2861 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2863 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2866 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2868 struct gaudi_device *gaudi = hdev->asic_specific;
2869 struct hl_hw_queue *q;
2870 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2872 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2875 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2876 dma_id = gaudi_dma_assignment[i];
2878 * For queues after the CPU Q need to add 1 to get the correct
2879 * queue. In addition, need to add the CPU EQ and NIC IRQs in
2880 * order to get the correct MSI register.
2884 nic_skip = NIC_NUMBER_OF_ENGINES;
2890 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2891 q_idx = 4 * dma_id + j + cpu_skip;
2892 q = &hdev->kernel_queues[q_idx];
2894 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2895 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2899 gaudi_init_dma_core(hdev, dma_id);
2901 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2904 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2907 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2908 int qman_id, u64 qman_base_addr)
2910 struct cpu_dyn_regs *dyn_regs =
2911 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2912 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2913 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2914 u32 dma_qm_err_cfg, irq_handler_offset;
2915 u32 q_off, dma_qm_offset;
2917 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2919 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2920 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2921 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2922 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2923 so_base_en_lo = lower_32_bits(CFG_BASE +
2924 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2925 so_base_en_hi = upper_32_bits(CFG_BASE +
2926 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2927 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2928 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2929 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2930 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2931 so_base_ws_lo = lower_32_bits(CFG_BASE +
2932 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2933 so_base_ws_hi = upper_32_bits(CFG_BASE +
2934 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2936 q_off = dma_qm_offset + qman_id * 4;
2939 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2940 lower_32_bits(qman_base_addr));
2941 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2942 upper_32_bits(qman_base_addr));
2944 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2945 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2946 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2948 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2949 QMAN_CPDMA_SIZE_OFFSET);
2950 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2951 QMAN_CPDMA_SRC_OFFSET);
2952 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2953 QMAN_CPDMA_DST_OFFSET);
2955 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2956 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2957 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2959 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2960 QMAN_LDMA_SIZE_OFFSET);
2961 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2962 QMAN_LDMA_SRC_OFFSET);
2963 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2964 QMAN_LDMA_DST_OFFSET);
2966 /* Configure RAZWI IRQ */
2967 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2968 if (hdev->stop_on_err)
2970 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2972 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2974 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2975 lower_32_bits(CFG_BASE + irq_handler_offset));
2976 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2977 upper_32_bits(CFG_BASE + irq_handler_offset));
2979 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2980 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2983 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2984 QM_ARB_ERR_MSG_EN_MASK);
2986 /* Increase ARB WDT to support streams architecture */
2987 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2988 GAUDI_ARB_WDT_TIMEOUT);
2990 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2991 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2992 QMAN_INTERNAL_MAKE_TRUSTED);
2995 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2996 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2997 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2998 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3000 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
3001 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
3002 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3004 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3006 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3008 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3013 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
3015 struct gaudi_device *gaudi = hdev->asic_specific;
3016 struct gaudi_internal_qman_info *q;
3018 int i, j, dma_id, internal_q_index;
3020 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
3023 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
3024 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
3026 for (j = 0 ; j < QMAN_STREAMS ; j++) {
3028 * Add the CPU queue in order to get the correct queue
3029 * number as all internal queue are placed after it
3031 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
3033 q = &gaudi->internal_qmans[internal_q_index];
3034 qman_base_addr = (u64) q->pq_dma_addr;
3035 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
3039 /* Initializing lower CP for HBM DMA QMAN */
3040 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
3042 gaudi_init_dma_core(hdev, dma_id);
3044 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
3047 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
3050 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
3051 int qman_id, u64 qman_base_addr)
3053 struct cpu_dyn_regs *dyn_regs =
3054 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3055 u32 mtr_base_lo, mtr_base_hi;
3056 u32 so_base_lo, so_base_hi;
3057 u32 irq_handler_offset;
3061 mtr_base_lo = lower_32_bits(CFG_BASE +
3062 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3063 mtr_base_hi = upper_32_bits(CFG_BASE +
3064 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3065 so_base_lo = lower_32_bits(CFG_BASE +
3066 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3067 so_base_hi = upper_32_bits(CFG_BASE +
3068 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3070 q_off = mme_offset + qman_id * 4;
3073 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3074 lower_32_bits(qman_base_addr));
3075 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3076 upper_32_bits(qman_base_addr));
3078 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3079 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3080 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3082 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3083 QMAN_CPDMA_SIZE_OFFSET);
3084 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3085 QMAN_CPDMA_SRC_OFFSET);
3086 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3087 QMAN_CPDMA_DST_OFFSET);
3089 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3090 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3091 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3093 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3094 QMAN_LDMA_SIZE_OFFSET);
3095 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3096 QMAN_LDMA_SRC_OFFSET);
3097 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3098 QMAN_LDMA_DST_OFFSET);
3100 /* Configure RAZWI IRQ */
3101 mme_id = mme_offset /
3102 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3104 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3105 if (hdev->stop_on_err)
3107 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3109 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3111 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3112 lower_32_bits(CFG_BASE + irq_handler_offset));
3113 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3114 upper_32_bits(CFG_BASE + irq_handler_offset));
3116 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3117 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3120 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3121 QM_ARB_ERR_MSG_EN_MASK);
3123 /* Increase ARB WDT to support streams architecture */
3124 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3125 GAUDI_ARB_WDT_TIMEOUT);
3127 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3128 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3129 QMAN_INTERNAL_MAKE_TRUSTED);
3132 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3133 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3134 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3135 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3138 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3140 struct gaudi_device *gaudi = hdev->asic_specific;
3141 struct gaudi_internal_qman_info *q;
3144 int i, internal_q_index;
3146 if (gaudi->hw_cap_initialized & HW_CAP_MME)
3150 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3151 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3154 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3156 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3157 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3158 q = &gaudi->internal_qmans[internal_q_index];
3159 qman_base_addr = (u64) q->pq_dma_addr;
3160 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3166 /* Initializing lower CP for MME QMANs */
3167 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3168 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3169 gaudi_init_mme_qman(hdev, 0, 4, 0);
3171 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3172 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3174 gaudi->hw_cap_initialized |= HW_CAP_MME;
3177 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3178 int qman_id, u64 qman_base_addr)
3180 struct cpu_dyn_regs *dyn_regs =
3181 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3182 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3183 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3184 u32 tpc_qm_err_cfg, irq_handler_offset;
3187 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3188 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3189 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3190 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3191 so_base_en_lo = lower_32_bits(CFG_BASE +
3192 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3193 so_base_en_hi = upper_32_bits(CFG_BASE +
3194 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3195 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3196 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3197 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3198 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3199 so_base_ws_lo = lower_32_bits(CFG_BASE +
3200 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3201 so_base_ws_hi = upper_32_bits(CFG_BASE +
3202 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3204 q_off = tpc_offset + qman_id * 4;
3206 tpc_id = tpc_offset /
3207 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3210 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3211 lower_32_bits(qman_base_addr));
3212 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3213 upper_32_bits(qman_base_addr));
3215 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3216 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3217 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3219 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3220 QMAN_CPDMA_SIZE_OFFSET);
3221 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3222 QMAN_CPDMA_SRC_OFFSET);
3223 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3224 QMAN_CPDMA_DST_OFFSET);
3226 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3227 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3228 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3230 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3231 QMAN_LDMA_SIZE_OFFSET);
3232 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3233 QMAN_LDMA_SRC_OFFSET);
3234 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3235 QMAN_LDMA_DST_OFFSET);
3237 /* Configure RAZWI IRQ */
3238 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3239 if (hdev->stop_on_err)
3241 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3243 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3245 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3246 lower_32_bits(CFG_BASE + irq_handler_offset));
3247 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3248 upper_32_bits(CFG_BASE + irq_handler_offset));
3250 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3251 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3254 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3255 QM_ARB_ERR_MSG_EN_MASK);
3257 /* Increase ARB WDT to support streams architecture */
3258 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3259 GAUDI_ARB_WDT_TIMEOUT);
3261 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3262 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3263 QMAN_INTERNAL_MAKE_TRUSTED);
3266 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3267 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3268 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3269 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3271 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3273 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3275 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3277 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3279 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3284 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3286 struct gaudi_device *gaudi = hdev->asic_specific;
3287 struct gaudi_internal_qman_info *q;
3289 u32 so_base_hi, tpc_offset = 0;
3290 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3291 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3292 int i, tpc_id, internal_q_index;
3294 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3297 so_base_hi = upper_32_bits(CFG_BASE +
3298 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3300 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3301 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3302 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3303 tpc_id * QMAN_STREAMS + i;
3304 q = &gaudi->internal_qmans[internal_q_index];
3305 qman_base_addr = (u64) q->pq_dma_addr;
3306 gaudi_init_tpc_qman(hdev, tpc_offset, i,
3310 /* Initializing lower CP for TPC QMAN */
3311 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3313 /* Enable the QMAN and TPC channel */
3314 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3319 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3322 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3324 gaudi->hw_cap_initialized |=
3325 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3329 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3330 int qman_id, u64 qman_base_addr, int nic_id)
3332 struct cpu_dyn_regs *dyn_regs =
3333 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3334 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3335 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3336 u32 nic_qm_err_cfg, irq_handler_offset;
3339 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3340 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3341 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3342 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3343 so_base_en_lo = lower_32_bits(CFG_BASE +
3344 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3345 so_base_en_hi = upper_32_bits(CFG_BASE +
3346 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3347 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3348 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3349 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3350 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3351 so_base_ws_lo = lower_32_bits(CFG_BASE +
3352 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3353 so_base_ws_hi = upper_32_bits(CFG_BASE +
3354 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3356 q_off = nic_offset + qman_id * 4;
3358 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3359 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3361 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3362 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3363 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3365 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3366 QMAN_LDMA_SIZE_OFFSET);
3367 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3368 QMAN_LDMA_SRC_OFFSET);
3369 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3370 QMAN_LDMA_DST_OFFSET);
3372 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3373 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3374 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3375 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3377 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3378 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3379 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3380 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3381 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3384 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3385 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3386 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3388 /* Configure RAZWI IRQ */
3389 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3390 if (hdev->stop_on_err)
3392 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3394 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3396 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3397 lower_32_bits(CFG_BASE + irq_handler_offset));
3398 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3399 upper_32_bits(CFG_BASE + irq_handler_offset));
3401 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3402 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3405 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3406 QM_ARB_ERR_MSG_EN_MASK);
3408 /* Increase ARB WDT to support streams architecture */
3409 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3410 GAUDI_ARB_WDT_TIMEOUT);
3412 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3413 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3414 QMAN_INTERNAL_MAKE_TRUSTED);
3418 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3420 struct gaudi_device *gaudi = hdev->asic_specific;
3421 struct gaudi_internal_qman_info *q;
3424 u32 nic_delta_between_qmans =
3425 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3426 u32 nic_delta_between_nics =
3427 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3428 int i, nic_id, internal_q_index;
3430 if (!hdev->nic_ports_mask)
3433 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3436 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3438 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3439 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3440 nic_offset += nic_delta_between_qmans;
3442 nic_offset -= (nic_delta_between_qmans * 2);
3443 nic_offset += nic_delta_between_nics;
3448 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3449 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3450 nic_id * QMAN_STREAMS + i;
3451 q = &gaudi->internal_qmans[internal_q_index];
3452 qman_base_addr = (u64) q->pq_dma_addr;
3453 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3454 qman_base_addr, nic_id);
3457 /* Enable the QMAN */
3458 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3460 nic_offset += nic_delta_between_qmans;
3462 nic_offset -= (nic_delta_between_qmans * 2);
3463 nic_offset += nic_delta_between_nics;
3466 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3470 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3472 struct gaudi_device *gaudi = hdev->asic_specific;
3474 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3477 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3478 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3479 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3482 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3484 struct gaudi_device *gaudi = hdev->asic_specific;
3486 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3489 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3490 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3491 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3492 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3493 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3496 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3498 struct gaudi_device *gaudi = hdev->asic_specific;
3500 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3503 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3504 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3507 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3509 struct gaudi_device *gaudi = hdev->asic_specific;
3513 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3516 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3517 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3518 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3522 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3524 struct gaudi_device *gaudi = hdev->asic_specific;
3525 u32 nic_mask, nic_offset = 0;
3526 u32 nic_delta_between_qmans =
3527 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3528 u32 nic_delta_between_nics =
3529 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3532 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3533 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3535 if (gaudi->hw_cap_initialized & nic_mask)
3536 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3538 nic_offset += nic_delta_between_qmans;
3540 nic_offset -= (nic_delta_between_qmans * 2);
3541 nic_offset += nic_delta_between_nics;
3546 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3548 struct gaudi_device *gaudi = hdev->asic_specific;
3550 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3553 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3554 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3555 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3556 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3559 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3561 struct gaudi_device *gaudi = hdev->asic_specific;
3563 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3566 /* Stop CPs of HBM DMA QMANs */
3568 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3569 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3570 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3571 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3572 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3575 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3577 struct gaudi_device *gaudi = hdev->asic_specific;
3579 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3582 /* Stop CPs of MME QMANs */
3583 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3584 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3587 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3589 struct gaudi_device *gaudi = hdev->asic_specific;
3591 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3594 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3595 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3596 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3597 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3598 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3599 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3600 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3601 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3604 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3606 struct gaudi_device *gaudi = hdev->asic_specific;
3608 /* Stop upper CPs of QMANs */
3610 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3611 WREG32(mmNIC0_QM0_GLBL_CFG1,
3612 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3613 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3614 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3616 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3617 WREG32(mmNIC0_QM1_GLBL_CFG1,
3618 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3619 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3620 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3622 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3623 WREG32(mmNIC1_QM0_GLBL_CFG1,
3624 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3625 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3626 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3628 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3629 WREG32(mmNIC1_QM1_GLBL_CFG1,
3630 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3631 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3632 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3634 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3635 WREG32(mmNIC2_QM0_GLBL_CFG1,
3636 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3637 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3638 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3640 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3641 WREG32(mmNIC2_QM1_GLBL_CFG1,
3642 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3643 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3644 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3646 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3647 WREG32(mmNIC3_QM0_GLBL_CFG1,
3648 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3649 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3650 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3652 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3653 WREG32(mmNIC3_QM1_GLBL_CFG1,
3654 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3655 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3656 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3658 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3659 WREG32(mmNIC4_QM0_GLBL_CFG1,
3660 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3661 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3662 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3664 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3665 WREG32(mmNIC4_QM1_GLBL_CFG1,
3666 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3667 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3668 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3671 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3673 struct gaudi_device *gaudi = hdev->asic_specific;
3675 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3678 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3679 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3680 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3683 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3685 struct gaudi_device *gaudi = hdev->asic_specific;
3687 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3690 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3691 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3692 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3693 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3694 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3697 static void gaudi_mme_stall(struct hl_device *hdev)
3699 struct gaudi_device *gaudi = hdev->asic_specific;
3701 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3704 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3705 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3706 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3707 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3708 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3709 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3710 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3711 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3712 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3713 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3714 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3715 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3716 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3717 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3718 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3719 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3720 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3723 static void gaudi_tpc_stall(struct hl_device *hdev)
3725 struct gaudi_device *gaudi = hdev->asic_specific;
3727 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3730 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3731 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3732 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3733 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3734 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3735 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3736 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3737 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3740 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3745 if (hdev->asic_prop.fw_security_enabled)
3748 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3749 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3750 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3752 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3755 WREG32(mmMME0_QM_CGM_CFG, 0);
3756 WREG32(mmMME0_QM_CGM_CFG1, 0);
3757 WREG32(mmMME2_QM_CGM_CFG, 0);
3758 WREG32(mmMME2_QM_CGM_CFG1, 0);
3760 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3761 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3762 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3764 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3768 static void gaudi_enable_timestamp(struct hl_device *hdev)
3770 /* Disable the timestamp counter */
3771 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3773 /* Zero the lower/upper parts of the 64-bit counter */
3774 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3775 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3777 /* Enable the counter */
3778 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3781 static void gaudi_disable_timestamp(struct hl_device *hdev)
3783 /* Disable the timestamp counter */
3784 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3787 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3789 u32 wait_timeout_ms;
3792 "Halting compute engines and disabling interrupts\n");
3795 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3797 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3802 gaudi_stop_nic_qmans(hdev);
3803 gaudi_stop_mme_qmans(hdev);
3804 gaudi_stop_tpc_qmans(hdev);
3805 gaudi_stop_hbm_dma_qmans(hdev);
3806 gaudi_stop_pci_dma_qmans(hdev);
3808 msleep(wait_timeout_ms);
3810 gaudi_pci_dma_stall(hdev);
3811 gaudi_hbm_dma_stall(hdev);
3812 gaudi_tpc_stall(hdev);
3813 gaudi_mme_stall(hdev);
3815 msleep(wait_timeout_ms);
3817 gaudi_disable_nic_qmans(hdev);
3818 gaudi_disable_mme_qmans(hdev);
3819 gaudi_disable_tpc_qmans(hdev);
3820 gaudi_disable_hbm_dma_qmans(hdev);
3821 gaudi_disable_pci_dma_qmans(hdev);
3823 gaudi_disable_timestamp(hdev);
3826 gaudi_disable_msi(hdev);
3829 static int gaudi_mmu_init(struct hl_device *hdev)
3831 struct asic_fixed_properties *prop = &hdev->asic_prop;
3832 struct gaudi_device *gaudi = hdev->asic_specific;
3836 if (!hdev->mmu_enable)
3839 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3842 for (i = 0 ; i < prop->max_asid ; i++) {
3843 hop0_addr = prop->mmu_pgt_addr +
3844 (i * prop->mmu_hop_table_size);
3846 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3849 "failed to set hop0 addr for asid %d\n", i);
3854 /* init MMU cache manage page */
3855 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3856 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3858 /* mem cache invalidation */
3859 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3861 hl_mmu_invalidate_cache(hdev, true, 0);
3863 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3864 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3866 WREG32(mmSTLB_HOP_CONFIGURATION,
3867 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3870 * The H/W expects the first PI after init to be 1. After wraparound
3873 gaudi->mmu_cache_inv_pi = 1;
3875 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3883 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3887 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3889 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3892 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3896 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3898 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3901 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3903 struct dynamic_fw_load_mgr *dynamic_loader;
3904 struct cpu_dyn_regs *dyn_regs;
3906 dynamic_loader = &hdev->fw_loader.dynamic_loader;
3909 * here we update initial values for few specific dynamic regs (as
3910 * before reading the first descriptor from FW those value has to be
3911 * hard-coded) in later stages of the protocol those values will be
3912 * updated automatically by reading the FW descriptor so data there
3913 * will always be up-to-date
3915 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3916 dyn_regs->kmd_msg_to_cpu =
3917 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3918 dyn_regs->cpu_cmd_status_to_host =
3919 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3921 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3924 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3926 struct static_fw_load_mgr *static_loader;
3928 static_loader = &hdev->fw_loader.static_loader;
3930 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3931 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3932 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3933 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3934 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3935 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3936 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3937 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3938 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3939 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3940 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3941 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3942 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3943 GAUDI_PLDM_RESET_WAIT_MSEC :
3944 GAUDI_CPU_RESET_WAIT_MSEC;
3947 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3949 struct asic_fixed_properties *prop = &hdev->asic_prop;
3950 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3952 /* fill common fields */
3953 fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3954 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3955 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3956 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3957 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3958 fw_loader->skip_bmc = !hdev->bmc_enable;
3959 fw_loader->sram_bar_id = SRAM_BAR_ID;
3960 fw_loader->dram_bar_id = HBM_BAR_ID;
3962 if (prop->dynamic_fw_load)
3963 gaudi_init_dynamic_firmware_loader(hdev);
3965 gaudi_init_static_firmware_loader(hdev);
3968 static int gaudi_init_cpu(struct hl_device *hdev)
3970 struct gaudi_device *gaudi = hdev->asic_specific;
3973 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3976 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3980 * The device CPU works with 40 bits addresses.
3981 * This register sets the extension to 50 bits.
3983 if (!hdev->asic_prop.fw_security_enabled)
3984 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3986 rc = hl_fw_init_cpu(hdev);
3991 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3996 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3998 struct cpu_dyn_regs *dyn_regs =
3999 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4000 struct asic_fixed_properties *prop = &hdev->asic_prop;
4001 struct gaudi_device *gaudi = hdev->asic_specific;
4002 u32 status, irq_handler_offset;
4004 struct hl_hw_queue *cpu_pq =
4005 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4008 if (!hdev->cpu_queues_enable)
4011 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4014 eq = &hdev->event_queue;
4016 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4017 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4019 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4020 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4022 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4023 lower_32_bits(hdev->cpu_accessible_dma_address));
4024 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4025 upper_32_bits(hdev->cpu_accessible_dma_address));
4027 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4028 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4029 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4031 /* Used for EQ CI */
4032 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4034 WREG32(mmCPU_IF_PF_PQ_PI, 0);
4036 if (gaudi->multi_msi_mode)
4037 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4039 WREG32(mmCPU_IF_QUEUE_INIT,
4040 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4042 irq_handler_offset = prop->gic_interrupts_enable ?
4043 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4044 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4046 WREG32(irq_handler_offset,
4047 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4049 err = hl_poll_timeout(
4051 mmCPU_IF_QUEUE_INIT,
4053 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4059 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4063 /* update FW application security bits */
4064 if (prop->fw_cpu_boot_dev_sts0_valid)
4065 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4066 if (prop->fw_cpu_boot_dev_sts1_valid)
4067 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4069 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4073 static void gaudi_pre_hw_init(struct hl_device *hdev)
4075 /* Perform read from the device to make sure device is up */
4078 if (!hdev->asic_prop.fw_security_enabled) {
4079 /* Set the access through PCI bars (Linux driver only) as
4082 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4083 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4084 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4086 /* Perform read to flush the waiting writes to ensure
4087 * configuration was set in the device
4089 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4093 * Let's mark in the H/W that we have reached this point. We check
4094 * this value in the reset_before_init function to understand whether
4095 * we need to reset the chip before doing H/W init. This register is
4096 * cleared by the H/W upon H/W reset
4098 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4101 static int gaudi_hw_init(struct hl_device *hdev)
4103 struct gaudi_device *gaudi = hdev->asic_specific;
4106 gaudi_pre_hw_init(hdev);
4108 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
4109 * So we set it here and if anyone tries to move it later to
4110 * a different address, there will be an error
4112 if (hdev->asic_prop.iatu_done_by_fw)
4113 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4116 * Before pushing u-boot/linux to device, need to set the hbm bar to
4117 * base address of dram
4119 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4121 "failed to map HBM bar to DRAM base address\n");
4125 rc = gaudi_init_cpu(hdev);
4127 dev_err(hdev->dev, "failed to initialize CPU\n");
4131 /* In case the clock gating was enabled in preboot we need to disable
4132 * it here before touching the MME/TPC registers.
4134 gaudi_disable_clock_gating(hdev);
4136 /* SRAM scrambler must be initialized after CPU is running from HBM */
4137 gaudi_init_scrambler_sram(hdev);
4139 /* This is here just in case we are working without CPU */
4140 gaudi_init_scrambler_hbm(hdev);
4142 gaudi_init_golden_registers(hdev);
4144 rc = gaudi_mmu_init(hdev);
4148 gaudi_init_security(hdev);
4150 gaudi_init_pci_dma_qmans(hdev);
4152 gaudi_init_hbm_dma_qmans(hdev);
4154 gaudi_init_mme_qmans(hdev);
4156 gaudi_init_tpc_qmans(hdev);
4158 gaudi_init_nic_qmans(hdev);
4160 gaudi_enable_timestamp(hdev);
4162 /* MSI must be enabled before CPU queues and NIC are initialized */
4163 rc = gaudi_enable_msi(hdev);
4165 goto disable_queues;
4167 /* must be called after MSI was enabled */
4168 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4170 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4175 /* Perform read from the device to flush all configuration */
4181 gaudi_disable_msi(hdev);
4183 gaudi_disable_mme_qmans(hdev);
4184 gaudi_disable_pci_dma_qmans(hdev);
4189 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4191 struct cpu_dyn_regs *dyn_regs =
4192 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4193 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4194 struct gaudi_device *gaudi = hdev->asic_specific;
4195 bool driver_performs_reset;
4198 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4203 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4204 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4206 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4207 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4212 "Firmware performs HARD reset, going to wait %dms\n",
4218 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4219 !hdev->asic_prop.hard_reset_done_by_fw);
4221 /* Set device to handle FLR by H/W as we will put the device CPU to
4224 if (driver_performs_reset)
4225 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4226 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4228 /* If linux is loaded in the device CPU we need to communicate with it
4229 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4230 * registers in case of old F/Ws
4232 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4233 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4234 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4235 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4237 WREG32(irq_handler_offset,
4238 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4240 /* This is a hail-mary attempt to revive the card in the small chance that the
4241 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4242 * In that case, triggering reset through GIC won't help. We need to trigger the
4243 * reset as if Linux wasn't loaded.
4245 * We do it only if the reset cause was HB, because that would be the indication
4248 * In case watchdog hasn't expired but we still got HB, then this won't do any
4251 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4252 if (hdev->asic_prop.hard_reset_done_by_fw)
4253 hl_fw_ask_hard_reset_without_linux(hdev);
4255 hl_fw_ask_halt_machine_without_linux(hdev);
4258 if (hdev->asic_prop.hard_reset_done_by_fw)
4259 hl_fw_ask_hard_reset_without_linux(hdev);
4261 hl_fw_ask_halt_machine_without_linux(hdev);
4264 if (driver_performs_reset) {
4266 /* Configure the reset registers. Must be done as early as
4267 * possible in case we fail during H/W initialization
4269 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4270 (CFG_RST_H_DMA_MASK |
4271 CFG_RST_H_MME_MASK |
4273 CFG_RST_H_TPC_7_MASK));
4275 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4277 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4278 (CFG_RST_H_HBM_MASK |
4279 CFG_RST_H_TPC_7_MASK |
4280 CFG_RST_H_NIC_MASK |
4282 CFG_RST_H_DMA_MASK |
4283 CFG_RST_H_MME_MASK |
4284 CFG_RST_H_CPU_MASK |
4285 CFG_RST_H_MMU_MASK));
4287 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4288 (CFG_RST_L_IF_MASK |
4289 CFG_RST_L_PSOC_MASK |
4290 CFG_RST_L_TPC_MASK));
4292 msleep(cpu_timeout_ms);
4294 /* Tell ASIC not to re-initialize PCIe */
4295 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4297 /* Restart BTL/BLR upon hard-reset */
4298 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4300 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4301 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4304 "Issued HARD reset command, going to wait %dms\n",
4308 "Firmware performs HARD reset, going to wait %dms\n",
4314 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4315 * itself is in reset. Need to wait until the reset is deasserted
4317 msleep(reset_timeout_ms);
4319 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4320 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4322 "Timeout while waiting for device to reset 0x%x\n",
4326 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4327 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4328 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4329 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4330 HW_CAP_HBM_SCRAMBLER);
4332 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4334 hdev->device_cpu_is_halted = false;
4338 static int gaudi_suspend(struct hl_device *hdev)
4342 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4344 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4349 static int gaudi_resume(struct hl_device *hdev)
4351 return gaudi_init_iatu(hdev);
4354 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4355 void *cpu_addr, dma_addr_t dma_addr, size_t size)
4359 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4360 VM_DONTCOPY | VM_NORESERVE;
4362 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4363 (dma_addr - HOST_PHYS_BASE), size);
4365 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4370 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4372 struct cpu_dyn_regs *dyn_regs =
4373 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4374 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4375 struct gaudi_device *gaudi = hdev->asic_specific;
4376 bool invalid_queue = false;
4379 switch (hw_queue_id) {
4380 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4381 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4382 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4383 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4384 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4387 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4388 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4389 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4390 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4391 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4394 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4395 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4396 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4397 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4398 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4401 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4402 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4403 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4404 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4405 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4408 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4409 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4410 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4411 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4412 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4415 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4416 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4417 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4418 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4419 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4422 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4423 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4424 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4425 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4426 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4429 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4430 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4431 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4432 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4433 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4436 case GAUDI_QUEUE_ID_CPU_PQ:
4437 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4438 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4440 invalid_queue = true;
4443 case GAUDI_QUEUE_ID_MME_0_0:
4444 db_reg_offset = mmMME2_QM_PQ_PI_0;
4447 case GAUDI_QUEUE_ID_MME_0_1:
4448 db_reg_offset = mmMME2_QM_PQ_PI_1;
4451 case GAUDI_QUEUE_ID_MME_0_2:
4452 db_reg_offset = mmMME2_QM_PQ_PI_2;
4455 case GAUDI_QUEUE_ID_MME_0_3:
4456 db_reg_offset = mmMME2_QM_PQ_PI_3;
4459 case GAUDI_QUEUE_ID_MME_1_0:
4460 db_reg_offset = mmMME0_QM_PQ_PI_0;
4463 case GAUDI_QUEUE_ID_MME_1_1:
4464 db_reg_offset = mmMME0_QM_PQ_PI_1;
4467 case GAUDI_QUEUE_ID_MME_1_2:
4468 db_reg_offset = mmMME0_QM_PQ_PI_2;
4471 case GAUDI_QUEUE_ID_MME_1_3:
4472 db_reg_offset = mmMME0_QM_PQ_PI_3;
4475 case GAUDI_QUEUE_ID_TPC_0_0:
4476 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4479 case GAUDI_QUEUE_ID_TPC_0_1:
4480 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4483 case GAUDI_QUEUE_ID_TPC_0_2:
4484 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4487 case GAUDI_QUEUE_ID_TPC_0_3:
4488 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4491 case GAUDI_QUEUE_ID_TPC_1_0:
4492 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4495 case GAUDI_QUEUE_ID_TPC_1_1:
4496 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4499 case GAUDI_QUEUE_ID_TPC_1_2:
4500 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4503 case GAUDI_QUEUE_ID_TPC_1_3:
4504 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4507 case GAUDI_QUEUE_ID_TPC_2_0:
4508 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4511 case GAUDI_QUEUE_ID_TPC_2_1:
4512 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4515 case GAUDI_QUEUE_ID_TPC_2_2:
4516 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4519 case GAUDI_QUEUE_ID_TPC_2_3:
4520 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4523 case GAUDI_QUEUE_ID_TPC_3_0:
4524 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4527 case GAUDI_QUEUE_ID_TPC_3_1:
4528 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4531 case GAUDI_QUEUE_ID_TPC_3_2:
4532 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4535 case GAUDI_QUEUE_ID_TPC_3_3:
4536 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4539 case GAUDI_QUEUE_ID_TPC_4_0:
4540 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4543 case GAUDI_QUEUE_ID_TPC_4_1:
4544 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4547 case GAUDI_QUEUE_ID_TPC_4_2:
4548 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4551 case GAUDI_QUEUE_ID_TPC_4_3:
4552 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4555 case GAUDI_QUEUE_ID_TPC_5_0:
4556 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4559 case GAUDI_QUEUE_ID_TPC_5_1:
4560 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4563 case GAUDI_QUEUE_ID_TPC_5_2:
4564 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4567 case GAUDI_QUEUE_ID_TPC_5_3:
4568 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4571 case GAUDI_QUEUE_ID_TPC_6_0:
4572 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4575 case GAUDI_QUEUE_ID_TPC_6_1:
4576 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4579 case GAUDI_QUEUE_ID_TPC_6_2:
4580 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4583 case GAUDI_QUEUE_ID_TPC_6_3:
4584 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4587 case GAUDI_QUEUE_ID_TPC_7_0:
4588 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4591 case GAUDI_QUEUE_ID_TPC_7_1:
4592 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4595 case GAUDI_QUEUE_ID_TPC_7_2:
4596 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4599 case GAUDI_QUEUE_ID_TPC_7_3:
4600 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4603 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4604 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4605 invalid_queue = true;
4607 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4608 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4611 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4612 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4613 invalid_queue = true;
4615 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4616 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4619 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4620 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4621 invalid_queue = true;
4623 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4624 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4627 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4628 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4629 invalid_queue = true;
4631 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4632 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4635 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4636 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4637 invalid_queue = true;
4639 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4640 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4643 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4644 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4645 invalid_queue = true;
4647 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4648 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4651 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4652 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4653 invalid_queue = true;
4655 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4656 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4659 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4660 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4661 invalid_queue = true;
4663 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4664 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4667 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4668 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4669 invalid_queue = true;
4671 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4672 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4675 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4676 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4677 invalid_queue = true;
4679 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4680 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4684 invalid_queue = true;
4687 if (invalid_queue) {
4688 /* Should never get here */
4689 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4696 /* ring the doorbell */
4697 WREG32(db_reg_offset, db_value);
4699 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4700 /* make sure device CPU will read latest data from host */
4703 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4704 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4705 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4707 WREG32(irq_handler_offset,
4708 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4712 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4715 __le64 *pbd = (__le64 *) bd;
4717 /* The QMANs are on the host memory so a simple copy suffice */
4722 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4723 dma_addr_t *dma_handle, gfp_t flags)
4725 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4728 /* Shift to the device's base physical address of host memory */
4730 *dma_handle += HOST_PHYS_BASE;
4735 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4736 void *cpu_addr, dma_addr_t dma_handle)
4738 /* Cancel the device's base physical address of host memory */
4739 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4741 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4744 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4746 struct asic_fixed_properties *prop = &hdev->asic_prop;
4747 u64 cur_addr = DRAM_BASE_ADDR_USER;
4752 while (cur_addr < prop->dram_end_address) {
4753 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4754 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4757 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4760 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4761 cur_addr, cur_addr + chunk_size);
4763 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
4764 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
4765 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4766 lower_32_bits(cur_addr));
4767 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4768 upper_32_bits(cur_addr));
4769 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4771 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4772 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4773 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4775 cur_addr += chunk_size;
4777 if (cur_addr == prop->dram_end_address)
4781 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4782 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4784 rc = hl_poll_timeout(
4786 mmDMA0_CORE_STS0 + dma_offset,
4788 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4790 HBM_SCRUBBING_TIMEOUT_US);
4794 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4804 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4806 struct asic_fixed_properties *prop = &hdev->asic_prop;
4810 if (!hdev->memory_scrub)
4813 if (!addr && !size) {
4814 /* Wait till device is idle */
4815 rc = hl_poll_timeout(
4817 mmDMA0_CORE_STS0/* dummy */,
4819 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4822 HBM_SCRUBBING_TIMEOUT_US);
4824 dev_err(hdev->dev, "waiting for idle timeout\n");
4829 addr = prop->sram_user_base_address;
4830 size = hdev->pldm ? 0x10000 :
4831 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4832 val = 0x7777777777777777ull;
4834 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4837 "Failed to clear SRAM in mem scrub all\n");
4841 /* Scrub HBM using all DMA channels in parallel */
4842 rc = gaudi_hbm_scrubbing(hdev);
4845 "Failed to clear HBM in mem scrub all\n");
4851 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4852 u32 queue_id, dma_addr_t *dma_handle,
4855 struct gaudi_device *gaudi = hdev->asic_specific;
4856 struct gaudi_internal_qman_info *q;
4858 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4859 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4860 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4864 q = &gaudi->internal_qmans[queue_id];
4865 *dma_handle = q->pq_dma_addr;
4866 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4868 return q->pq_kernel_addr;
4871 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4872 u16 len, u32 timeout, u64 *result)
4874 struct gaudi_device *gaudi = hdev->asic_specific;
4876 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4883 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4885 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4889 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4891 struct packet_msg_prot *fence_pkt;
4892 dma_addr_t pkt_dma_addr;
4893 u32 fence_val, tmp, timeout_usec;
4894 dma_addr_t fence_dma_addr;
4899 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4901 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4903 fence_val = GAUDI_QMAN0_FENCE_VAL;
4905 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4909 "Failed to allocate memory for H/W queue %d testing\n",
4916 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4917 sizeof(struct packet_msg_prot),
4918 GFP_KERNEL, &pkt_dma_addr);
4921 "Failed to allocate packet for H/W queue %d testing\n",
4924 goto free_fence_ptr;
4927 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4928 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4929 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4931 fence_pkt->ctl = cpu_to_le32(tmp);
4932 fence_pkt->value = cpu_to_le32(fence_val);
4933 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4935 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4936 sizeof(struct packet_msg_prot),
4940 "Failed to send fence packet to H/W queue %d\n",
4945 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4946 1000, timeout_usec, true);
4948 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4950 if (rc == -ETIMEDOUT) {
4952 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4953 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4958 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4961 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4966 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4968 struct gaudi_device *gaudi = hdev->asic_specific;
4971 * check capability here as send_cpu_message() won't update the result
4972 * value if no capability
4974 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4977 return hl_fw_test_cpu_queue(hdev);
4980 static int gaudi_test_queues(struct hl_device *hdev)
4982 int i, rc, ret_val = 0;
4984 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4985 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4986 rc = gaudi_test_queue(hdev, i);
4992 rc = gaudi_test_cpu_queue(hdev);
4999 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5000 gfp_t mem_flags, dma_addr_t *dma_handle)
5004 if (size > GAUDI_DMA_POOL_BLK_SIZE)
5007 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5009 /* Shift to the device's base physical address of host memory */
5011 *dma_handle += HOST_PHYS_BASE;
5016 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
5017 dma_addr_t dma_addr)
5019 /* Cancel the device's base physical address of host memory */
5020 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5022 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5025 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
5026 size_t size, dma_addr_t *dma_handle)
5028 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5031 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
5032 size_t size, void *vaddr)
5034 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5037 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5038 int nents, enum dma_data_direction dir)
5040 struct scatterlist *sg;
5043 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5046 /* Shift to the device's base physical address of host memory */
5047 for_each_sg(sgl, sg, nents, i)
5048 sg->dma_address += HOST_PHYS_BASE;
5053 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5054 int nents, enum dma_data_direction dir)
5056 struct scatterlist *sg;
5059 /* Cancel the device's base physical address of host memory */
5060 for_each_sg(sgl, sg, nents, i)
5061 sg->dma_address -= HOST_PHYS_BASE;
5063 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5066 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5067 struct sg_table *sgt)
5069 struct scatterlist *sg, *sg_next_iter;
5070 u32 count, dma_desc_cnt;
5072 dma_addr_t addr, addr_next;
5076 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5078 len = sg_dma_len(sg);
5079 addr = sg_dma_address(sg);
5084 while ((count + 1) < sgt->nents) {
5085 sg_next_iter = sg_next(sg);
5086 len_next = sg_dma_len(sg_next_iter);
5087 addr_next = sg_dma_address(sg_next_iter);
5092 if ((addr + len == addr_next) &&
5093 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5105 return dma_desc_cnt * sizeof(struct packet_lin_dma);
5108 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5109 struct hl_cs_parser *parser,
5110 struct packet_lin_dma *user_dma_pkt,
5111 u64 addr, enum dma_data_direction dir)
5113 struct hl_userptr *userptr;
5116 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5117 parser->job_userptr_list, &userptr))
5118 goto already_pinned;
5120 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5124 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5129 list_add_tail(&userptr->job_node, parser->job_userptr_list);
5131 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5132 userptr->sgt->nents, dir);
5134 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5138 userptr->dma_mapped = true;
5142 parser->patched_cb_size +=
5143 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5148 list_del(&userptr->job_node);
5149 hl_unpin_host_memory(hdev, userptr);
5155 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5156 struct hl_cs_parser *parser,
5157 struct packet_lin_dma *user_dma_pkt,
5160 enum dma_data_direction dir;
5161 bool skip_host_mem_pin = false, user_memset;
5165 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5166 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5167 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5171 skip_host_mem_pin = true;
5173 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5174 dir = DMA_TO_DEVICE;
5175 addr = le64_to_cpu(user_dma_pkt->src_addr);
5177 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5178 dir = DMA_FROM_DEVICE;
5179 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5180 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5181 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5184 if (skip_host_mem_pin)
5185 parser->patched_cb_size += sizeof(*user_dma_pkt);
5187 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5193 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5194 struct hl_cs_parser *parser,
5195 struct packet_lin_dma *user_dma_pkt)
5197 bool src_in_host = false;
5198 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5199 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5200 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5202 dev_dbg(hdev->dev, "DMA packet details:\n");
5203 dev_dbg(hdev->dev, "source == 0x%llx\n",
5204 le64_to_cpu(user_dma_pkt->src_addr));
5205 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5206 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5209 * Special handling for DMA with size 0. Bypass all validations
5210 * because no transactions will be done except for WR_COMP, which
5211 * is not a security issue
5213 if (!le32_to_cpu(user_dma_pkt->tsize)) {
5214 parser->patched_cb_size += sizeof(*user_dma_pkt);
5218 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5221 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5225 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5226 struct hl_cs_parser *parser,
5227 struct packet_load_and_exe *user_pkt)
5231 cfg = le32_to_cpu(user_pkt->cfg);
5233 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5235 "User not allowed to use Load and Execute\n");
5239 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5244 static int gaudi_validate_cb(struct hl_device *hdev,
5245 struct hl_cs_parser *parser, bool is_mmu)
5247 u32 cb_parsed_length = 0;
5250 parser->patched_cb_size = 0;
5252 /* cb_user_size is more than 0 so loop will always be executed */
5253 while (cb_parsed_length < parser->user_cb_size) {
5254 enum packet_id pkt_id;
5256 struct gaudi_packet *user_pkt;
5258 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5260 pkt_id = (enum packet_id) (
5261 (le64_to_cpu(user_pkt->header) &
5262 PACKET_HEADER_PACKET_ID_MASK) >>
5263 PACKET_HEADER_PACKET_ID_SHIFT);
5265 if (!validate_packet_id(pkt_id)) {
5266 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5271 pkt_size = gaudi_packet_sizes[pkt_id];
5272 cb_parsed_length += pkt_size;
5273 if (cb_parsed_length > parser->user_cb_size) {
5275 "packet 0x%x is out of CB boundary\n", pkt_id);
5281 case PACKET_MSG_PROT:
5283 "User not allowed to use MSG_PROT\n");
5288 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5293 dev_err(hdev->dev, "User not allowed to use STOP\n");
5297 case PACKET_WREG_BULK:
5299 "User not allowed to use WREG_BULK\n");
5303 case PACKET_LOAD_AND_EXE:
5304 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5305 (struct packet_load_and_exe *) user_pkt);
5308 case PACKET_LIN_DMA:
5309 parser->contains_dma_pkt = true;
5311 parser->patched_cb_size += pkt_size;
5313 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5314 (struct packet_lin_dma *) user_pkt);
5317 case PACKET_WREG_32:
5318 case PACKET_MSG_LONG:
5319 case PACKET_MSG_SHORT:
5323 case PACKET_ARB_POINT:
5324 parser->patched_cb_size += pkt_size;
5328 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5339 * The new CB should have space at the end for two MSG_PROT packets:
5340 * 1. A packet that will act as a completion packet
5341 * 2. A packet that will generate MSI-X interrupt
5343 if (parser->completion)
5344 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5349 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5350 struct hl_cs_parser *parser,
5351 struct packet_lin_dma *user_dma_pkt,
5352 struct packet_lin_dma *new_dma_pkt,
5353 u32 *new_dma_pkt_size)
5355 struct hl_userptr *userptr;
5356 struct scatterlist *sg, *sg_next_iter;
5357 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5359 dma_addr_t dma_addr, dma_addr_next;
5360 u64 device_memory_addr, addr;
5361 enum dma_data_direction dir;
5362 struct sg_table *sgt;
5363 bool src_in_host = false;
5364 bool skip_host_mem_pin = false;
5367 ctl = le32_to_cpu(user_dma_pkt->ctl);
5369 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5372 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5373 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5376 addr = le64_to_cpu(user_dma_pkt->src_addr);
5377 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5378 dir = DMA_TO_DEVICE;
5380 skip_host_mem_pin = true;
5382 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5383 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5384 dir = DMA_FROM_DEVICE;
5387 if ((!skip_host_mem_pin) &&
5388 (!hl_userptr_is_pinned(hdev, addr,
5389 le32_to_cpu(user_dma_pkt->tsize),
5390 parser->job_userptr_list, &userptr))) {
5391 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5392 addr, user_dma_pkt->tsize);
5396 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5397 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5398 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5402 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5407 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5408 len = sg_dma_len(sg);
5409 dma_addr = sg_dma_address(sg);
5414 while ((count + 1) < sgt->nents) {
5415 sg_next_iter = sg_next(sg);
5416 len_next = sg_dma_len(sg_next_iter);
5417 dma_addr_next = sg_dma_address(sg_next_iter);
5422 if ((dma_addr + len == dma_addr_next) &&
5423 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5432 ctl = le32_to_cpu(user_dma_pkt->ctl);
5433 if (likely(dma_desc_cnt))
5434 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5435 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5436 new_dma_pkt->ctl = cpu_to_le32(ctl);
5437 new_dma_pkt->tsize = cpu_to_le32(len);
5439 if (dir == DMA_TO_DEVICE) {
5440 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5441 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5443 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5444 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5448 device_memory_addr += len;
5453 if (!dma_desc_cnt) {
5455 "Error of 0 SG entries when patching DMA packet\n");
5459 /* Fix the last dma packet - wrcomp must be as user set it */
5461 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5463 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5468 static int gaudi_patch_cb(struct hl_device *hdev,
5469 struct hl_cs_parser *parser)
5471 u32 cb_parsed_length = 0;
5472 u32 cb_patched_cur_length = 0;
5475 /* cb_user_size is more than 0 so loop will always be executed */
5476 while (cb_parsed_length < parser->user_cb_size) {
5477 enum packet_id pkt_id;
5479 u32 new_pkt_size = 0;
5480 struct gaudi_packet *user_pkt, *kernel_pkt;
5482 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5483 kernel_pkt = parser->patched_cb->kernel_address +
5484 cb_patched_cur_length;
5486 pkt_id = (enum packet_id) (
5487 (le64_to_cpu(user_pkt->header) &
5488 PACKET_HEADER_PACKET_ID_MASK) >>
5489 PACKET_HEADER_PACKET_ID_SHIFT);
5491 if (!validate_packet_id(pkt_id)) {
5492 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5497 pkt_size = gaudi_packet_sizes[pkt_id];
5498 cb_parsed_length += pkt_size;
5499 if (cb_parsed_length > parser->user_cb_size) {
5501 "packet 0x%x is out of CB boundary\n", pkt_id);
5507 case PACKET_LIN_DMA:
5508 rc = gaudi_patch_dma_packet(hdev, parser,
5509 (struct packet_lin_dma *) user_pkt,
5510 (struct packet_lin_dma *) kernel_pkt,
5512 cb_patched_cur_length += new_pkt_size;
5515 case PACKET_MSG_PROT:
5517 "User not allowed to use MSG_PROT\n");
5522 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5527 dev_err(hdev->dev, "User not allowed to use STOP\n");
5531 case PACKET_WREG_32:
5532 case PACKET_WREG_BULK:
5533 case PACKET_MSG_LONG:
5534 case PACKET_MSG_SHORT:
5538 case PACKET_ARB_POINT:
5539 case PACKET_LOAD_AND_EXE:
5540 memcpy(kernel_pkt, user_pkt, pkt_size);
5541 cb_patched_cur_length += pkt_size;
5545 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5558 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5559 struct hl_cs_parser *parser)
5561 u64 patched_cb_handle;
5562 u32 patched_cb_size;
5563 struct hl_cb *user_cb;
5567 * The new CB should have space at the end for two MSG_PROT pkt:
5568 * 1. A packet that will act as a completion packet
5569 * 2. A packet that will generate MSI interrupt
5571 if (parser->completion)
5572 parser->patched_cb_size = parser->user_cb_size +
5573 sizeof(struct packet_msg_prot) * 2;
5575 parser->patched_cb_size = parser->user_cb_size;
5577 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5578 parser->patched_cb_size, false, false,
5579 &patched_cb_handle);
5583 "Failed to allocate patched CB for DMA CS %d\n",
5588 patched_cb_handle >>= PAGE_SHIFT;
5589 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5590 (u32) patched_cb_handle);
5591 /* hl_cb_get should never fail */
5592 if (!parser->patched_cb) {
5593 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5594 (u32) patched_cb_handle);
5600 * The check that parser->user_cb_size <= parser->user_cb->size was done
5601 * in validate_queue_index().
5603 memcpy(parser->patched_cb->kernel_address,
5604 parser->user_cb->kernel_address,
5605 parser->user_cb_size);
5607 patched_cb_size = parser->patched_cb_size;
5609 /* Validate patched CB instead of user CB */
5610 user_cb = parser->user_cb;
5611 parser->user_cb = parser->patched_cb;
5612 rc = gaudi_validate_cb(hdev, parser, true);
5613 parser->user_cb = user_cb;
5616 hl_cb_put(parser->patched_cb);
5620 if (patched_cb_size != parser->patched_cb_size) {
5621 dev_err(hdev->dev, "user CB size mismatch\n");
5622 hl_cb_put(parser->patched_cb);
5629 * Always call cb destroy here because we still have 1 reference
5630 * to it by calling cb_get earlier. After the job will be completed,
5631 * cb_put will release it, but here we want to remove it from the
5634 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5635 patched_cb_handle << PAGE_SHIFT);
5640 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5641 struct hl_cs_parser *parser)
5643 u64 patched_cb_handle;
5646 rc = gaudi_validate_cb(hdev, parser, false);
5651 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5652 parser->patched_cb_size, false, false,
5653 &patched_cb_handle);
5656 "Failed to allocate patched CB for DMA CS %d\n", rc);
5660 patched_cb_handle >>= PAGE_SHIFT;
5661 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5662 (u32) patched_cb_handle);
5663 /* hl_cb_get should never fail here */
5664 if (!parser->patched_cb) {
5665 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5666 (u32) patched_cb_handle);
5671 rc = gaudi_patch_cb(hdev, parser);
5674 hl_cb_put(parser->patched_cb);
5678 * Always call cb destroy here because we still have 1 reference
5679 * to it by calling cb_get earlier. After the job will be completed,
5680 * cb_put will release it, but here we want to remove it from the
5683 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5684 patched_cb_handle << PAGE_SHIFT);
5688 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5692 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5693 struct hl_cs_parser *parser)
5695 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5696 struct gaudi_device *gaudi = hdev->asic_specific;
5697 u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5698 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5700 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5701 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5702 (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5703 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5704 parser->hw_queue_id);
5708 /* For internal queue jobs just check if CB address is valid */
5709 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5710 parser->user_cb_size,
5711 asic_prop->sram_user_base_address,
5712 asic_prop->sram_end_address))
5715 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5716 parser->user_cb_size,
5717 asic_prop->dram_user_base_address,
5718 asic_prop->dram_end_address))
5721 /* PMMU and HPMMU addresses are equal, check only one of them */
5722 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5723 parser->user_cb_size,
5724 asic_prop->pmmu.start_addr,
5725 asic_prop->pmmu.end_addr))
5729 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5730 parser->user_cb, parser->user_cb_size);
5735 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5737 struct gaudi_device *gaudi = hdev->asic_specific;
5739 if (parser->queue_type == QUEUE_TYPE_INT)
5740 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5742 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5743 return gaudi_parse_cb_mmu(hdev, parser);
5745 return gaudi_parse_cb_no_mmu(hdev, parser);
5748 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5749 void *kernel_address, u32 len,
5750 u64 cq_addr, u32 cq_val, u32 msi_vec,
5753 struct gaudi_device *gaudi = hdev->asic_specific;
5754 struct packet_msg_prot *cq_pkt;
5758 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5760 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5761 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5764 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5766 cq_pkt->ctl = cpu_to_le32(tmp);
5767 cq_pkt->value = cpu_to_le32(cq_val);
5768 cq_pkt->addr = cpu_to_le64(cq_addr);
5772 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5773 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5774 cq_pkt->ctl = cpu_to_le32(tmp);
5775 cq_pkt->value = cpu_to_le32(1);
5777 if (gaudi->multi_msi_mode)
5778 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5780 msi_addr = mmPCIE_CORE_MSI_REQ;
5782 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5785 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5787 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5790 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5793 struct packet_lin_dma *lin_dma_pkt;
5794 struct hl_cs_job *job;
5795 u32 cb_size, ctl, err_cause;
5800 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5804 lin_dma_pkt = cb->kernel_address;
5805 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5806 cb_size = sizeof(*lin_dma_pkt);
5808 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5809 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5810 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5811 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5812 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5814 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5815 lin_dma_pkt->src_addr = cpu_to_le64(val);
5816 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5817 lin_dma_pkt->tsize = cpu_to_le32(size);
5819 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5821 dev_err(hdev->dev, "Failed to allocate a new job\n");
5826 /* Verify DMA is OK */
5827 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5828 if (err_cause && !hdev->init_done) {
5830 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5832 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5837 atomic_inc(&job->user_cb->cs_cnt);
5838 job->user_cb_size = cb_size;
5839 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5840 job->patched_cb = job->user_cb;
5841 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5843 hl_debugfs_add_job(hdev, job);
5845 rc = gaudi_send_job_on_qman0(hdev, job);
5846 hl_debugfs_remove_job(hdev, job);
5848 atomic_dec(&cb->cs_cnt);
5850 /* Verify DMA is OK */
5851 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5853 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5855 if (!hdev->init_done) {
5857 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5859 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5866 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5871 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5872 u32 num_regs, u32 val)
5874 struct packet_msg_long *pkt;
5875 struct hl_cs_job *job;
5880 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5882 if (cb_size > SZ_2M) {
5883 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
5887 cb = hl_cb_kernel_create(hdev, cb_size, false);
5891 pkt = cb->kernel_address;
5893 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5894 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5895 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5896 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5897 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5899 for (i = 0; i < num_regs ; i++, pkt++) {
5900 pkt->ctl = cpu_to_le32(ctl);
5901 pkt->value = cpu_to_le32(val);
5902 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5905 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5907 dev_err(hdev->dev, "Failed to allocate a new job\n");
5914 atomic_inc(&job->user_cb->cs_cnt);
5915 job->user_cb_size = cb_size;
5916 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5917 job->patched_cb = job->user_cb;
5918 job->job_cb_size = cb_size;
5920 hl_debugfs_add_job(hdev, job);
5922 rc = gaudi_send_job_on_qman0(hdev, job);
5923 hl_debugfs_remove_job(hdev, job);
5925 atomic_dec(&cb->cs_cnt);
5929 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5934 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5940 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5941 num_regs = NUM_OF_SOB_IN_BLOCK;
5942 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5944 dev_err(hdev->dev, "failed resetting SM registers");
5948 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5949 num_regs = NUM_OF_SOB_IN_BLOCK;
5950 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5952 dev_err(hdev->dev, "failed resetting SM registers");
5956 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5957 num_regs = NUM_OF_SOB_IN_BLOCK;
5958 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5960 dev_err(hdev->dev, "failed resetting SM registers");
5964 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5965 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5966 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5968 dev_err(hdev->dev, "failed resetting SM registers");
5972 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5973 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5974 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5976 dev_err(hdev->dev, "failed resetting SM registers");
5980 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5981 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5982 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5984 dev_err(hdev->dev, "failed resetting SM registers");
5988 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5989 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5990 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5991 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5993 dev_err(hdev->dev, "failed resetting SM registers");
5997 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5998 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5999 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6000 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6002 dev_err(hdev->dev, "failed resetting SM registers");
6009 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6011 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6012 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6015 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6016 u64 sob_addr = CFG_BASE +
6017 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6019 u32 dma_offset = i * DMA_CORE_OFFSET;
6021 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6022 lower_32_bits(sob_addr));
6023 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6024 upper_32_bits(sob_addr));
6025 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6027 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6028 * modified by the user for SRAM reduction
6031 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6036 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6041 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6042 qman_offset = i * DMA_QMAN_OFFSET;
6043 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6046 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6047 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6048 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6051 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6052 qman_offset = i * TPC_QMAN_OFFSET;
6053 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6056 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6057 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6058 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6059 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6063 static int gaudi_restore_user_registers(struct hl_device *hdev)
6067 rc = gaudi_restore_sm_registers(hdev);
6071 gaudi_restore_dma_registers(hdev);
6072 gaudi_restore_qm_registers(hdev);
6077 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6082 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6084 struct asic_fixed_properties *prop = &hdev->asic_prop;
6085 struct gaudi_device *gaudi = hdev->asic_specific;
6086 u64 addr = prop->mmu_pgt_addr;
6087 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6089 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6092 return gaudi_memset_device_memory(hdev, addr, size, 0);
6095 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6100 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6101 bool user_address, u32 *val)
6103 struct asic_fixed_properties *prop = &hdev->asic_prop;
6104 u64 hbm_bar_addr, host_phys_end;
6107 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6109 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6111 *val = RREG32(addr - CFG_BASE);
6113 } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6115 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6117 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6119 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6121 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6123 if (hbm_bar_addr != U64_MAX) {
6124 *val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6125 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6128 if (hbm_bar_addr == U64_MAX)
6131 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6132 user_address && !iommu_present(&pci_bus_type)) {
6134 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6143 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6144 bool user_address, u32 val)
6146 struct asic_fixed_properties *prop = &hdev->asic_prop;
6147 u64 hbm_bar_addr, host_phys_end;
6150 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6152 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6154 WREG32(addr - CFG_BASE, val);
6156 } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6158 writel(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6160 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6162 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6164 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6166 if (hbm_bar_addr != U64_MAX) {
6167 writel(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6168 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6171 if (hbm_bar_addr == U64_MAX)
6174 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6175 user_address && !iommu_present(&pci_bus_type)) {
6177 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6186 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6187 bool user_address, u64 *val)
6189 struct asic_fixed_properties *prop = &hdev->asic_prop;
6190 u64 hbm_bar_addr, host_phys_end;
6193 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6195 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6197 u32 val_l = RREG32(addr - CFG_BASE);
6198 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6200 *val = (((u64) val_h) << 32) | val_l;
6202 } else if ((addr >= SRAM_BASE_ADDR) &&
6203 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6205 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6207 } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6209 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6211 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6213 if (hbm_bar_addr != U64_MAX) {
6214 *val = readq(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6215 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6218 if (hbm_bar_addr == U64_MAX)
6221 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6222 user_address && !iommu_present(&pci_bus_type)) {
6224 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6233 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6234 bool user_address, u64 val)
6236 struct asic_fixed_properties *prop = &hdev->asic_prop;
6237 u64 hbm_bar_addr, host_phys_end;
6240 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6242 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6244 WREG32(addr - CFG_BASE, lower_32_bits(val));
6245 WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
6247 } else if ((addr >= SRAM_BASE_ADDR) &&
6248 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6250 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6252 } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6254 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6256 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6258 if (hbm_bar_addr != U64_MAX) {
6259 writeq(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6260 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6263 if (hbm_bar_addr == U64_MAX)
6266 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6267 user_address && !iommu_present(&pci_bus_type)) {
6269 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6278 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6279 u32 size_to_dma, dma_addr_t dma_addr)
6285 dma_offset = dma_id * DMA_CORE_OFFSET;
6287 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6288 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6289 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6290 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6291 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6292 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6293 (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6295 rc = hl_poll_timeout(
6297 mmDMA0_CORE_STS0 + dma_offset,
6299 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6305 "DMA %d timed-out during reading of 0x%llx\n",
6310 /* Verify DMA is OK */
6311 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6313 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6315 "Clearing DMA0 engine from errors (cause 0x%x)\n",
6317 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6325 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6328 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6329 u32 qm_glbl_sts0, qm_cgm_sts;
6330 u64 dma_offset, qm_offset;
6331 dma_addr_t dma_addr;
6336 kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6339 GFP_KERNEL | __GFP_ZERO);
6344 hdev->asic_funcs->hw_queues_lock(hdev);
6346 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6347 dma_offset = dma_id * DMA_CORE_OFFSET;
6348 qm_offset = dma_id * DMA_QMAN_OFFSET;
6349 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6350 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6351 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6352 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6353 IS_DMA_IDLE(dma_core_sts0);
6356 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6357 dma_offset = dma_id * DMA_CORE_OFFSET;
6358 qm_offset = dma_id * DMA_QMAN_OFFSET;
6359 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6360 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6361 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6362 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6363 IS_DMA_IDLE(dma_core_sts0);
6366 dev_err_ratelimited(hdev->dev,
6367 "Can't read via DMA because it is BUSY\n");
6373 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6374 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6375 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6377 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6378 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6381 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6383 /* Verify DMA is OK */
6384 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6387 "Clearing DMA0 engine from errors (cause 0x%x)\n",
6389 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6394 size_to_dma = SZ_2M;
6396 while (size_left > 0) {
6398 if (size_left < SZ_2M)
6399 size_to_dma = size_left;
6401 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6406 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6408 if (size_left <= SZ_2M)
6416 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6417 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6420 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6421 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6423 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6426 hdev->asic_funcs->hw_queues_unlock(hdev);
6428 hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6434 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6436 struct gaudi_device *gaudi = hdev->asic_specific;
6438 if (hdev->reset_info.hard_reset_pending)
6441 return readq(hdev->pcie_bar[HBM_BAR_ID] +
6442 (addr - gaudi->hbm_bar_cur_addr));
6445 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6447 struct gaudi_device *gaudi = hdev->asic_specific;
6449 if (hdev->reset_info.hard_reset_pending)
6452 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6453 (addr - gaudi->hbm_bar_cur_addr));
6456 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6458 /* mask to zero the MMBP and ASID bits */
6459 WREG32_AND(reg, ~0x7FF);
6460 WREG32_OR(reg, asid);
6463 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6465 struct gaudi_device *gaudi = hdev->asic_specific;
6467 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6470 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6471 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6475 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6476 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6477 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6478 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6479 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6481 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6482 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6483 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6484 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6485 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6487 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6488 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6489 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6490 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6491 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6493 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6494 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6495 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6496 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6497 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6499 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6500 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6501 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6502 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6503 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6505 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6506 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6507 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6508 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6509 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6511 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6512 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6513 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6514 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6515 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6517 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6518 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6519 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6520 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6521 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6523 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6524 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6525 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6526 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6527 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6528 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6529 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6530 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6532 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6533 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6534 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6535 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6536 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6537 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6538 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6540 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6541 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6542 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6543 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6544 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6545 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6546 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6548 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6549 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6550 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6551 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6552 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6553 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6554 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6556 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6557 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6558 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6559 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6560 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6561 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6562 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6564 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6565 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6566 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6567 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6568 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6569 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6570 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6572 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6573 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6574 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6575 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6576 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6577 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6578 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6580 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6581 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6582 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6583 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6584 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6585 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6586 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6588 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6589 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6590 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6591 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6592 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6593 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6594 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6596 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6597 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6598 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6599 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6600 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6601 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6602 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6603 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6604 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6605 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6607 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6608 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6609 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6610 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6611 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6612 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6613 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6614 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6615 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6616 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6617 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6618 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6620 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6621 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6623 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6625 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6627 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6629 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6633 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6634 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6636 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6638 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6640 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6642 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6646 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6647 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6649 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6651 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6653 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6655 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6659 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6660 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6662 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6664 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6666 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6668 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6672 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6673 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6675 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6677 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6679 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6681 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6685 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6686 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6688 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6690 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6692 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6694 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6698 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6699 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6701 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6703 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6705 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6707 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6711 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6712 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6714 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6716 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6718 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6720 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6724 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6725 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6727 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6729 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6731 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6733 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6737 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6738 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6740 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6742 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6744 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6746 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6750 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6751 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6754 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6755 struct hl_cs_job *job)
6757 struct packet_msg_prot *fence_pkt;
6759 dma_addr_t fence_dma_addr;
6761 u32 tmp, timeout, dma_offset;
6765 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6767 timeout = HL_DEVICE_TIMEOUT_USEC;
6769 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6770 dev_err_ratelimited(hdev->dev,
6771 "Can't send driver job on QMAN0 because the device is not idle\n");
6775 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6779 "Failed to allocate fence memory for QMAN0\n");
6783 cb = job->patched_cb;
6785 fence_pkt = cb->kernel_address +
6786 job->job_cb_size - sizeof(struct packet_msg_prot);
6788 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6789 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6790 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6792 fence_pkt->ctl = cpu_to_le32(tmp);
6793 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6794 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6796 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6798 WREG32(mmDMA0_CORE_PROT + dma_offset,
6799 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6801 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6802 job->job_cb_size, cb->bus_address);
6804 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6805 goto free_fence_ptr;
6808 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6809 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6812 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6814 if (rc == -ETIMEDOUT) {
6815 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6816 goto free_fence_ptr;
6820 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6822 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6827 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6829 if (event_type >= GAUDI_EVENT_SIZE)
6830 goto event_not_supported;
6832 if (!gaudi_irq_map_table[event_type].valid)
6833 goto event_not_supported;
6835 snprintf(desc, size, gaudi_irq_map_table[event_type].name);
6839 event_not_supported:
6840 snprintf(desc, size, "N/A");
6843 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6844 bool is_write, s32 *engine_id_1,
6847 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6849 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6850 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6853 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6854 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6858 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6859 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6863 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6864 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6868 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6869 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6874 goto unknown_initiator;
6877 for (i = 0 ; i < 2 ; i++) {
6878 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6879 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6883 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6884 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6885 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6886 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6888 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6889 *engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6892 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6893 *engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6894 return "DMA0 or DMA2";
6896 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6897 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6898 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6899 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6901 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6902 *engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6905 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6906 *engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6907 return "DMA1 or DMA3";
6909 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6910 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6911 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6912 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6914 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6915 *engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6918 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6919 *engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6920 return "DMA4 or DMA6";
6922 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6923 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6924 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6925 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6927 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6928 *engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6931 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6932 *engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6933 return "DMA5 or DMA7";
6938 return "unknown initiator";
6941 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6942 u32 *engine_id_1, u32 *engine_id_2)
6944 u32 val, x_y, axi_id;
6946 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6947 RREG32(mmMMU_UP_RAZWI_READ_ID);
6948 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6949 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6950 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6951 RAZWI_INITIATOR_AXI_ID_SHIFT);
6954 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6955 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6956 *engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6959 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6960 *engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6964 case RAZWI_INITIATOR_ID_X_Y_TPC1:
6965 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6967 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6968 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6969 *engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6971 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6972 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6973 *engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6975 case RAZWI_INITIATOR_ID_X_Y_TPC2:
6976 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6978 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6979 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6980 *engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6983 /* PCI, CPU or PSOC does not have engine id*/
6984 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6986 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6988 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6991 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6992 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6993 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6994 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6995 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6996 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6997 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6998 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6999 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
7000 engine_id_1, engine_id_2);
7001 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7002 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
7003 *engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
7006 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
7007 *engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
7010 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
7011 *engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
7015 case RAZWI_INITIATOR_ID_X_Y_TPC5:
7016 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
7018 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7019 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7020 *engine_id_1 = GAUDI_ENGINE_ID_MME_2;
7022 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7023 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7024 *engine_id_1 = GAUDI_ENGINE_ID_MME_3;
7026 case RAZWI_INITIATOR_ID_X_Y_TPC6:
7027 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
7029 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7030 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
7031 *engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
7034 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
7035 *engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
7038 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
7039 *engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
7048 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7050 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7051 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7052 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7053 RAZWI_INITIATOR_AXI_ID_MASK);
7055 return "unknown initiator";
7058 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_id_1,
7062 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7063 dev_err_ratelimited(hdev->dev,
7064 "RAZWI event caused by illegal write of %s\n",
7065 gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
7066 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7069 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7070 dev_err_ratelimited(hdev->dev,
7071 "RAZWI event caused by illegal read of %s\n",
7072 gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
7073 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7077 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u8 *type)
7079 struct gaudi_device *gaudi = hdev->asic_specific;
7082 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7085 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7086 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7087 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7089 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7091 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
7092 *type = HL_RAZWI_PAGE_FAULT;
7094 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7097 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7098 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7099 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7101 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7103 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
7104 *type = HL_RAZWI_MMU_ACCESS_ERROR;
7106 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7111 * +-------------------+------------------------------------------------------+
7112 * | Configuration Reg | Description |
7114 * +-------------------+------------------------------------------------------+
7115 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
7116 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
7117 * | |0xF34 memory wrappers 63:32 |
7118 * | |0xF38 memory wrappers 95:64 |
7119 * | |0xF3C memory wrappers 127:96 |
7120 * +-------------------+------------------------------------------------------+
7121 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
7122 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
7123 * | |0xF44 memory wrappers 63:32 |
7124 * | |0xF48 memory wrappers 95:64 |
7125 * | |0xF4C memory wrappers 127:96 |
7126 * +-------------------+------------------------------------------------------+
7128 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7129 struct ecc_info_extract_params *params, u64 *ecc_address,
7130 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7132 u32 i, num_mem_regs, reg, err_bit;
7133 u64 err_addr, err_word = 0;
7135 num_mem_regs = params->num_memories / 32 +
7136 ((params->num_memories % 32) ? 1 : 0);
7138 if (params->block_address >= CFG_BASE)
7139 params->block_address -= CFG_BASE;
7142 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7144 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7146 /* Set invalid wrapper index */
7147 *memory_wrapper_idx = 0xFF;
7149 /* Iterate through memory wrappers, a single bit must be set */
7150 for (i = 0 ; i < num_mem_regs ; i++) {
7152 err_word = RREG32(err_addr);
7154 err_bit = __ffs(err_word);
7155 *memory_wrapper_idx = err_bit + (32 * i);
7160 if (*memory_wrapper_idx == 0xFF) {
7161 dev_err(hdev->dev, "ECC error information cannot be found\n");
7165 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7166 *memory_wrapper_idx);
7169 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7171 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7173 /* Clear error indication */
7174 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7176 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7178 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7180 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7186 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7188 * @idx: the current pi/ci value
7189 * @q_len: the queue length (power of 2)
7191 * @return the cyclically decremented index
7193 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7195 u32 mask = q_len - 1;
7198 * modular decrement is equivalent to adding (queue_size -1)
7199 * later we take LSBs to make sure the value is in the
7200 * range [0, queue_len - 1]
7202 return (idx + q_len - 1) & mask;
7206 * gaudi_print_sw_config_stream_data - print SW config stream data
7208 * @hdev: pointer to the habanalabs device structure
7209 * @stream: the QMAN's stream
7210 * @qman_base: base address of QMAN registers block
7212 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7215 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7216 u32 cq_ptr_lo_off, size;
7218 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7220 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7221 stream * cq_ptr_lo_off;
7222 cq_ptr_hi = cq_ptr_lo +
7223 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7224 cq_tsize = cq_ptr_lo +
7225 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7227 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7228 size = RREG32(cq_tsize);
7229 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
7230 stream, cq_ptr, size);
7234 * gaudi_print_last_pqes_on_err - print last PQEs on error
7236 * @hdev: pointer to the habanalabs device structure
7237 * @qid_base: first QID of the QMAN (out of 4 streams)
7238 * @stream: the QMAN's stream
7239 * @qman_base: base address of QMAN registers block
7240 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7242 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7243 u32 stream, u64 qman_base,
7246 u32 ci, qm_ci_stream_off, queue_len;
7247 struct hl_hw_queue *q;
7251 q = &hdev->kernel_queues[qid_base + stream];
7253 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7254 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7255 stream * qm_ci_stream_off;
7257 queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7258 q->int_queue_len : HL_QUEUE_LENGTH;
7260 hdev->asic_funcs->hw_queues_lock(hdev);
7263 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7267 /* we should start printing form ci -1 */
7268 ci = gaudi_queue_idx_dec(ci, queue_len);
7270 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7275 bd = q->kernel_address;
7278 len = le32_to_cpu(bd->len);
7279 /* len 0 means uninitialized entry- break */
7283 addr = le64_to_cpu(bd->ptr);
7285 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
7286 stream, ci, addr, len);
7288 /* get previous ci, wrap if needed */
7289 ci = gaudi_queue_idx_dec(ci, queue_len);
7292 hdev->asic_funcs->hw_queues_unlock(hdev);
7296 * print_qman_data_on_err - extract QMAN data on error
7298 * @hdev: pointer to the habanalabs device structure
7299 * @qid_base: first QID of the QMAN (out of 4 streams)
7300 * @stream: the QMAN's stream
7301 * @qman_base: base address of QMAN registers block
7303 * This function attempt to exatract as much data as possible on QMAN error.
7304 * On upper CP print the SW config stream data and last 8 PQEs.
7305 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
7307 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7308 u32 stream, u64 qman_base)
7312 if (stream != QMAN_STREAMS) {
7313 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7318 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7320 for (i = 0; i < QMAN_STREAMS; i++)
7321 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7325 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7326 const char *qm_name,
7330 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7331 u64 glbl_sts_addr, arb_err_addr;
7334 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7335 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7337 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7338 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7339 glbl_sts_clr_val = 0;
7340 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7345 if (i == QMAN_STREAMS)
7346 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7348 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7350 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7351 if (glbl_sts_val & BIT(j)) {
7352 dev_err_ratelimited(hdev->dev,
7353 "%s %s. err cause: %s\n",
7355 gaudi_qman_error_cause[j]);
7356 glbl_sts_clr_val |= BIT(j);
7360 /* Write 1 clear errors */
7361 if (!hdev->stop_on_err)
7362 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7364 print_qman_data_on_err(hdev, qid_base, i, qman_base);
7367 arb_err_val = RREG32(arb_err_addr);
7372 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7373 if (arb_err_val & BIT(j)) {
7374 dev_err_ratelimited(hdev->dev,
7375 "%s ARB_ERR. err cause: %s\n",
7377 gaudi_qman_arb_error_cause[j]);
7382 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7383 struct hl_eq_sm_sei_data *sei_data)
7385 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7387 /* Flip the bits as the enum is ordered in the opposite way */
7388 index = (index ^ 0x3) & 0x3;
7390 switch (sei_data->sei_cause) {
7391 case SM_SEI_SO_OVERFLOW:
7392 dev_err_ratelimited(hdev->dev,
7393 "%s SEI Error: SOB Group %u overflow/underflow",
7394 gaudi_sync_manager_names[index],
7395 le32_to_cpu(sei_data->sei_log));
7397 case SM_SEI_LBW_4B_UNALIGNED:
7398 dev_err_ratelimited(hdev->dev,
7399 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7400 gaudi_sync_manager_names[index],
7401 le32_to_cpu(sei_data->sei_log));
7403 case SM_SEI_AXI_RESPONSE_ERR:
7404 dev_err_ratelimited(hdev->dev,
7405 "%s SEI Error: AXI ID %u response error",
7406 gaudi_sync_manager_names[index],
7407 le32_to_cpu(sei_data->sei_log));
7410 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7411 le32_to_cpu(sei_data->sei_log));
7416 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7417 struct hl_eq_ecc_data *ecc_data)
7419 struct ecc_info_extract_params params;
7420 u64 ecc_address = 0, ecc_syndrom = 0;
7421 u8 index, memory_wrapper_idx = 0;
7422 bool extract_info_from_fw;
7425 if (hdev->asic_prop.fw_security_enabled) {
7426 extract_info_from_fw = true;
7427 goto extract_ecc_info;
7430 switch (event_type) {
7431 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7432 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7433 extract_info_from_fw = true;
7435 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7436 index = event_type - GAUDI_EVENT_TPC0_SERR;
7437 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7438 params.num_memories = 90;
7439 params.derr = false;
7440 extract_info_from_fw = false;
7442 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7443 index = event_type - GAUDI_EVENT_TPC0_DERR;
7444 params.block_address =
7445 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7446 params.num_memories = 90;
7448 extract_info_from_fw = false;
7450 case GAUDI_EVENT_MME0_ACC_SERR:
7451 case GAUDI_EVENT_MME1_ACC_SERR:
7452 case GAUDI_EVENT_MME2_ACC_SERR:
7453 case GAUDI_EVENT_MME3_ACC_SERR:
7454 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7455 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7456 params.num_memories = 128;
7457 params.derr = false;
7458 extract_info_from_fw = false;
7460 case GAUDI_EVENT_MME0_ACC_DERR:
7461 case GAUDI_EVENT_MME1_ACC_DERR:
7462 case GAUDI_EVENT_MME2_ACC_DERR:
7463 case GAUDI_EVENT_MME3_ACC_DERR:
7464 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7465 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7466 params.num_memories = 128;
7468 extract_info_from_fw = false;
7470 case GAUDI_EVENT_MME0_SBAB_SERR:
7471 case GAUDI_EVENT_MME1_SBAB_SERR:
7472 case GAUDI_EVENT_MME2_SBAB_SERR:
7473 case GAUDI_EVENT_MME3_SBAB_SERR:
7474 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7475 params.block_address =
7476 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7477 params.num_memories = 33;
7478 params.derr = false;
7479 extract_info_from_fw = false;
7481 case GAUDI_EVENT_MME0_SBAB_DERR:
7482 case GAUDI_EVENT_MME1_SBAB_DERR:
7483 case GAUDI_EVENT_MME2_SBAB_DERR:
7484 case GAUDI_EVENT_MME3_SBAB_DERR:
7485 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7486 params.block_address =
7487 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7488 params.num_memories = 33;
7490 extract_info_from_fw = false;
7497 if (extract_info_from_fw) {
7498 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7499 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7500 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7502 rc = gaudi_extract_ecc_info(hdev, ¶ms, &ecc_address,
7503 &ecc_syndrom, &memory_wrapper_idx);
7509 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7510 ecc_address, ecc_syndrom, memory_wrapper_idx);
7513 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7520 switch (event_type) {
7521 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7522 index = event_type - GAUDI_EVENT_TPC0_QM;
7523 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7524 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7525 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7527 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7528 index = event_type - GAUDI_EVENT_MME0_QM;
7529 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7530 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7531 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7533 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7534 index = event_type - GAUDI_EVENT_DMA0_QM;
7535 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7536 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7539 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7540 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7542 case GAUDI_EVENT_NIC0_QM0:
7543 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7544 qman_base = mmNIC0_QM0_BASE;
7545 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7547 case GAUDI_EVENT_NIC0_QM1:
7548 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7549 qman_base = mmNIC0_QM1_BASE;
7550 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7552 case GAUDI_EVENT_NIC1_QM0:
7553 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7554 qman_base = mmNIC1_QM0_BASE;
7555 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7557 case GAUDI_EVENT_NIC1_QM1:
7558 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7559 qman_base = mmNIC1_QM1_BASE;
7560 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7562 case GAUDI_EVENT_NIC2_QM0:
7563 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7564 qman_base = mmNIC2_QM0_BASE;
7565 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7567 case GAUDI_EVENT_NIC2_QM1:
7568 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7569 qman_base = mmNIC2_QM1_BASE;
7570 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7572 case GAUDI_EVENT_NIC3_QM0:
7573 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7574 qman_base = mmNIC3_QM0_BASE;
7575 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7577 case GAUDI_EVENT_NIC3_QM1:
7578 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7579 qman_base = mmNIC3_QM1_BASE;
7580 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7582 case GAUDI_EVENT_NIC4_QM0:
7583 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7584 qman_base = mmNIC4_QM0_BASE;
7585 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7587 case GAUDI_EVENT_NIC4_QM1:
7588 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7589 qman_base = mmNIC4_QM1_BASE;
7590 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7596 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7599 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7602 u32 engine_id_1, engine_id_2;
7609 * Init engine id by default as not valid and only if razwi initiated from engine with
7610 * engine id it will get valid value.
7611 * Init razwi type to default, will be changed only if razwi caused by page fault of
7614 engine_id_1 = U16_MAX;
7615 engine_id_2 = U16_MAX;
7616 razwi_type = U8_MAX;
7618 gaudi_get_event_desc(event_type, desc, sizeof(desc));
7619 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7623 gaudi_print_and_get_razwi_info(hdev, &engine_id_1, &engine_id_2);
7624 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
7626 /* In case it's the first razwi, save its parameters*/
7627 rc = atomic_cmpxchg(&hdev->last_error.razwi_write_disable, 0, 1);
7629 hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime;
7630 hdev->last_error.razwi_timestamp = ktime_get();
7631 hdev->last_error.razwi_addr = razwi_addr;
7632 hdev->last_error.razwi_engine_id_1 = engine_id_1;
7633 hdev->last_error.razwi_engine_id_2 = engine_id_2;
7635 * If first engine id holds non valid value the razwi initiator
7636 * does not have engine id
7638 hdev->last_error.razwi_non_engine_initiator = (engine_id_1 == U16_MAX);
7639 hdev->last_error.razwi_type = razwi_type;
7645 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7646 struct cpucp_pkt_sync_err *sync_err)
7648 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7650 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7651 sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7654 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7655 struct hl_eq_fw_alive *fw_alive)
7658 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7659 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7660 "Minor" : "Critical", fw_alive->process_id,
7661 fw_alive->thread_id, fw_alive->uptime_seconds);
7664 static int gaudi_non_hard_reset_late_init(struct hl_device *hdev)
7666 /* GAUDI doesn't support any reset except hard-reset */
7670 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7671 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7673 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7676 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7677 CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7678 if (!hbm_ecc_data) {
7679 dev_err(hdev->dev, "No FW ECC data");
7683 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7684 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7685 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7686 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7687 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7688 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7689 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7690 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7691 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7692 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7693 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7694 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7695 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7696 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7699 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7700 device, ch, wr_par, rd_par, ca_par, serr, derr);
7702 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7703 device, ch, hbm_ecc_data->first_addr, type,
7704 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7705 hbm_ecc_data->dec_cnt);
7709 if (hdev->asic_prop.fw_security_enabled) {
7710 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7714 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7715 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7716 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7717 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7721 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7722 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7723 (val >> 2) & 0x1, (val >> 3) & 0x1,
7726 val2 = RREG32(base + ch * 0x1000 + 0x060);
7728 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7730 RREG32(base + ch * 0x1000 + 0x064),
7731 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7732 (val2 & 0xFF0000) >> 16,
7733 (val2 & 0xFF000000) >> 24);
7736 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7737 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7741 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7742 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7743 (val >> 2) & 0x1, (val >> 3) & 0x1,
7746 val2 = RREG32(base + ch * 0x1000 + 0x070);
7748 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7750 RREG32(base + ch * 0x1000 + 0x074),
7751 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7752 (val2 & 0xFF0000) >> 16,
7753 (val2 & 0xFF000000) >> 24);
7756 /* Clear interrupts */
7757 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7758 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7759 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7760 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7761 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7762 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7765 val = RREG32(base + 0x8F30);
7766 val2 = RREG32(base + 0x8F34);
7770 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7773 val = RREG32(base + 0x8F40);
7774 val2 = RREG32(base + 0x8F44);
7778 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7785 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7787 switch (hbm_event_type) {
7788 case GAUDI_EVENT_HBM0_SPI_0:
7789 case GAUDI_EVENT_HBM0_SPI_1:
7791 case GAUDI_EVENT_HBM1_SPI_0:
7792 case GAUDI_EVENT_HBM1_SPI_1:
7794 case GAUDI_EVENT_HBM2_SPI_0:
7795 case GAUDI_EVENT_HBM2_SPI_1:
7797 case GAUDI_EVENT_HBM3_SPI_0:
7798 case GAUDI_EVENT_HBM3_SPI_1:
7804 /* Should never happen */
7808 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7809 char *interrupt_name)
7811 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7812 bool soft_reset_required = false;
7814 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7815 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7817 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7818 if (tpc_interrupts_cause & BIT(i)) {
7819 dev_err_ratelimited(hdev->dev,
7820 "TPC%d_%s interrupt cause: %s\n",
7821 tpc_id, interrupt_name,
7822 gaudi_tpc_interrupts_cause[i]);
7823 /* If this is QM error, we need to soft-reset */
7825 soft_reset_required = true;
7828 /* Clear interrupts */
7829 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7831 return soft_reset_required;
7834 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7836 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7839 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7841 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7844 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7847 ktime_t zero_time = ktime_set(0, 0);
7849 mutex_lock(&hdev->clk_throttling.lock);
7851 switch (event_type) {
7852 case GAUDI_EVENT_FIX_POWER_ENV_S:
7853 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7854 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7855 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7856 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7857 dev_info_ratelimited(hdev->dev,
7858 "Clock throttling due to power consumption\n");
7861 case GAUDI_EVENT_FIX_POWER_ENV_E:
7862 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7863 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7864 dev_info_ratelimited(hdev->dev,
7865 "Power envelop is safe, back to optimal clock\n");
7868 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7869 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7870 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7871 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7872 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7873 dev_info_ratelimited(hdev->dev,
7874 "Clock throttling due to overheating\n");
7877 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7878 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7879 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7880 dev_info_ratelimited(hdev->dev,
7881 "Thermal envelop is safe, back to optimal clock\n");
7885 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7890 mutex_unlock(&hdev->clk_throttling.lock);
7893 static void gaudi_handle_eqe(struct hl_device *hdev,
7894 struct hl_eq_entry *eq_entry)
7896 struct gaudi_device *gaudi = hdev->asic_specific;
7897 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7898 u32 fw_fatal_err_flag = 0;
7899 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7900 >> EQ_CTL_EVENT_TYPE_SHIFT);
7901 bool reset_required;
7905 if (event_type >= GAUDI_EVENT_SIZE) {
7906 dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7907 event_type, GAUDI_EVENT_SIZE - 1);
7911 gaudi->events_stat[event_type]++;
7912 gaudi->events_stat_aggregate[event_type]++;
7914 switch (event_type) {
7915 case GAUDI_EVENT_PCIE_CORE_DERR:
7916 case GAUDI_EVENT_PCIE_IF_DERR:
7917 case GAUDI_EVENT_PCIE_PHY_DERR:
7918 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7919 case GAUDI_EVENT_MME0_ACC_DERR:
7920 case GAUDI_EVENT_MME0_SBAB_DERR:
7921 case GAUDI_EVENT_MME1_ACC_DERR:
7922 case GAUDI_EVENT_MME1_SBAB_DERR:
7923 case GAUDI_EVENT_MME2_ACC_DERR:
7924 case GAUDI_EVENT_MME2_SBAB_DERR:
7925 case GAUDI_EVENT_MME3_ACC_DERR:
7926 case GAUDI_EVENT_MME3_SBAB_DERR:
7927 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7929 case GAUDI_EVENT_CPU_IF_ECC_DERR:
7930 case GAUDI_EVENT_PSOC_MEM_DERR:
7931 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7932 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7933 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7934 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7935 case GAUDI_EVENT_MMU_DERR:
7936 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7937 gaudi_print_irq_info(hdev, event_type, true);
7938 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7939 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7942 case GAUDI_EVENT_GIC500:
7943 case GAUDI_EVENT_AXI_ECC:
7944 case GAUDI_EVENT_L2_RAM_ECC:
7945 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7946 gaudi_print_irq_info(hdev, event_type, false);
7947 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7950 case GAUDI_EVENT_HBM0_SPI_0:
7951 case GAUDI_EVENT_HBM1_SPI_0:
7952 case GAUDI_EVENT_HBM2_SPI_0:
7953 case GAUDI_EVENT_HBM3_SPI_0:
7954 gaudi_print_irq_info(hdev, event_type, false);
7955 gaudi_hbm_read_interrupts(hdev,
7956 gaudi_hbm_event_to_dev(event_type),
7957 &eq_entry->hbm_ecc_data);
7958 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7961 case GAUDI_EVENT_HBM0_SPI_1:
7962 case GAUDI_EVENT_HBM1_SPI_1:
7963 case GAUDI_EVENT_HBM2_SPI_1:
7964 case GAUDI_EVENT_HBM3_SPI_1:
7965 gaudi_print_irq_info(hdev, event_type, false);
7966 gaudi_hbm_read_interrupts(hdev,
7967 gaudi_hbm_event_to_dev(event_type),
7968 &eq_entry->hbm_ecc_data);
7969 hl_fw_unmask_irq(hdev, event_type);
7972 case GAUDI_EVENT_TPC0_DEC:
7973 case GAUDI_EVENT_TPC1_DEC:
7974 case GAUDI_EVENT_TPC2_DEC:
7975 case GAUDI_EVENT_TPC3_DEC:
7976 case GAUDI_EVENT_TPC4_DEC:
7977 case GAUDI_EVENT_TPC5_DEC:
7978 case GAUDI_EVENT_TPC6_DEC:
7979 case GAUDI_EVENT_TPC7_DEC:
7980 gaudi_print_irq_info(hdev, event_type, true);
7981 reset_required = gaudi_tpc_read_interrupts(hdev,
7982 tpc_dec_event_to_tpc_id(event_type),
7983 "AXI_SLV_DEC_Error");
7984 if (reset_required) {
7985 dev_err(hdev->dev, "reset required due to %s\n",
7986 gaudi_irq_map_table[event_type].name);
7988 hl_device_reset(hdev, 0);
7990 hl_fw_unmask_irq(hdev, event_type);
7994 case GAUDI_EVENT_TPC0_KRN_ERR:
7995 case GAUDI_EVENT_TPC1_KRN_ERR:
7996 case GAUDI_EVENT_TPC2_KRN_ERR:
7997 case GAUDI_EVENT_TPC3_KRN_ERR:
7998 case GAUDI_EVENT_TPC4_KRN_ERR:
7999 case GAUDI_EVENT_TPC5_KRN_ERR:
8000 case GAUDI_EVENT_TPC6_KRN_ERR:
8001 case GAUDI_EVENT_TPC7_KRN_ERR:
8002 gaudi_print_irq_info(hdev, event_type, true);
8003 reset_required = gaudi_tpc_read_interrupts(hdev,
8004 tpc_krn_event_to_tpc_id(event_type),
8006 if (reset_required) {
8007 dev_err(hdev->dev, "reset required due to %s\n",
8008 gaudi_irq_map_table[event_type].name);
8010 hl_device_reset(hdev, 0);
8012 hl_fw_unmask_irq(hdev, event_type);
8016 case GAUDI_EVENT_PCIE_CORE_SERR:
8017 case GAUDI_EVENT_PCIE_IF_SERR:
8018 case GAUDI_EVENT_PCIE_PHY_SERR:
8019 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8020 case GAUDI_EVENT_MME0_ACC_SERR:
8021 case GAUDI_EVENT_MME0_SBAB_SERR:
8022 case GAUDI_EVENT_MME1_ACC_SERR:
8023 case GAUDI_EVENT_MME1_SBAB_SERR:
8024 case GAUDI_EVENT_MME2_ACC_SERR:
8025 case GAUDI_EVENT_MME2_SBAB_SERR:
8026 case GAUDI_EVENT_MME3_ACC_SERR:
8027 case GAUDI_EVENT_MME3_SBAB_SERR:
8028 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8029 case GAUDI_EVENT_CPU_IF_ECC_SERR:
8030 case GAUDI_EVENT_PSOC_MEM_SERR:
8031 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8032 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8033 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8034 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8036 case GAUDI_EVENT_MMU_SERR:
8037 gaudi_print_irq_info(hdev, event_type, true);
8038 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8039 hl_fw_unmask_irq(hdev, event_type);
8042 case GAUDI_EVENT_PCIE_DEC:
8043 case GAUDI_EVENT_MME0_WBC_RSP:
8044 case GAUDI_EVENT_MME0_SBAB0_RSP:
8045 case GAUDI_EVENT_MME1_WBC_RSP:
8046 case GAUDI_EVENT_MME1_SBAB0_RSP:
8047 case GAUDI_EVENT_MME2_WBC_RSP:
8048 case GAUDI_EVENT_MME2_SBAB0_RSP:
8049 case GAUDI_EVENT_MME3_WBC_RSP:
8050 case GAUDI_EVENT_MME3_SBAB0_RSP:
8051 case GAUDI_EVENT_CPU_AXI_SPLITTER:
8052 case GAUDI_EVENT_PSOC_AXI_DEC:
8053 case GAUDI_EVENT_PSOC_PRSTN_FALL:
8054 case GAUDI_EVENT_MMU_PAGE_FAULT:
8055 case GAUDI_EVENT_MMU_WR_PERM:
8056 case GAUDI_EVENT_RAZWI_OR_ADC:
8057 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8058 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8059 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8061 case GAUDI_EVENT_NIC0_QM0:
8062 case GAUDI_EVENT_NIC0_QM1:
8063 case GAUDI_EVENT_NIC1_QM0:
8064 case GAUDI_EVENT_NIC1_QM1:
8065 case GAUDI_EVENT_NIC2_QM0:
8066 case GAUDI_EVENT_NIC2_QM1:
8067 case GAUDI_EVENT_NIC3_QM0:
8068 case GAUDI_EVENT_NIC3_QM1:
8069 case GAUDI_EVENT_NIC4_QM0:
8070 case GAUDI_EVENT_NIC4_QM1:
8071 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8072 gaudi_print_irq_info(hdev, event_type, true);
8073 gaudi_handle_qman_err(hdev, event_type);
8074 hl_fw_unmask_irq(hdev, event_type);
8077 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8078 gaudi_print_irq_info(hdev, event_type, true);
8081 case GAUDI_EVENT_TPC0_BMON_SPMU:
8082 case GAUDI_EVENT_TPC1_BMON_SPMU:
8083 case GAUDI_EVENT_TPC2_BMON_SPMU:
8084 case GAUDI_EVENT_TPC3_BMON_SPMU:
8085 case GAUDI_EVENT_TPC4_BMON_SPMU:
8086 case GAUDI_EVENT_TPC5_BMON_SPMU:
8087 case GAUDI_EVENT_TPC6_BMON_SPMU:
8088 case GAUDI_EVENT_TPC7_BMON_SPMU:
8089 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8090 gaudi_print_irq_info(hdev, event_type, false);
8091 hl_fw_unmask_irq(hdev, event_type);
8094 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8095 gaudi_print_irq_info(hdev, event_type, false);
8096 gaudi_print_sm_sei_info(hdev, event_type,
8097 &eq_entry->sm_sei_data);
8098 rc = hl_state_dump(hdev);
8101 "Error during system state dump %d\n", rc);
8102 hl_fw_unmask_irq(hdev, event_type);
8105 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8106 gaudi_print_clk_change_info(hdev, event_type);
8107 hl_fw_unmask_irq(hdev, event_type);
8110 case GAUDI_EVENT_PSOC_GPIO_U16_0:
8111 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8113 "Received high temp H/W interrupt %d (cause %d)\n",
8117 case GAUDI_EVENT_DEV_RESET_REQ:
8118 gaudi_print_irq_info(hdev, event_type, false);
8121 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8122 gaudi_print_irq_info(hdev, event_type, false);
8123 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8126 case GAUDI_EVENT_FW_ALIVE_S:
8127 gaudi_print_irq_info(hdev, event_type, false);
8128 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8132 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8140 if (hdev->asic_prop.fw_security_enabled)
8141 hl_device_reset(hdev, HL_DRV_RESET_HARD
8142 | HL_DRV_RESET_BYPASS_REQ_TO_FW
8143 | fw_fatal_err_flag);
8144 else if (hdev->hard_reset_on_fw_events)
8145 hl_device_reset(hdev, HL_DRV_RESET_HARD | fw_fatal_err_flag);
8147 hl_fw_unmask_irq(hdev, event_type);
8150 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8153 struct gaudi_device *gaudi = hdev->asic_specific;
8156 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8157 return gaudi->events_stat_aggregate;
8160 *size = (u32) sizeof(gaudi->events_stat);
8161 return gaudi->events_stat;
8164 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8167 struct gaudi_device *gaudi = hdev->asic_specific;
8168 u32 status, timeout_usec;
8171 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8172 hdev->reset_info.hard_reset_pending)
8176 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8178 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8180 /* L0 & L1 invalidation */
8181 WREG32(mmSTLB_INV_PS, 3);
8182 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8183 WREG32(mmSTLB_INV_PS, 2);
8185 rc = hl_poll_timeout(
8193 WREG32(mmSTLB_INV_SET, 0);
8198 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8199 bool is_hard, u32 flags,
8200 u32 asid, u64 va, u64 size)
8202 /* Treat as invalidate all because there is no range invalidation
8205 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8208 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8209 u32 asid, u64 phys_addr)
8211 u32 status, timeout_usec;
8215 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8217 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8219 WREG32(MMU_ASID, asid);
8220 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8221 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8222 WREG32(MMU_BUSY, 0x80000000);
8224 rc = hl_poll_timeout(
8228 !(status & 0x80000000),
8234 "Timeout during MMU hop0 config of asid %d\n", asid);
8241 static int gaudi_send_heartbeat(struct hl_device *hdev)
8243 struct gaudi_device *gaudi = hdev->asic_specific;
8245 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8248 return hl_fw_send_heartbeat(hdev);
8251 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8253 struct gaudi_device *gaudi = hdev->asic_specific;
8254 struct asic_fixed_properties *prop = &hdev->asic_prop;
8257 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8260 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8261 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8266 if (!strlen(prop->cpucp_info.card_name))
8267 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8270 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8272 set_default_power_values(hdev);
8274 hdev->max_power = prop->max_power_default;
8279 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8280 u8 mask_len, struct seq_file *s)
8282 struct gaudi_device *gaudi = hdev->asic_specific;
8283 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8284 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8285 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8286 unsigned long *mask = (unsigned long *)mask_arr;
8287 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8288 bool is_idle = true, is_eng_idle, is_slave;
8290 int i, dma_id, port;
8294 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
8295 "--- ------- ------------ ---------- -------------\n");
8297 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8298 dma_id = gaudi_dma_assignment[i];
8299 offset = dma_id * DMA_QMAN_OFFSET;
8301 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8302 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8303 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8304 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8305 IS_DMA_IDLE(dma_core_sts0);
8306 is_idle &= is_eng_idle;
8308 if (mask && !is_eng_idle)
8309 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8311 seq_printf(s, fmt, dma_id,
8312 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8313 qm_cgm_sts, dma_core_sts0);
8318 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
8319 "--- ------- ------------ ---------- ----------\n");
8321 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8322 offset = i * TPC_QMAN_OFFSET;
8323 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8324 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8325 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8326 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8327 IS_TPC_IDLE(tpc_cfg_sts);
8328 is_idle &= is_eng_idle;
8330 if (mask && !is_eng_idle)
8331 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8333 seq_printf(s, fmt, i,
8334 is_eng_idle ? "Y" : "N",
8335 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8340 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
8341 "--- ------- ------------ ---------- -----------\n");
8343 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8344 offset = i * MME_QMAN_OFFSET;
8345 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8346 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8348 /* MME 1 & 3 are slaves, no need to check their QMANs */
8351 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8352 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8353 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8356 is_idle &= is_eng_idle;
8358 if (mask && !is_eng_idle)
8359 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8362 seq_printf(s, fmt, i,
8363 is_eng_idle ? "Y" : "N",
8364 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8366 seq_printf(s, mme_slave_fmt, i,
8367 is_eng_idle ? "Y" : "N", "-",
8373 seq_puts(s, "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
8374 "--- ------- ------------ ----------\n");
8376 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8377 offset = i * NIC_MACRO_QMAN_OFFSET;
8379 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8380 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8381 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8382 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8383 is_idle &= is_eng_idle;
8385 if (mask && !is_eng_idle)
8386 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8388 seq_printf(s, nic_fmt, port,
8389 is_eng_idle ? "Y" : "N",
8390 qm_glbl_sts0, qm_cgm_sts);
8394 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8395 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8396 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8397 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8398 is_idle &= is_eng_idle;
8400 if (mask && !is_eng_idle)
8401 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8403 seq_printf(s, nic_fmt, port,
8404 is_eng_idle ? "Y" : "N",
8405 qm_glbl_sts0, qm_cgm_sts);
8415 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8416 __acquires(&gaudi->hw_queues_lock)
8418 struct gaudi_device *gaudi = hdev->asic_specific;
8420 spin_lock(&gaudi->hw_queues_lock);
8423 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8424 __releases(&gaudi->hw_queues_lock)
8426 struct gaudi_device *gaudi = hdev->asic_specific;
8428 spin_unlock(&gaudi->hw_queues_lock);
8431 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8433 return hdev->pdev->device;
8436 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8439 struct gaudi_device *gaudi = hdev->asic_specific;
8441 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8444 return hl_fw_get_eeprom_data(hdev, data, max_size);
8448 * this function should be used only during initialization and/or after reset,
8449 * when there are no active users.
8451 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8457 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8460 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8462 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8464 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8465 lower_32_bits(tpc_kernel));
8466 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8467 upper_32_bits(tpc_kernel));
8469 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8470 lower_32_bits(tpc_kernel));
8471 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8472 upper_32_bits(tpc_kernel));
8473 /* set a valid LUT pointer, content is of no significance */
8474 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8475 lower_32_bits(tpc_kernel));
8476 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8477 upper_32_bits(tpc_kernel));
8479 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8480 lower_32_bits(CFG_BASE +
8481 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8483 WREG32(mmTPC0_CFG_TPC_CMD + offset,
8484 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8485 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8486 /* wait a bit for the engine to start executing */
8487 usleep_range(1000, 1500);
8489 /* wait until engine has finished executing */
8490 rc = hl_poll_timeout(
8492 mmTPC0_CFG_STATUS + offset,
8494 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8495 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8501 "Timeout while waiting for TPC%d icache prefetch\n",
8506 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8507 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8509 /* wait a bit for the engine to start executing */
8510 usleep_range(1000, 1500);
8512 /* wait until engine has finished executing */
8513 rc = hl_poll_timeout(
8515 mmTPC0_CFG_STATUS + offset,
8517 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8518 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8524 "Timeout while waiting for TPC%d vector pipe\n",
8529 rc = hl_poll_timeout(
8531 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8539 "Timeout while waiting for TPC%d kernel to execute\n",
8547 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8550 struct gaudi_device *gaudi = hdev->asic_specific;
8551 int min_alloc_order, rc, collective_cb_size;
8553 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8556 hdev->internal_cb_pool_virt_addr =
8557 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8558 HOST_SPACE_INTERNAL_CB_SZ,
8559 &hdev->internal_cb_pool_dma_addr,
8560 GFP_KERNEL | __GFP_ZERO);
8562 if (!hdev->internal_cb_pool_virt_addr)
8565 collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8566 sizeof(struct packet_fence);
8567 min_alloc_order = ilog2(collective_cb_size);
8569 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8570 if (!hdev->internal_cb_pool) {
8572 "Failed to create internal CB pool\n");
8574 goto free_internal_cb_pool;
8577 rc = gen_pool_add(hdev->internal_cb_pool,
8578 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8579 HOST_SPACE_INTERNAL_CB_SZ, -1);
8582 "Failed to add memory to internal CB pool\n");
8584 goto destroy_internal_cb_pool;
8587 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8588 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8589 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8591 if (!hdev->internal_cb_va_base) {
8593 goto destroy_internal_cb_pool;
8596 mutex_lock(&ctx->mmu_lock);
8597 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8598 hdev->internal_cb_pool_dma_addr,
8599 HOST_SPACE_INTERNAL_CB_SZ);
8601 hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8602 mutex_unlock(&ctx->mmu_lock);
8605 goto unreserve_internal_cb_pool;
8609 unreserve_internal_cb_pool:
8610 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8611 HOST_SPACE_INTERNAL_CB_SZ);
8612 destroy_internal_cb_pool:
8613 gen_pool_destroy(hdev->internal_cb_pool);
8614 free_internal_cb_pool:
8615 hdev->asic_funcs->asic_dma_free_coherent(hdev,
8616 HOST_SPACE_INTERNAL_CB_SZ,
8617 hdev->internal_cb_pool_virt_addr,
8618 hdev->internal_cb_pool_dma_addr);
8623 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8626 struct gaudi_device *gaudi = hdev->asic_specific;
8628 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8631 mutex_lock(&ctx->mmu_lock);
8632 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8633 HOST_SPACE_INTERNAL_CB_SZ);
8634 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8635 HOST_SPACE_INTERNAL_CB_SZ);
8636 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8637 mutex_unlock(&ctx->mmu_lock);
8639 gen_pool_destroy(hdev->internal_cb_pool);
8641 hdev->asic_funcs->asic_dma_free_coherent(hdev,
8642 HOST_SPACE_INTERNAL_CB_SZ,
8643 hdev->internal_cb_pool_virt_addr,
8644 hdev->internal_cb_pool_dma_addr);
8647 static int gaudi_ctx_init(struct hl_ctx *ctx)
8651 if (ctx->asid == HL_KERNEL_ASID_ID)
8654 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8658 rc = gaudi_restore_user_registers(ctx->hdev);
8660 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8665 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8667 if (ctx->asid == HL_KERNEL_ASID_ID)
8670 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8673 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8675 return gaudi_cq_assignment[cq_idx];
8678 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8680 return sizeof(struct packet_msg_short) +
8681 sizeof(struct packet_msg_prot) * 2;
8684 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8686 return sizeof(struct packet_msg_short) * 4 +
8687 sizeof(struct packet_fence) +
8688 sizeof(struct packet_msg_prot) * 2;
8691 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8693 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8696 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8699 struct hl_cb *cb = (struct hl_cb *) data;
8700 struct packet_msg_short *pkt;
8701 u32 value, ctl, pkt_size = sizeof(*pkt);
8703 pkt = cb->kernel_address + size;
8704 memset(pkt, 0, pkt_size);
8706 /* Inc by 1, Mode ADD */
8707 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8708 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8710 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8711 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8712 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8713 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8714 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8715 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8716 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8718 pkt->value = cpu_to_le32(value);
8719 pkt->ctl = cpu_to_le32(ctl);
8721 return size + pkt_size;
8724 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8727 u32 ctl, pkt_size = sizeof(*pkt);
8729 memset(pkt, 0, pkt_size);
8731 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8732 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8733 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8734 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8735 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8736 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8738 pkt->value = cpu_to_le32(value);
8739 pkt->ctl = cpu_to_le32(ctl);
8744 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8745 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8746 u16 sob_val, u16 mon_id)
8749 u32 ctl, value, pkt_size = sizeof(*pkt);
8750 u16 msg_addr_offset;
8753 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8755 "sob_base %u (mask %#x) is not valid\n",
8756 sob_base, sob_mask);
8761 * monitor_base should be the content of the base0 address registers,
8762 * so it will be added to the msg short offsets
8764 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8767 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8770 memset(pkt, 0, pkt_size);
8772 /* Monitor config packet: bind the monitor to a sync object */
8773 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8774 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8775 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8776 0); /* GREATER OR EQUAL*/
8777 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8779 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8780 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8781 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8782 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8783 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8784 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8785 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8787 pkt->value = cpu_to_le32(value);
8788 pkt->ctl = cpu_to_le32(ctl);
8793 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8795 u32 ctl, cfg, pkt_size = sizeof(*pkt);
8797 memset(pkt, 0, pkt_size);
8799 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8800 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8801 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8803 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8804 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8805 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8806 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8808 pkt->cfg = cpu_to_le32(cfg);
8809 pkt->ctl = cpu_to_le32(ctl);
8814 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8816 u32 offset, nic_index;
8819 case GAUDI_QUEUE_ID_DMA_0_0:
8820 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8822 case GAUDI_QUEUE_ID_DMA_0_1:
8823 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8825 case GAUDI_QUEUE_ID_DMA_0_2:
8826 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8828 case GAUDI_QUEUE_ID_DMA_0_3:
8829 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8831 case GAUDI_QUEUE_ID_DMA_1_0:
8832 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8834 case GAUDI_QUEUE_ID_DMA_1_1:
8835 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8837 case GAUDI_QUEUE_ID_DMA_1_2:
8838 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8840 case GAUDI_QUEUE_ID_DMA_1_3:
8841 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8843 case GAUDI_QUEUE_ID_DMA_5_0:
8844 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8846 case GAUDI_QUEUE_ID_DMA_5_1:
8847 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8849 case GAUDI_QUEUE_ID_DMA_5_2:
8850 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8852 case GAUDI_QUEUE_ID_DMA_5_3:
8853 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8855 case GAUDI_QUEUE_ID_TPC_7_0:
8856 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8858 case GAUDI_QUEUE_ID_TPC_7_1:
8859 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8861 case GAUDI_QUEUE_ID_TPC_7_2:
8862 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8864 case GAUDI_QUEUE_ID_TPC_7_3:
8865 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8867 case GAUDI_QUEUE_ID_NIC_0_0:
8868 case GAUDI_QUEUE_ID_NIC_1_0:
8869 case GAUDI_QUEUE_ID_NIC_2_0:
8870 case GAUDI_QUEUE_ID_NIC_3_0:
8871 case GAUDI_QUEUE_ID_NIC_4_0:
8872 case GAUDI_QUEUE_ID_NIC_5_0:
8873 case GAUDI_QUEUE_ID_NIC_6_0:
8874 case GAUDI_QUEUE_ID_NIC_7_0:
8875 case GAUDI_QUEUE_ID_NIC_8_0:
8876 case GAUDI_QUEUE_ID_NIC_9_0:
8877 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8878 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8879 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8880 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8882 case GAUDI_QUEUE_ID_NIC_0_1:
8883 case GAUDI_QUEUE_ID_NIC_1_1:
8884 case GAUDI_QUEUE_ID_NIC_2_1:
8885 case GAUDI_QUEUE_ID_NIC_3_1:
8886 case GAUDI_QUEUE_ID_NIC_4_1:
8887 case GAUDI_QUEUE_ID_NIC_5_1:
8888 case GAUDI_QUEUE_ID_NIC_6_1:
8889 case GAUDI_QUEUE_ID_NIC_7_1:
8890 case GAUDI_QUEUE_ID_NIC_8_1:
8891 case GAUDI_QUEUE_ID_NIC_9_1:
8892 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8893 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8894 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8895 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8897 case GAUDI_QUEUE_ID_NIC_0_2:
8898 case GAUDI_QUEUE_ID_NIC_1_2:
8899 case GAUDI_QUEUE_ID_NIC_2_2:
8900 case GAUDI_QUEUE_ID_NIC_3_2:
8901 case GAUDI_QUEUE_ID_NIC_4_2:
8902 case GAUDI_QUEUE_ID_NIC_5_2:
8903 case GAUDI_QUEUE_ID_NIC_6_2:
8904 case GAUDI_QUEUE_ID_NIC_7_2:
8905 case GAUDI_QUEUE_ID_NIC_8_2:
8906 case GAUDI_QUEUE_ID_NIC_9_2:
8907 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8908 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8909 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8910 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8912 case GAUDI_QUEUE_ID_NIC_0_3:
8913 case GAUDI_QUEUE_ID_NIC_1_3:
8914 case GAUDI_QUEUE_ID_NIC_2_3:
8915 case GAUDI_QUEUE_ID_NIC_3_3:
8916 case GAUDI_QUEUE_ID_NIC_4_3:
8917 case GAUDI_QUEUE_ID_NIC_5_3:
8918 case GAUDI_QUEUE_ID_NIC_6_3:
8919 case GAUDI_QUEUE_ID_NIC_7_3:
8920 case GAUDI_QUEUE_ID_NIC_8_3:
8921 case GAUDI_QUEUE_ID_NIC_9_3:
8922 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8923 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8924 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8925 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8931 *addr = CFG_BASE + offset;
8936 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8940 u16 msg_addr_offset;
8943 * monitor_base should be the content of the base0 address registers,
8944 * so it will be added to the msg short offsets
8946 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8948 /* First monitor config packet: low address of the sync */
8950 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8953 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8956 /* Second monitor config packet: high address of the sync */
8958 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8961 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8965 * Third monitor config packet: the payload, i.e. what to write when the
8969 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8972 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8977 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8978 struct hl_gen_wait_properties *prop)
8980 struct hl_cb *cb = (struct hl_cb *) prop->data;
8981 void *buf = cb->kernel_address;
8983 u32 size = prop->size;
8985 if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8986 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8991 size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8992 size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8993 prop->sob_mask, prop->sob_val, prop->mon_id);
8994 size += gaudi_add_fence_pkt(buf + size);
8999 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
9001 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
9003 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
9006 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9007 hw_sob->sob_id * 4, 0);
9009 kref_init(&hw_sob->kref);
9012 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9014 hdev->dma_mask = 48;
9017 static u64 gaudi_get_device_time(struct hl_device *hdev)
9019 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9021 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9024 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9025 u32 *block_size, u32 *block_id)
9030 static int gaudi_block_mmap(struct hl_device *hdev,
9031 struct vm_area_struct *vma,
9032 u32 block_id, u32 block_size)
9037 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9039 struct cpu_dyn_regs *dyn_regs =
9040 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9041 u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9042 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
9043 le32_to_cpu(dyn_regs->gic_host_ints_irq);
9045 WREG32(irq_handler_offset,
9046 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
9049 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9052 case HL_GAUDI_CPU_PLL: return CPU_PLL;
9053 case HL_GAUDI_PCI_PLL: return PCI_PLL;
9054 case HL_GAUDI_NIC_PLL: return NIC_PLL;
9055 case HL_GAUDI_DMA_PLL: return DMA_PLL;
9056 case HL_GAUDI_MESH_PLL: return MESH_PLL;
9057 case HL_GAUDI_MME_PLL: return MME_PLL;
9058 case HL_GAUDI_TPC_PLL: return TPC_PLL;
9059 case HL_GAUDI_IF_PLL: return IF_PLL;
9060 case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9061 case HL_GAUDI_HBM_PLL: return HBM_PLL;
9062 default: return -EINVAL;
9066 static int gaudi_add_sync_to_engine_map_entry(
9067 struct hl_sync_to_engine_map *map, u32 reg_value,
9068 enum hl_sync_engine_type engine_type, u32 engine_id)
9070 struct hl_sync_to_engine_map_entry *entry;
9072 /* Reg value represents a partial address of sync object,
9073 * it is used as unique identifier. For this we need to
9074 * clear the cutoff cfg base bits from the value.
9076 if (reg_value == 0 || reg_value == 0xffffffff)
9078 reg_value -= (u32)CFG_BASE;
9080 /* create a new hash entry */
9081 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9084 entry->engine_type = engine_type;
9085 entry->engine_id = engine_id;
9086 entry->sync_id = reg_value;
9087 hash_add(map->tb, &entry->node, reg_value);
9092 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9093 struct hl_sync_to_engine_map *map)
9095 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9099 /* Iterate over TPC engines */
9100 for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9102 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9103 sds->props[SP_NEXT_TPC] * i);
9105 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9108 goto free_sync_to_engine_map;
9111 /* Iterate over MME engines */
9112 for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9113 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9115 reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9116 sds->props[SP_NEXT_MME] * i +
9119 rc = gaudi_add_sync_to_engine_map_entry(
9120 map, reg_value, ENGINE_MME,
9121 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9123 goto free_sync_to_engine_map;
9127 /* Iterate over DMA engines */
9128 for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9129 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9130 sds->props[SP_DMA_QUEUES_OFFSET] * i);
9131 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9134 goto free_sync_to_engine_map;
9139 free_sync_to_engine_map:
9140 hl_state_dump_free_sync_to_engine_map(map);
9145 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9148 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9152 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
9154 const size_t max_write = 10;
9158 /* Sync object ID is calculated as follows:
9159 * (8 * group_id + cleared bits in mask)
9161 gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9163 mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9166 for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
9167 max_write; mask >>= 1, i++) {
9169 sob = gid * MONITOR_MAX_SOBS + i;
9172 offset += snprintf(sobs + offset, max_write,
9175 offset += snprintf(sobs + offset, max_write, "%u", sob);
9180 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9181 struct hl_device *hdev,
9182 struct hl_mon_state_dump *mon)
9185 char scratch_buf1[BIN_REG_STRING_SIZE],
9186 scratch_buf2[BIN_REG_STRING_SIZE];
9187 char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9189 name = hl_state_dump_get_monitor_name(hdev, mon);
9193 gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9195 return hl_snprintf_resize(
9197 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9199 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9201 hl_format_as_binary(
9202 scratch_buf1, sizeof(scratch_buf1),
9204 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9206 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9209 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9210 hl_format_as_binary(
9211 scratch_buf2, sizeof(scratch_buf2),
9213 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9219 static int gaudi_print_fences_single_engine(
9220 struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9221 enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9222 size_t *size, size_t *offset)
9224 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9225 int rc = -ENOMEM, i;
9226 u32 *statuses, *fences;
9228 statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9229 sizeof(*statuses), GFP_KERNEL);
9233 fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9234 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9235 sizeof(*fences), GFP_KERNEL);
9239 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9240 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9242 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9243 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9244 fences[i] = RREG32(base_offset + i * sizeof(u32));
9246 /* The actual print */
9247 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9249 u64 fence_cnt, fence_rdata;
9250 const char *engine_name;
9252 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9257 FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9258 fence_cnt = base_offset + CFG_BASE +
9260 (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9261 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9262 sds->props[SP_FENCE0_RDATA_OFFSET];
9263 engine_name = hl_sync_engine_to_string(engine_type);
9265 rc = hl_snprintf_resize(
9267 "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9268 engine_name, engine_id,
9270 fence_cnt, engine_name, engine_id, fence_id, i,
9271 fence_rdata, engine_name, engine_id, fence_id, i,
9289 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9290 .monitor_valid = gaudi_monitor_valid,
9291 .print_single_monitor = gaudi_print_single_monitor,
9292 .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9293 .print_fences_single_engine = gaudi_print_fences_single_engine,
9296 static void gaudi_state_dump_init(struct hl_device *hdev)
9298 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9301 for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9302 hash_add(sds->so_id_to_str_tb,
9303 &gaudi_so_id_to_str[i].node,
9304 gaudi_so_id_to_str[i].id);
9306 for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9307 hash_add(sds->monitor_id_to_str_tb,
9308 &gaudi_monitor_id_to_str[i].node,
9309 gaudi_monitor_id_to_str[i].id);
9311 sds->props = gaudi_state_dump_specs_props;
9313 sds->sync_namager_names = gaudi_sync_manager_names;
9315 sds->funcs = gaudi_state_dump_funcs;
9318 static u32 *gaudi_get_stream_master_qid_arr(void)
9320 return gaudi_stream_master;
9323 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9325 struct hl_device *hdev = dev_get_drvdata(dev);
9326 struct cpucp_info *cpucp_info;
9328 cpucp_info = &hdev->asic_prop.cpucp_info;
9330 return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9333 static DEVICE_ATTR_RO(infineon_ver);
9335 static struct attribute *gaudi_vrm_dev_attrs[] = {
9336 &dev_attr_infineon_ver.attr,
9339 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9340 struct attribute_group *dev_vrm_attr_grp)
9342 hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9343 dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9346 static const struct hl_asic_funcs gaudi_funcs = {
9347 .early_init = gaudi_early_init,
9348 .early_fini = gaudi_early_fini,
9349 .late_init = gaudi_late_init,
9350 .late_fini = gaudi_late_fini,
9351 .sw_init = gaudi_sw_init,
9352 .sw_fini = gaudi_sw_fini,
9353 .hw_init = gaudi_hw_init,
9354 .hw_fini = gaudi_hw_fini,
9355 .halt_engines = gaudi_halt_engines,
9356 .suspend = gaudi_suspend,
9357 .resume = gaudi_resume,
9359 .ring_doorbell = gaudi_ring_doorbell,
9360 .pqe_write = gaudi_pqe_write,
9361 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9362 .asic_dma_free_coherent = gaudi_dma_free_coherent,
9363 .scrub_device_mem = gaudi_scrub_device_mem,
9364 .get_int_queue_base = gaudi_get_int_queue_base,
9365 .test_queues = gaudi_test_queues,
9366 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9367 .asic_dma_pool_free = gaudi_dma_pool_free,
9368 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9369 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9370 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9371 .cs_parser = gaudi_cs_parser,
9372 .asic_dma_map_sg = gaudi_dma_map_sg,
9373 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9374 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9375 .update_eq_ci = gaudi_update_eq_ci,
9376 .context_switch = gaudi_context_switch,
9377 .restore_phase_topology = gaudi_restore_phase_topology,
9378 .debugfs_read32 = gaudi_debugfs_read32,
9379 .debugfs_write32 = gaudi_debugfs_write32,
9380 .debugfs_read64 = gaudi_debugfs_read64,
9381 .debugfs_write64 = gaudi_debugfs_write64,
9382 .debugfs_read_dma = gaudi_debugfs_read_dma,
9383 .add_device_attr = gaudi_add_device_attr,
9384 .handle_eqe = gaudi_handle_eqe,
9385 .get_events_stat = gaudi_get_events_stat,
9386 .read_pte = gaudi_read_pte,
9387 .write_pte = gaudi_write_pte,
9388 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9389 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9390 .send_heartbeat = gaudi_send_heartbeat,
9391 .debug_coresight = gaudi_debug_coresight,
9392 .is_device_idle = gaudi_is_device_idle,
9393 .non_hard_reset_late_init = gaudi_non_hard_reset_late_init,
9394 .hw_queues_lock = gaudi_hw_queues_lock,
9395 .hw_queues_unlock = gaudi_hw_queues_unlock,
9396 .get_pci_id = gaudi_get_pci_id,
9397 .get_eeprom_data = gaudi_get_eeprom_data,
9398 .send_cpu_message = gaudi_send_cpu_message,
9399 .pci_bars_map = gaudi_pci_bars_map,
9400 .init_iatu = gaudi_init_iatu,
9403 .halt_coresight = gaudi_halt_coresight,
9404 .ctx_init = gaudi_ctx_init,
9405 .ctx_fini = gaudi_ctx_fini,
9406 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9407 .load_firmware_to_device = gaudi_load_firmware_to_device,
9408 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9409 .get_signal_cb_size = gaudi_get_signal_cb_size,
9410 .get_wait_cb_size = gaudi_get_wait_cb_size,
9411 .gen_signal_cb = gaudi_gen_signal_cb,
9412 .gen_wait_cb = gaudi_gen_wait_cb,
9413 .reset_sob = gaudi_reset_sob,
9414 .reset_sob_group = gaudi_reset_sob_group,
9415 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9416 .get_device_time = gaudi_get_device_time,
9417 .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9418 .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9419 .scramble_addr = hl_mmu_scramble_addr,
9420 .descramble_addr = hl_mmu_descramble_addr,
9421 .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9422 .get_hw_block_id = gaudi_get_hw_block_id,
9423 .hw_block_mmap = gaudi_block_mmap,
9424 .enable_events_from_fw = gaudi_enable_events_from_fw,
9425 .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9426 .init_firmware_loader = gaudi_init_firmware_loader,
9427 .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9428 .state_dump_init = gaudi_state_dump_init,
9429 .get_sob_addr = gaudi_get_sob_addr,
9430 .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9431 .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
9435 * gaudi_set_asic_funcs - set GAUDI function pointers
9437 * @hdev: pointer to hl_device structure
9440 void gaudi_set_asic_funcs(struct hl_device *hdev)
9442 hdev->asic_funcs = &gaudi_funcs;