// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is always not
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT	0x1000000

#define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
		BIT(GAUDI_ENGINE_ID_MME_0) |\
		BIT(GAUDI_ENGINE_ID_MME_2) |\
		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define GAUDI_PLL_MAX 10

#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")

#define MONITOR_SOB_STRING_SIZE		256

static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};

static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
		"gaudi cpu eq"
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"MSG AXI LBW returned with error",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

enum gaudi_sm_sei_cause {
	GAUDI_SM_SEI_SO_OVERFLOW,
	GAUDI_SM_SEI_LBW_4B_UNALIGNED,
	GAUDI_SM_SEI_AXI_RESPONSE_ERR
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};

static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};

static s64 gaudi_state_dump_specs_props[] = {
	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
	[SP_MON_OBJ_WR_ADDR_LOW] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
	[SP_MON_OBJ_WR_ADDR_HIGH] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
	[SP_FENCE0_CNT_OFFSET] =
		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_FENCE0_RDATA_OFFSET] =
		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_CORES] = 1,
};

/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_N",
	"SYNC_MGR_W_S"
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
	bool disable_clock_gating;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
				u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
				struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
				u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
				u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);

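/*
 * Map a queue ID to its role in collective operations: external queues act
 * as collective masters, while the DMA5, TPC7 and NIC queues serve as
 * collective slaves.
 */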
static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

static inline void set_default_power_values(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	if (hdev->card_type == cpucp_card_type_pmc) {
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;

		if (prop->fw_security_enabled)
			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
		else
			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
	} else {
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
	}
}

static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;
		}

		prop->hw_queues_props[i].collective_mode =
				get_collective_mode(hdev, i);
	}

	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream is reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address +
					prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address +
					prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
					SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop0_shift = HOP0_SHIFT;
	prop->pmmu.hop1_shift = HOP1_SHIFT;
	prop->pmmu.hop2_shift = HOP2_SHIFT;
	prop->pmmu.hop3_shift = HOP3_SHIFT;
	prop->pmmu.hop4_shift = HOP4_SHIFT;
	prop->pmmu.hop0_mask = HOP0_MASK;
	prop->pmmu.hop1_mask = HOP1_MASK;
	prop->pmmu.hop2_mask = HOP2_MASK;
	prop->pmmu.hop3_mask = HOP3_MASK;
	prop->pmmu.hop4_mask = HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

	/* PMMU and HPMMU are the same except of page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_msix_interrupt = USHRT_MAX;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->clk_pll_index = HL_GAUDI_MME_PLL;
	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;

	return 0;
}

static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

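/*
 * Re-program the HBM BAR inbound window so it points at 'addr' in device
 * memory. Returns the previous window base so callers can restore it, or
 * U64_MAX if the window cannot be moved.
 */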
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_BAR_ID),
			SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
					mmCPU_BOOT_DEV_STS0,
					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1,
					GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_info(hdev->dev,
			"H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true, false);
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);

	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if (hdev->asic_prop.fw_security_enabled) {
		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);

		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	} else {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev,
				"Received invalid div select value: %d",
				div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}

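/*
 * Build a single LIN_DMA packet that copies the TPC kernel binary from host
 * memory to the SRAM user region, submit it as a job on QMAN0 and then run
 * the kernel on every TPC engine.
 */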
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
	dst_addr = (prop->sram_user_base_address &
			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}

/**
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
			&dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
			dma_handle);

out:
	release_firmware(fw);
	return rc;
}

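/*
 * Assign the SOBs of the stream's currently active SOB group to all
 * collective slave queues (NIC, DMA5 and TPC7) of that stream.
 */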
static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}

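/* kref release callback: clear all SOBs in the group and re-arm the kref */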
static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	int i;

	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);

	/* Set collective engine bit */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
				BIT(i % HL_MAX_SOBS_PER_MONITOR);
}

static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	gaudi_collective_mstr_sob_mask_set(gaudi);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}

static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	if (job->cs->encaps_signals) {
		/* use the encaps signal handle stored earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
						cs_cmpl);

		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
				job->cs->sequence,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);
	}

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}

static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	/* In encaps signals case the SOB info will be retrieved from
	 * the handle in gaudi_collective_slave_init_job.
	 */
	if (!cs->encaps_signals) {
		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevent out of sync of hw_sob
	 * refcount value, changed by signal/wait flows.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}

	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}

static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
		u32 encaps_signal_offset)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI-X
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size,
			hdev->mmu_enable && !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/* since it's guaranteed to have only one chunk in the collective wait
	 * cs, we can use this chunk to set the encapsulated signal offset
	 * in the jobs.
	 */
	if (cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() out of two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		u32 wait_queue_id, u32 collective_engine_id,
		u32 encaps_signal_offset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * The rest of the jobs go to the collective slave queues which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id,
				wait_queue_id, encaps_signal_offset);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
						BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id,
				wait_queue_id, encaps_signal_offset);
		}

		if (rc)
			return rc;
	}

	return rc;
}

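/*
 * Late init runs once the FW and queues are up: fetch CPUCP info, disable
 * the unconnected NIC ports on PCI cards, enable PCI access from the device
 * CPU, scrub memory, clear the MMU page tables, load the TPC kernel and set
 * up the collective-wait resources.
 */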
static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	/* Scrub both SRAM and DRAM */
	rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
	if (rc)
		goto disable_pci_access;

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	/* We only support a single ASID for the user, so for the sake of optimization, just
	 * initialize the ASID one time during device initialization with the fixed value of 1
	 */
	gaudi_mmu_prepare(hdev, 1);

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);

	return rc;
}

static void gaudi_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	int i = 0;

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}

static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bits addresses, while bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical in all allocated range.
	 */

	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_CPU_ACCESSIBLE_MEM_SIZE,
						&dma_addr_arr[i],
						GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	for (j = 0 ; j < i ; j++)
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
						HL_CPU_ACCESSIBLE_MEM_SIZE,
						virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}

static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		q = &gaudi->internal_qmans[i];
		if (!q->pq_kernel_addr)
			continue;
		hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
						q->pq_kernel_addr,
						q->pq_dma_addr);
	}
}

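/*
 * Allocate a PQ buffer for every internal queue; the buffer size depends on
 * the engine type (HBM DMA, MME, TPC or NIC) behind the queue.
 */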
static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	int rc;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
			continue;

		q = &gaudi->internal_qmans[i];

		switch (i) {
		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
			break;
		default:
			dev_err(hdev->dev, "Bad internal queue index %d", i);
			rc = -EINVAL;
			goto free_internal_qmans_pq_mem;
		}

		q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
						hdev, q->pq_size,
						&q->pq_dma_addr,
						GFP_KERNEL | __GFP_ZERO);
		if (!q->pq_kernel_addr) {
			rc = -ENOMEM;
			goto free_internal_qmans_pq_mem;
		}
	}

	return 0;

free_internal_qmans_pq_mem:
	gaudi_free_internal_qmans_pq_mem(hdev);
	return rc;
}

static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_mem_region *region;

	/* CFG */
	region = &hdev->pci_mem_region[PCI_REGION_CFG];
	region->region_base = CFG_BASE;
	region->region_size = CFG_SIZE;
	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = CFG_BAR_ID;
	region->used = 1;

	/* SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
	region->region_base = SRAM_BASE_ADDR;
	region->region_size = SRAM_SIZE;
	region->offset_in_bar = 0;
	region->bar_size = SRAM_BAR_SIZE;
	region->bar_id = SRAM_BAR_ID;
	region->used = 1;

	/* DRAM */
	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
	region->region_base = DRAM_PHYS_BASE;
	region->region_size = hdev->asic_prop.dram_size;
	region->offset_in_bar = 0;
	region->bar_size = prop->dram_pci_bar_size;
	region->bar_id = HBM_BAR_ID;
	region->used = 1;

	/* SP SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
	region->region_base = PSOC_SCRATCHPAD_ADDR;
	region->region_size = PSOC_SCRATCHPAD_SIZE;
	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = CFG_BAR_ID;
	region->used = 1;
}

static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;

	/* Allocate device structure */
	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
	if (!gaudi)
		return -ENOMEM;

	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
		if (gaudi_irq_map_table[i].valid) {
			if (event_id == GAUDI_EVENT_SIZE) {
				dev_err(hdev->dev,
					"Event array exceeds the limit of %u events\n",
					GAUDI_EVENT_SIZE);
				rc = -EINVAL;
				goto free_gaudi_device;
			}

			gaudi->events[event_id++] =
					gaudi_irq_map_table[i].fc_id;
		}
	}

	gaudi->cpucp_info_get = gaudi_cpucp_info_get;

	hdev->asic_specific = gaudi;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);
	mutex_init(&gaudi->clk_gate_mutex);

	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;
	hdev->supports_staged_submission = true;
	hdev->supports_wait_for_multi_cs = true;

	hdev->asic_funcs->set_pci_memory_regions(hdev);
	hdev->stream_master_qid_arr =
				hdev->asic_funcs->get_stream_master_qid_arr();
	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}

static int gaudi_sw_fini(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	gaudi_free_internal_qmans_pq_mem(hdev);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	mutex_destroy(&gaudi->clk_gate_mutex);

	kfree(gaudi);

	return 0;
}

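/* In single-MSI mode one interrupt vector serves all CQs and the EQ */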
static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
{
	struct hl_device *hdev = arg;
	int i;

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);

	hl_irq_handler_eq(irq, &hdev->event_queue);

	return IRQ_HANDLED;
}

1956 * For backward compatibility, new MSI interrupts should be set after the
1957 * existing CPU and NIC interrupts.
1959 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1964 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1965 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1966 GAUDI_EVENT_QUEUE_MSI_IDX);
1968 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1969 (nr + NIC_NUMBER_OF_ENGINES + 1);
1971 return pci_irq_vector(hdev->pdev, msi_vec);
1972 }
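/*
 * Editor's note, an illustrative walk-through of the mapping above; the
 * concrete constants are assumptions, not taken from this file. If
 * GAUDI_EVENT_QUEUE_MSI_IDX were 8 and NIC_NUMBER_OF_ENGINES were 10:
 *   nr = 3, cpu_eq = false -> msi_vec = 3  (CQ below the CPU EQ)
 *   nr = 8, cpu_eq = true  -> msi_vec = 8  (the CPU EQ itself)
 *   nr = 9, cpu_eq = false -> msi_vec = 9 + 10 + 1 = 20, i.e. a new
 *   interrupt placed after the existing CPU and NIC vectors.
 */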
1974 static int gaudi_enable_msi_single(struct hl_device *hdev)
1975 {
1976 int rc, irq;
1978 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1980 irq = gaudi_pci_irq_vector(hdev, 0, false);
1981 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1982 "gaudi single msi", hdev);
1985 "Failed to request single MSI IRQ\n");
1990 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1991 {
1992 int cq_cnt = hdev->asic_prop.completion_queues_count;
1993 int rc, i, irq_cnt_init, irq;
1995 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1996 irq = gaudi_pci_irq_vector(hdev, i, false);
1997 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1998 &hdev->completion_queue[i]);
1999 if (rc) {
2000 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2001 goto free_irqs;
2002 }
2003 }
2005 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2006 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2007 &hdev->event_queue);
2008 if (rc) {
2009 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2010 goto free_irqs;
2011 }
2013 return 0;
2015 free_irqs:
2016 for (i = 0 ; i < irq_cnt_init ; i++)
2017 free_irq(gaudi_pci_irq_vector(hdev, i, false),
2018 &hdev->completion_queue[i]);
2020 return rc;
2021 }
2022 static int gaudi_enable_msi(struct hl_device *hdev)
2023 {
2024 struct gaudi_device *gaudi = hdev->asic_specific;
2025 int rc;
2027 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2028 return 0;
2030 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2031 if (rc < 0) {
2032 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2033 return rc;
2034 }
2036 if (rc < NUMBER_OF_INTERRUPTS) {
2037 gaudi->multi_msi_mode = false;
2038 rc = gaudi_enable_msi_single(hdev);
2039 } else {
2040 gaudi->multi_msi_mode = true;
2041 rc = gaudi_enable_msi_multi(hdev);
2042 }
2044 if (rc)
2045 goto free_pci_irq_vectors;
2047 gaudi->hw_cap_initialized |= HW_CAP_MSI;
2049 return 0;
2051 free_pci_irq_vectors:
2052 pci_free_irq_vectors(hdev->pdev);
2053 return rc;
2054 }
2056 static void gaudi_sync_irqs(struct hl_device *hdev)
2057 {
2058 struct gaudi_device *gaudi = hdev->asic_specific;
2059 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2061 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2062 return;
2064 /* Wait for all pending IRQs to be finished */
2065 if (gaudi->multi_msi_mode) {
2066 for (i = 0 ; i < cq_cnt ; i++)
2067 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2069 synchronize_irq(gaudi_pci_irq_vector(hdev,
2070 GAUDI_EVENT_QUEUE_MSI_IDX,
2071 true));
2072 } else {
2073 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2074 }
2075 }
2077 static void gaudi_disable_msi(struct hl_device *hdev)
2078 {
2079 struct gaudi_device *gaudi = hdev->asic_specific;
2080 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2082 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2083 return;
2085 gaudi_sync_irqs(hdev);
2087 if (gaudi->multi_msi_mode) {
2088 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2089 true);
2090 free_irq(irq, &hdev->event_queue);
2092 for (i = 0 ; i < cq_cnt ; i++) {
2093 irq = gaudi_pci_irq_vector(hdev, i, false);
2094 free_irq(irq, &hdev->completion_queue[i]);
2095 }
2096 } else {
2097 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2098 }
2100 pci_free_irq_vectors(hdev->pdev);
2102 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2105 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2106 {
2107 struct gaudi_device *gaudi = hdev->asic_specific;
2109 if (hdev->asic_prop.fw_security_enabled)
2110 return;
2112 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2113 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2114 return;
2116 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2117 return;
2119 if (!hdev->sram_scrambler_enable)
2120 return;
2122 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2123 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2124 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2125 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2126 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2127 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2128 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2129 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2130 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2131 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2132 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2133 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2134 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2135 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2136 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2137 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2139 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2140 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2141 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2142 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2143 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2144 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2145 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2146 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2147 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2148 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2149 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2150 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2151 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2152 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2153 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2154 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2156 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2157 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2158 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2159 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2160 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2161 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2162 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2163 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2164 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2165 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2166 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2167 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2168 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2169 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2170 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2171 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2173 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2174 }
2176 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2177 {
2178 struct gaudi_device *gaudi = hdev->asic_specific;
2180 if (hdev->asic_prop.fw_security_enabled)
2181 return;
2183 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2184 CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2185 return;
2187 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2188 return;
2190 if (!hdev->dram_scrambler_enable)
2191 return;
2193 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2194 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2195 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2196 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2197 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2198 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2199 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2200 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2201 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2202 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2203 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2204 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2205 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2206 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2207 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2208 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2210 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2211 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2212 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2213 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2214 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2215 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2216 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2217 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2218 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2219 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2220 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2221 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2222 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2223 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2224 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2225 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2227 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2228 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2229 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2230 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2231 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2232 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2233 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2234 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2235 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2236 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2237 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2238 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2239 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2240 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2241 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2242 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2244 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2245 }
2247 static void gaudi_init_e2e(struct hl_device *hdev)
2248 {
2249 if (hdev->asic_prop.fw_security_enabled)
2250 return;
2252 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2253 CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2254 return;
2256 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2257 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2258 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2259 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
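/*
 * Editor's reading (not an authoritative statement): the HBM WR/RD credit
 * sizes appear to be given in bytes and shifted right by 3 because the
 * hardware expects them in 8-byte units, while the PCI sizes are written
 * as-is.
 */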
2261 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2262 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2263 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2264 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2266 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2267 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2268 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2269 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2271 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2272 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2273 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2274 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2276 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2277 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2278 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2279 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2281 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2282 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2283 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2284 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2286 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2287 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2288 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2289 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2291 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2292 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2293 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2294 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2296 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2297 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2298 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2299 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2301 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2302 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2303 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2304 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2306 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2307 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2308 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2309 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2311 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2312 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2313 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2314 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2316 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2317 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2318 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2319 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2321 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2322 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2323 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2324 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2326 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2327 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2328 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2329 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2331 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2332 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2333 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2334 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2336 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2337 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2338 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2339 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2341 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2342 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2343 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2344 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2346 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2347 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2348 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2349 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2351 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2352 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2353 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2354 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2356 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2357 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2358 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2359 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2361 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2362 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2363 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2364 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2366 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2367 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2368 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2369 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2371 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2372 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2373 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2374 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2376 if (!hdev->dram_scrambler_enable) {
2377 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2378 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2379 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2380 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2382 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2383 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2384 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2385 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2387 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2388 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2389 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2390 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2392 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2393 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2394 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2395 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2397 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2398 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2399 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2400 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2402 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2403 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2404 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2405 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2407 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2408 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2409 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2410 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2412 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2413 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2414 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2415 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2417 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2418 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2419 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2420 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2422 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2423 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2424 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2425 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2427 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2428 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2429 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2430 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2432 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2433 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2434 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2435 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2437 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2438 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2439 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2440 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2442 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2443 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2444 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2445 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2447 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2448 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2449 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2450 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2452 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2453 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2454 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2455 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2457 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2458 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2459 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2460 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2462 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2463 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2464 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2465 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2467 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2468 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2469 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2470 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2472 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2473 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2474 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2475 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2477 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2478 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2479 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2480 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2482 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2483 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2484 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2485 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2487 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2488 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2489 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2490 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2492 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2493 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2494 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2495 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2496 }
2498 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2499 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2500 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2501 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2503 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2504 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2505 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2506 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2508 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2509 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2510 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2511 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2513 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2514 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2515 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2516 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2518 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2519 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2520 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2521 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2523 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2524 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2525 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2526 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2528 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2529 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2530 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2531 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2533 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2534 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2535 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2536 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2538 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2539 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2540 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2541 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2543 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2544 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2545 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2546 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2548 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2549 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2550 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2551 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2553 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2554 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2555 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2556 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2558 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2559 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2560 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2561 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2563 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2564 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2565 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2566 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2568 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2569 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2570 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2571 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2573 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2574 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2575 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2576 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2578 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2579 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2580 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2581 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2583 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2584 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2585 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2586 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2588 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2589 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2590 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2591 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2593 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2594 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2595 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2596 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2598 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2599 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2600 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2601 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2603 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2604 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2605 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2606 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2608 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2609 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2610 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2611 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2613 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2614 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2615 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2616 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2617 }
2619 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2620 {
2621 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2623 if (hdev->asic_prop.fw_security_enabled)
2624 return;
2626 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2627 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2628 return;
2630 hbm0_wr = 0x33333333;
2631 hbm0_rd = 0x77777777;
2632 hbm1_wr = 0x55555555;
2633 hbm1_rd = 0xDDDDDDDD;
2635 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2636 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2637 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2638 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2640 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2641 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2642 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2643 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2645 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2646 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2647 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2648 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2650 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2651 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2652 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2653 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
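/*
 * Editor's assumption: each hex digit in the credit-count values above
 * presumably encodes the credit budget of one requestor on the
 * corresponding DMA_IF, with different budgets for HBM0 vs HBM1 and for
 * reads vs writes; the source gives no breakdown, so this is a guess.
 */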
2655 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2656 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2657 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2658 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2659 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2660 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2661 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2662 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2663 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2664 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2665 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2666 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2668 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2669 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2670 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2671 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2672 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2673 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2674 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2675 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2676 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2677 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2678 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2679 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2680 }
2682 static void gaudi_init_golden_registers(struct hl_device *hdev)
2683 {
2684 u32 tpc_offset;
2685 int tpc_id, i;
2687 gaudi_init_e2e(hdev);
2688 gaudi_init_hbm_cred(hdev);
2690 for (tpc_id = 0, tpc_offset = 0;
2691 tpc_id < TPC_NUMBER_OF_ENGINES;
2692 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2693 /* Mask all arithmetic interrupts from TPC */
2694 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2695 /* Set 16 cache lines */
2696 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2697 ICACHE_FETCH_LINE_NUM, 2);
2698 }
2700 /* Make sure the first 128 bytes in SRAM are 0 for Tensor DMA */
2701 for (i = 0 ; i < 128 ; i += 8)
2702 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2704 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2705 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2706 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2707 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2708 }
2710 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2711 int qman_id, dma_addr_t qman_pq_addr)
2712 {
2713 struct cpu_dyn_regs *dyn_regs =
2714 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2715 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2716 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2717 u32 q_off, dma_qm_offset;
2718 u32 dma_qm_err_cfg, irq_handler_offset;
2720 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2722 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2723 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2724 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2725 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2726 so_base_en_lo = lower_32_bits(CFG_BASE +
2727 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2728 so_base_en_hi = upper_32_bits(CFG_BASE +
2729 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2730 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2731 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2732 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2733 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2734 so_base_ws_lo = lower_32_bits(CFG_BASE +
2735 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2736 so_base_ws_hi = upper_32_bits(CFG_BASE +
2737 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2739 q_off = dma_qm_offset + qman_id * 4;
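/*
 * Editor's note: the per-stream copies of the PQ/CP registers are laid
 * out 4 bytes apart within a QMAN block, so q_off selects the register
 * set of stream 'qman_id' inside the block chosen by dma_qm_offset.
 */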
2741 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2742 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2744 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2745 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2746 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2748 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2749 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2750 QMAN_LDMA_SRC_OFFSET);
2751 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2752 QMAN_LDMA_DST_OFFSET);
2754 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2755 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2756 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2757 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2758 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2759 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2760 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2761 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2763 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2765 /* The following configuration is needed only once per QMAN */
2766 if (qman_id == 0) {
2767 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2768 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2769 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2771 /* Configure RAZWI IRQ */
2772 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2773 if (hdev->stop_on_err)
2774 dma_qm_err_cfg |=
2775 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2777 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2779 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2780 lower_32_bits(CFG_BASE + irq_handler_offset));
2781 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2782 upper_32_bits(CFG_BASE + irq_handler_offset));
2784 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2785 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2786 dma_id);
2788 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2789 QM_ARB_ERR_MSG_EN_MASK);
2791 /* Increase ARB WDT to support streams architecture */
2792 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2793 GAUDI_ARB_WDT_TIMEOUT);
2795 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2796 QMAN_EXTERNAL_MAKE_TRUSTED);
2798 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2799 }
2800 }
2802 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2803 {
2804 struct cpu_dyn_regs *dyn_regs =
2805 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2806 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2807 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2808 u32 irq_handler_offset;
2810 /* Set to maximum possible according to physical size */
2811 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2812 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2814 /* WA for H/W bug H3-2116 */
2815 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2817 /* The STOP_ON bit implies no completion of the operation in case of RAZWI */
2818 if (hdev->stop_on_err)
2819 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2821 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2823 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2824 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2825 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2827 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2828 lower_32_bits(CFG_BASE + irq_handler_offset));
2829 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2830 upper_32_bits(CFG_BASE + irq_handler_offset));
2832 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2833 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2834 WREG32(mmDMA0_CORE_PROT + dma_offset,
2835 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2836 /* If the channel is secured, it should be in MMU bypass mode */
2837 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2838 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2839 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2840 }
2842 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2843 u32 enable_mask)
2844 {
2845 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2847 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2848 }
2850 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2851 {
2852 struct gaudi_device *gaudi = hdev->asic_specific;
2853 struct hl_hw_queue *q;
2854 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2856 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2857 return;
2859 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2860 dma_id = gaudi_dma_assignment[i];
2861 /*
2862 * For queues after the CPU Q, we need to add 1 to get the
2863 * correct queue index. In addition, we need to add the CPU EQ
2864 * and NIC IRQs in order to get the correct MSI register.
2865 */
2866 if (dma_id) {
2867 cpu_skip = 1;
2868 nic_skip = NIC_NUMBER_OF_ENGINES;
2869 } else {
2870 cpu_skip = 0;
2871 nic_skip = 0;
2872 }
2874 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2875 q_idx = 4 * dma_id + j + cpu_skip;
2876 q = &hdev->kernel_queues[q_idx];
2877 q->cq_id = cq_id++;
2878 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2879 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2880 q->bus_address);
2881 }
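/*
 * Editor's note: each PCI DMA channel exposes four streams, so stream j
 * of channel dma_id lands on kernel queue 4 * dma_id + j, with cpu_skip
 * shifting the index past the CPU queue slot as the comment above
 * explains.
 */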
2883 gaudi_init_dma_core(hdev, dma_id);
2885 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2886 }
2888 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2889 }
2891 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2892 int qman_id, u64 qman_base_addr)
2893 {
2894 struct cpu_dyn_regs *dyn_regs =
2895 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2896 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2897 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2898 u32 dma_qm_err_cfg, irq_handler_offset;
2899 u32 q_off, dma_qm_offset;
2901 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2903 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2904 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2905 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2906 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2907 so_base_en_lo = lower_32_bits(CFG_BASE +
2908 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2909 so_base_en_hi = upper_32_bits(CFG_BASE +
2910 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2911 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2912 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2913 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2914 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2915 so_base_ws_lo = lower_32_bits(CFG_BASE +
2916 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2917 so_base_ws_hi = upper_32_bits(CFG_BASE +
2918 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2920 q_off = dma_qm_offset + qman_id * 4;
2922 if (qman_id < 4) {
2923 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2924 lower_32_bits(qman_base_addr));
2925 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2926 upper_32_bits(qman_base_addr));
2928 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2929 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2930 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2932 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2933 QMAN_CPDMA_SIZE_OFFSET);
2934 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2935 QMAN_CPDMA_SRC_OFFSET);
2936 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2937 QMAN_CPDMA_DST_OFFSET);
2938 } else {
2939 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2940 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2941 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2943 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2944 QMAN_LDMA_SIZE_OFFSET);
2945 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2946 QMAN_LDMA_SRC_OFFSET);
2947 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2948 QMAN_LDMA_DST_OFFSET);
2950 /* Configure RAZWI IRQ */
2951 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2952 if (hdev->stop_on_err)
2953 dma_qm_err_cfg |=
2954 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2956 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2958 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2959 lower_32_bits(CFG_BASE + irq_handler_offset));
2960 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2961 upper_32_bits(CFG_BASE + irq_handler_offset));
2963 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2964 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2965 dma_id);
2967 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2968 QM_ARB_ERR_MSG_EN_MASK);
2970 /* Increase ARB WDT to support streams architecture */
2971 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2972 GAUDI_ARB_WDT_TIMEOUT);
2974 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2975 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2976 QMAN_INTERNAL_MAKE_TRUSTED);
2977 }
2979 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2980 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2981 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2982 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2984 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2985 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2986 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2987 mtr_base_ws_lo);
2988 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2989 mtr_base_ws_hi);
2990 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2991 so_base_ws_lo);
2992 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2993 so_base_ws_hi);
2994 }
2995 }
2997 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2998 {
2999 struct gaudi_device *gaudi = hdev->asic_specific;
3000 struct gaudi_internal_qman_info *q;
3001 u64 qman_base_addr;
3002 int i, j, dma_id, internal_q_index;
3004 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
3005 return;
3007 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
3008 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
3010 for (j = 0 ; j < QMAN_STREAMS ; j++) {
3011 /*
3012 * Add the CPU queue in order to get the correct queue
3013 * number, as all internal queues are placed after it.
3014 */
3015 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
3017 q = &gaudi->internal_qmans[internal_q_index];
3018 qman_base_addr = (u64) q->pq_dma_addr;
3019 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
3020 qman_base_addr);
3021 }
3023 /* Initializing lower CP for HBM DMA QMAN */
3024 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
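/*
 * Editor's note (inference from the call site): qman_id 4 is the lower
 * CP, which is fed by the upper CPs rather than by a PQ of its own, hence
 * the 0 passed here as the PQ base address.
 */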
3026 gaudi_init_dma_core(hdev, dma_id);
3028 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
3029 }
3031 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
3032 }
3034 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
3035 int qman_id, u64 qman_base_addr)
3036 {
3037 struct cpu_dyn_regs *dyn_regs =
3038 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3039 u32 mtr_base_lo, mtr_base_hi;
3040 u32 so_base_lo, so_base_hi;
3041 u32 irq_handler_offset;
3042 u32 q_off, mme_id;
3043 u32 mme_qm_err_cfg;
3045 mtr_base_lo = lower_32_bits(CFG_BASE +
3046 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3047 mtr_base_hi = upper_32_bits(CFG_BASE +
3048 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3049 so_base_lo = lower_32_bits(CFG_BASE +
3050 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3051 so_base_hi = upper_32_bits(CFG_BASE +
3052 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3054 q_off = mme_offset + qman_id * 4;
3056 if (qman_id < 4) {
3057 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3058 lower_32_bits(qman_base_addr));
3059 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3060 upper_32_bits(qman_base_addr));
3062 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3063 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3064 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3066 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3067 QMAN_CPDMA_SIZE_OFFSET);
3068 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3069 QMAN_CPDMA_SRC_OFFSET);
3070 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3071 QMAN_CPDMA_DST_OFFSET);
3072 } else {
3073 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3074 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3075 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3077 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3078 QMAN_LDMA_SIZE_OFFSET);
3079 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3080 QMAN_LDMA_SRC_OFFSET);
3081 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3082 QMAN_LDMA_DST_OFFSET);
3084 /* Configure RAZWI IRQ */
3085 mme_id = mme_offset /
3086 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
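/*
 * Editor's note: consecutive MME QMAN blocks are one
 * (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) stride apart and the two
 * master QMANs sit two strides apart, so the double division maps the
 * MME0 offset to mme_id 0 and the MME2 offset to mme_id 1 for the
 * error-event payload below.
 */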
3088 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3089 if (hdev->stop_on_err)
3090 mme_qm_err_cfg |=
3091 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3093 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3095 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3096 lower_32_bits(CFG_BASE + irq_handler_offset));
3097 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3098 upper_32_bits(CFG_BASE + irq_handler_offset));
3100 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3101 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3102 mme_id);
3104 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3105 QM_ARB_ERR_MSG_EN_MASK);
3107 /* Increase ARB WDT to support streams architecture */
3108 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3109 GAUDI_ARB_WDT_TIMEOUT);
3111 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3112 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3113 QMAN_INTERNAL_MAKE_TRUSTED);
3114 }
3116 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3117 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3118 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3119 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3120 }
3122 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3123 {
3124 struct gaudi_device *gaudi = hdev->asic_specific;
3125 struct gaudi_internal_qman_info *q;
3126 u64 qman_base_addr;
3127 u32 mme_offset;
3128 int i, internal_q_index;
3130 if (gaudi->hw_cap_initialized & HW_CAP_MME)
3131 return;
3133 /*
3134 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3135 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3136 */
3138 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3140 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3141 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3142 q = &gaudi->internal_qmans[internal_q_index];
3143 qman_base_addr = (u64) q->pq_dma_addr;
3144 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3145 qman_base_addr);
3146 if (i == 3)
3147 mme_offset = 0;
3148 }
3150 /* Initializing lower CP for MME QMANs */
3151 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3152 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3153 gaudi_init_mme_qman(hdev, 0, 4, 0);
3155 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3156 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3158 gaudi->hw_cap_initialized |= HW_CAP_MME;
3159 }
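/*
 * Editor's note (inference from this function): only the mmMME0 and
 * mmMME2 register blocks are programmed and enabled here, which suggests
 * those two host the master QMANs while the remaining MME engines are
 * driven through them.
 */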
3161 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3162 int qman_id, u64 qman_base_addr)
3163 {
3164 struct cpu_dyn_regs *dyn_regs =
3165 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3166 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3167 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3168 u32 tpc_qm_err_cfg, irq_handler_offset;
3169 u32 q_off, tpc_id;
3171 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3172 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3173 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3174 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3175 so_base_en_lo = lower_32_bits(CFG_BASE +
3176 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3177 so_base_en_hi = upper_32_bits(CFG_BASE +
3178 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3179 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3180 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3181 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3182 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3183 so_base_ws_lo = lower_32_bits(CFG_BASE +
3184 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3185 so_base_ws_hi = upper_32_bits(CFG_BASE +
3186 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3188 q_off = tpc_offset + qman_id * 4;
3190 tpc_id = tpc_offset /
3191 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3193 if (qman_id < 4) {
3194 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3195 lower_32_bits(qman_base_addr));
3196 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3197 upper_32_bits(qman_base_addr));
3199 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3200 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3201 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3203 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3204 QMAN_CPDMA_SIZE_OFFSET);
3205 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3206 QMAN_CPDMA_SRC_OFFSET);
3207 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3208 QMAN_CPDMA_DST_OFFSET);
3209 } else {
3210 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3211 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3212 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3214 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3215 QMAN_LDMA_SIZE_OFFSET);
3216 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3217 QMAN_LDMA_SRC_OFFSET);
3218 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3219 QMAN_LDMA_DST_OFFSET);
3221 /* Configure RAZWI IRQ */
3222 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3223 if (hdev->stop_on_err)
3224 tpc_qm_err_cfg |=
3225 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3227 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3229 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3230 lower_32_bits(CFG_BASE + irq_handler_offset));
3231 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3232 upper_32_bits(CFG_BASE + irq_handler_offset));
3234 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3235 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3236 tpc_id);
3238 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3239 QM_ARB_ERR_MSG_EN_MASK);
3241 /* Increase ARB WDT to support streams architecture */
3242 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3243 GAUDI_ARB_WDT_TIMEOUT);
3245 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3246 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3247 QMAN_INTERNAL_MAKE_TRUSTED);
3248 }
3250 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3251 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3252 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3253 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3255 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3256 if (tpc_id == 6) {
3257 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3258 mtr_base_ws_lo);
3259 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3260 mtr_base_ws_hi);
3261 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3262 so_base_ws_lo);
3263 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3264 so_base_ws_hi);
3265 }
3266 }
3268 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3269 {
3270 struct gaudi_device *gaudi = hdev->asic_specific;
3271 struct gaudi_internal_qman_info *q;
3272 u64 qman_base_addr;
3273 u32 so_base_hi, tpc_offset = 0;
3274 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3275 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3276 int i, tpc_id, internal_q_index;
3278 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3279 return;
3281 so_base_hi = upper_32_bits(CFG_BASE +
3282 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3284 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3285 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3286 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3287 tpc_id * QMAN_STREAMS + i;
3288 q = &gaudi->internal_qmans[internal_q_index];
3289 qman_base_addr = (u64) q->pq_dma_addr;
3290 gaudi_init_tpc_qman(hdev, tpc_offset, i,
3291 qman_base_addr);
3292 }
3294 /* Initializing lower CP for TPC QMAN */
3295 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3297 /* Enable the QMAN and TPC channel */
3298 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3299 QMAN_TPC_ENABLE);
3303 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3304 so_base_hi);
3306 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3308 gaudi->hw_cap_initialized |=
3309 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3310 }
3311 }
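/*
 * Editor's note: the capability bits are accumulated one engine at a
 * time; FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id) positions the bit of
 * 'tpc_id' inside the TPC field of hw_cap_initialized, so the mask
 * records exactly which TPC QMANs were brought up.
 */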
3313 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3314 int qman_id, u64 qman_base_addr, int nic_id)
3315 {
3316 struct cpu_dyn_regs *dyn_regs =
3317 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3318 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3319 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3320 u32 nic_qm_err_cfg, irq_handler_offset;
3321 u32 q_off;
3323 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3324 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3325 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3326 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3327 so_base_en_lo = lower_32_bits(CFG_BASE +
3328 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3329 so_base_en_hi = upper_32_bits(CFG_BASE +
3330 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3331 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3332 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3333 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3334 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3335 so_base_ws_lo = lower_32_bits(CFG_BASE +
3336 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3337 so_base_ws_hi = upper_32_bits(CFG_BASE +
3338 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3340 q_off = nic_offset + qman_id * 4;
3342 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3343 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3345 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3346 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3347 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3349 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3350 QMAN_LDMA_SIZE_OFFSET);
3351 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3352 QMAN_LDMA_SRC_OFFSET);
3353 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3354 QMAN_LDMA_DST_OFFSET);
3356 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3357 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3358 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3359 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3361 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3362 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3363 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3364 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3365 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3367 if (qman_id == 0) {
3368 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3369 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3370 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3372 /* Configure RAZWI IRQ */
3373 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3374 if (hdev->stop_on_err)
3375 nic_qm_err_cfg |=
3376 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3378 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3380 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3381 lower_32_bits(CFG_BASE + irq_handler_offset));
3382 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3383 upper_32_bits(CFG_BASE + irq_handler_offset));
3385 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3386 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3387 nic_id);
3389 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3390 QM_ARB_ERR_MSG_EN_MASK);
3392 /* Increase ARB WDT to support streams architecture */
3393 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3394 GAUDI_ARB_WDT_TIMEOUT);
3396 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3397 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3398 QMAN_INTERNAL_MAKE_TRUSTED);
3399 }
3400 }
3402 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3403 {
3404 struct gaudi_device *gaudi = hdev->asic_specific;
3405 struct gaudi_internal_qman_info *q;
3406 u64 qman_base_addr;
3407 u32 nic_offset = 0;
3408 u32 nic_delta_between_qmans =
3409 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3410 u32 nic_delta_between_nics =
3411 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3412 int i, nic_id, internal_q_index;
3414 if (!hdev->nic_ports_mask)
3415 return;
3417 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3418 return;
3420 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3422 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3423 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3424 nic_offset += nic_delta_between_qmans;
3425 if (nic_id & 1) {
3426 nic_offset -= (nic_delta_between_qmans * 2);
3427 nic_offset += nic_delta_between_nics;
3428 }
3429 continue;
3430 }
3432 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3433 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3434 nic_id * QMAN_STREAMS + i;
3435 q = &gaudi->internal_qmans[internal_q_index];
3436 qman_base_addr = (u64) q->pq_dma_addr;
3437 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3438 qman_base_addr, nic_id);
3439 }
3441 /* Enable the QMAN */
3442 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3444 nic_offset += nic_delta_between_qmans;
3445 if (nic_id & 1) {
3446 nic_offset -= (nic_delta_between_qmans * 2);
3447 nic_offset += nic_delta_between_nics;
3448 }
3450 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3451 }
3452 }
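/*
 * Editor's note: each NIC macro hosts two QMANs, so the offset advances
 * one QMAN stride per port and, after every odd nic_id, rewinds the two
 * QMAN strides and jumps one NIC-to-NIC stride to reach the next macro.
 */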
3454 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3455 {
3456 struct gaudi_device *gaudi = hdev->asic_specific;
3458 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3459 return;
3461 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3462 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3463 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3464 }
3466 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3467 {
3468 struct gaudi_device *gaudi = hdev->asic_specific;
3470 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3471 return;
3473 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3474 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3475 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3476 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3477 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3478 }
3480 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3481 {
3482 struct gaudi_device *gaudi = hdev->asic_specific;
3484 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3485 return;
3487 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3488 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3489 }
3491 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3492 {
3493 struct gaudi_device *gaudi = hdev->asic_specific;
3494 u32 tpc_offset = 0;
3495 int tpc_id;
3497 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3498 return;
3500 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3501 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3502 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3503 }
3504 }
3506 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3507 {
3508 struct gaudi_device *gaudi = hdev->asic_specific;
3509 u32 nic_mask, nic_offset = 0;
3510 u32 nic_delta_between_qmans =
3511 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3512 u32 nic_delta_between_nics =
3513 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3514 int nic_id;
3516 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3517 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3519 if (gaudi->hw_cap_initialized & nic_mask)
3520 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3522 nic_offset += nic_delta_between_qmans;
3523 if (nic_id & 1) {
3524 nic_offset -= (nic_delta_between_qmans * 2);
3525 nic_offset += nic_delta_between_nics;
3526 }
3527 }
3528 }
3530 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3531 {
3532 struct gaudi_device *gaudi = hdev->asic_specific;
3534 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3535 return;
3537 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3538 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3539 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3540 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3541 }
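/*
 * Editor's reading: the 0xF above sets the stop bits of the four upper
 * (stream) CPs only, whereas the 0x1F used for the internal QMANs below
 * also stops the fifth, lower CP; this matches the "upper CPs" wording
 * of the comments but is an interpretation of the masks, not documented
 * here.
 */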
3543 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3544 {
3545 struct gaudi_device *gaudi = hdev->asic_specific;
3547 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3548 return;
3550 /* Stop CPs of HBM DMA QMANs */
3552 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3553 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3554 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3555 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3556 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3557 }
3559 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3560 {
3561 struct gaudi_device *gaudi = hdev->asic_specific;
3563 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3564 return;
3566 /* Stop CPs of MME QMANs */
3567 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3568 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3569 }
3571 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3572 {
3573 struct gaudi_device *gaudi = hdev->asic_specific;
3575 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3576 return;
3578 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3579 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3580 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3581 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3582 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3583 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3584 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3585 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3586 }
3588 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3589 {
3590 struct gaudi_device *gaudi = hdev->asic_specific;
3592 /* Stop upper CPs of QMANs */
3594 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3595 WREG32(mmNIC0_QM0_GLBL_CFG1,
3596 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3597 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3598 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3600 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3601 WREG32(mmNIC0_QM1_GLBL_CFG1,
3602 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3603 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3604 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3606 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3607 WREG32(mmNIC1_QM0_GLBL_CFG1,
3608 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3609 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3610 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3612 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3613 WREG32(mmNIC1_QM1_GLBL_CFG1,
3614 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3615 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3616 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3618 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3619 WREG32(mmNIC2_QM0_GLBL_CFG1,
3620 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3621 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3622 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3624 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3625 WREG32(mmNIC2_QM1_GLBL_CFG1,
3626 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3627 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3628 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3630 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3631 WREG32(mmNIC3_QM0_GLBL_CFG1,
3632 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3633 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3634 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3636 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3637 WREG32(mmNIC3_QM1_GLBL_CFG1,
3638 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3639 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3640 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3642 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3643 WREG32(mmNIC4_QM0_GLBL_CFG1,
3644 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3645 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3646 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3648 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3649 WREG32(mmNIC4_QM1_GLBL_CFG1,
3650 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3651 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3652 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3655 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3657 struct gaudi_device *gaudi = hdev->asic_specific;
3659 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3662 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3663 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3664 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3667 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3669 struct gaudi_device *gaudi = hdev->asic_specific;
3671 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3674 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3675 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3676 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3677 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3678 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3681 static void gaudi_mme_stall(struct hl_device *hdev)
3683 struct gaudi_device *gaudi = hdev->asic_specific;
3685 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3688 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3689 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3690 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3691 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3692 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3693 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3694 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3695 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3696 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3697 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3698 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3699 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3700 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3701 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3702 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3703 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3704 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3707 static void gaudi_tpc_stall(struct hl_device *hdev)
3709 struct gaudi_device *gaudi = hdev->asic_specific;
3711 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3714 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3715 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3716 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3717 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3718 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3719 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3720 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3721 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3724 static void gaudi_set_clock_gating(struct hl_device *hdev)
3726 struct gaudi_device *gaudi = hdev->asic_specific;
3731 /* If we are in a debug session, don't enable clock gating
3732 * as it may interfere
3737 if (hdev->asic_prop.fw_security_enabled)
3740 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3741 enable = !!(hdev->clock_gating_mask &
3742 (BIT_ULL(gaudi_dma_assignment[i])));
3744 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3745 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3746 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3747 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3748 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3751 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3752 enable = !!(hdev->clock_gating_mask &
3753 (BIT_ULL(gaudi_dma_assignment[i])));
3755 /* GC sends work to the DMA engine through the upper CP in DMA5, so
3756 * we must not enable clock gating in that DMA
3758 if (i == GAUDI_HBM_DMA_4)
3761 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3762 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3763 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3764 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3765 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3768 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3769 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3770 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3772 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3773 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3774 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3776 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3777 enable = !!(hdev->clock_gating_mask &
3778 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3780 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3781 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3782 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3783 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3785 qman_offset += TPC_QMAN_OFFSET;
3788 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
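/*
 * Illustrative example (not from the original source): clock gating is
 * requested per engine via a bit in hdev->clock_gating_mask. To gate only
 * TPC0 and MME0, for instance, a caller would set:
 *
 *	hdev->clock_gating_mask = BIT_ULL(GAUDI_ENGINE_ID_TPC_0) |
 *				  BIT_ULL(GAUDI_ENGINE_ID_MME_0);
 *
 * Engines whose bit is clear get 0 written to their CGM_CFG/CGM_CFG1
 * registers, i.e. clock gating stays disabled for them.
 */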
3791 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3793 struct gaudi_device *gaudi = hdev->asic_specific;
3797 if (hdev->asic_prop.fw_security_enabled)
3800 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3801 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3802 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3804 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3807 WREG32(mmMME0_QM_CGM_CFG, 0);
3808 WREG32(mmMME0_QM_CGM_CFG1, 0);
3809 WREG32(mmMME2_QM_CGM_CFG, 0);
3810 WREG32(mmMME2_QM_CGM_CFG1, 0);
3812 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3813 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3814 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3816 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3819 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3822 static void gaudi_enable_timestamp(struct hl_device *hdev)
3824 /* Disable the timestamp counter */
3825 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3827 /* Zero the lower/upper parts of the 64-bit counter */
3828 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3829 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3831 /* Enable the counter */
3832 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
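/*
 * Ordering note (illustrative, not from the original source): the counter is
 * disabled before its 64-bit value is zeroed so that neither half can tick
 * between the two writes, and only then is it re-enabled.
 */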
3835 static void gaudi_disable_timestamp(struct hl_device *hdev)
3837 /* Disable the timestamp counter */
3838 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3841 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3843 u32 wait_timeout_ms;
3846 "Halting compute engines and disabling interrupts\n");
3849 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3851 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3856 gaudi_stop_nic_qmans(hdev);
3857 gaudi_stop_mme_qmans(hdev);
3858 gaudi_stop_tpc_qmans(hdev);
3859 gaudi_stop_hbm_dma_qmans(hdev);
3860 gaudi_stop_pci_dma_qmans(hdev);
3862 hdev->asic_funcs->disable_clock_gating(hdev);
3864 msleep(wait_timeout_ms);
3866 gaudi_pci_dma_stall(hdev);
3867 gaudi_hbm_dma_stall(hdev);
3868 gaudi_tpc_stall(hdev);
3869 gaudi_mme_stall(hdev);
3871 msleep(wait_timeout_ms);
3873 gaudi_disable_nic_qmans(hdev);
3874 gaudi_disable_mme_qmans(hdev);
3875 gaudi_disable_tpc_qmans(hdev);
3876 gaudi_disable_hbm_dma_qmans(hdev);
3877 gaudi_disable_pci_dma_qmans(hdev);
3879 gaudi_disable_timestamp(hdev);
3882 gaudi_disable_msi(hdev);
3885 static int gaudi_mmu_init(struct hl_device *hdev)
3887 struct asic_fixed_properties *prop = &hdev->asic_prop;
3888 struct gaudi_device *gaudi = hdev->asic_specific;
3892 if (!hdev->mmu_enable)
3895 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3898 for (i = 0 ; i < prop->max_asid ; i++) {
3899 hop0_addr = prop->mmu_pgt_addr +
3900 (i * prop->mmu_hop_table_size);
3902 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3905 "failed to set hop0 addr for asid %d\n", i);
3910 /* Init the MMU cache management page */
3911 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3912 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3914 /* mem cache invalidation */
3915 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3917 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3919 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3920 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3922 WREG32(mmSTLB_HOP_CONFIGURATION,
3923 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3926 * The H/W expects the first PI after init to be 1. After wraparound
3929 gaudi->mmu_cache_inv_pi = 1;
3931 gaudi->hw_cap_initialized |= HW_CAP_MMU;
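/*
 * Worked example (illustrative): the hop0 page tables are laid out
 * back-to-back starting at prop->mmu_pgt_addr, one table per ASID, so for
 * ASID 3 the loop above programs
 *
 *	hop0_addr = prop->mmu_pgt_addr + 3 * prop->mmu_hop_table_size;
 *
 * via gaudi_mmu_update_asid_hop0_addr().
 */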
3939 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3943 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3945 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3948 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3952 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3954 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3957 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3959 struct dynamic_fw_load_mgr *dynamic_loader;
3960 struct cpu_dyn_regs *dyn_regs;
3962 dynamic_loader = &hdev->fw_loader.dynamic_loader;
3965 * Here we update the initial values of a few specific dynamic regs (before
3966 * the first descriptor is read from the FW, these values have to be
3967 * hard-coded). In later stages of the protocol these values are updated
3968 * automatically by reading the FW descriptor, so the data there is
3969 * always up-to-date.
3971 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3972 dyn_regs->kmd_msg_to_cpu =
3973 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3974 dyn_regs->cpu_cmd_status_to_host =
3975 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3977 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3980 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3982 struct static_fw_load_mgr *static_loader;
3984 static_loader = &hdev->fw_loader.static_loader;
3986 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3987 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3988 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3989 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3990 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3991 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3992 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3993 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3994 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3995 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3996 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3997 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3998 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3999 GAUDI_PLDM_RESET_WAIT_MSEC :
4000 GAUDI_CPU_RESET_WAIT_MSEC;
4003 static void gaudi_init_firmware_loader(struct hl_device *hdev)
4005 struct asic_fixed_properties *prop = &hdev->asic_prop;
4006 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4008 /* fill common fields */
4009 fw_loader->linux_loaded = false;
4010 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
4011 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
4012 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
4013 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
4014 fw_loader->skip_bmc = !hdev->bmc_enable;
4015 fw_loader->sram_bar_id = SRAM_BAR_ID;
4016 fw_loader->dram_bar_id = HBM_BAR_ID;
4018 if (prop->dynamic_fw_load)
4019 gaudi_init_dynamic_firmware_loader(hdev);
4021 gaudi_init_static_firmware_loader(hdev);
4024 static int gaudi_init_cpu(struct hl_device *hdev)
4026 struct gaudi_device *gaudi = hdev->asic_specific;
4029 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4032 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
4036 * The device CPU works with 40-bit addresses.
4037 * This register sets the extension to 50 bits.
4039 if (!hdev->asic_prop.fw_security_enabled)
4040 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
4042 rc = hl_fw_init_cpu(hdev);
4047 gaudi->hw_cap_initialized |= HW_CAP_CPU;
4052 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4054 struct cpu_dyn_regs *dyn_regs =
4055 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4056 struct asic_fixed_properties *prop = &hdev->asic_prop;
4057 struct gaudi_device *gaudi = hdev->asic_specific;
4058 u32 status, irq_handler_offset;
4060 struct hl_hw_queue *cpu_pq =
4061 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4064 if (!hdev->cpu_queues_enable)
4067 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4070 eq = &hdev->event_queue;
4072 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4073 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4075 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4076 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4078 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4079 lower_32_bits(hdev->cpu_accessible_dma_address));
4080 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4081 upper_32_bits(hdev->cpu_accessible_dma_address));
4083 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4084 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4085 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4087 /* Used for EQ CI */
4088 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4090 WREG32(mmCPU_IF_PF_PQ_PI, 0);
4092 if (gaudi->multi_msi_mode)
4093 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4095 WREG32(mmCPU_IF_QUEUE_INIT,
4096 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4098 irq_handler_offset = prop->gic_interrupts_enable ?
4099 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4100 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4102 WREG32(irq_handler_offset,
4103 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4105 err = hl_poll_timeout(
4107 mmCPU_IF_QUEUE_INIT,
4109 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4115 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4119 /* update FW application security bits */
4120 if (prop->fw_cpu_boot_dev_sts0_valid)
4121 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4122 if (prop->fw_cpu_boot_dev_sts1_valid)
4123 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4125 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4129 static void gaudi_pre_hw_init(struct hl_device *hdev)
4131 /* Perform read from the device to make sure device is up */
4134 if (!hdev->asic_prop.fw_security_enabled) {
4135 /* Set the access through PCI bars (Linux driver only) as
4138 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4139 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4140 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4142 /* Perform a read to flush the pending writes and ensure the
4143 * configuration was set in the device
4145 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4149 * Let's mark in the H/W that we have reached this point. We check
4150 * this value in the reset_before_init function to understand whether
4151 * we need to reset the chip before doing H/W init. This register is
4152 * cleared by the H/W upon H/W reset
4154 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4157 static int gaudi_hw_init(struct hl_device *hdev)
4159 struct gaudi_device *gaudi = hdev->asic_specific;
4162 gaudi_pre_hw_init(hdev);
4164 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
4165 * So we set it here and if anyone tries to move it later to
4166 * a different address, there will be an error
4168 if (hdev->asic_prop.iatu_done_by_fw)
4169 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4172 * Before pushing u-boot/Linux to the device, we need to set the HBM BAR
4173 * to the base address of the DRAM
4175 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4177 "failed to map HBM bar to DRAM base address\n");
4181 rc = gaudi_init_cpu(hdev);
4183 dev_err(hdev->dev, "failed to initialize CPU\n");
4187 /* In case clock gating was enabled in preboot, we need to disable
4188 * it here before touching the MME/TPC registers.
4189 * There is no need to take the clock gating mutex because when this
4190 * function runs, no other relevant code can run
4192 hdev->asic_funcs->disable_clock_gating(hdev);
4194 /* SRAM scrambler must be initialized after CPU is running from HBM */
4195 gaudi_init_scrambler_sram(hdev);
4197 /* This is here just in case we are working without CPU */
4198 gaudi_init_scrambler_hbm(hdev);
4200 gaudi_init_golden_registers(hdev);
4202 rc = gaudi_mmu_init(hdev);
4206 gaudi_init_security(hdev);
4208 gaudi_init_pci_dma_qmans(hdev);
4210 gaudi_init_hbm_dma_qmans(hdev);
4212 gaudi_init_mme_qmans(hdev);
4214 gaudi_init_tpc_qmans(hdev);
4216 gaudi_init_nic_qmans(hdev);
4218 hdev->asic_funcs->set_clock_gating(hdev);
4220 gaudi_enable_timestamp(hdev);
4222 /* MSI must be enabled before CPU queues and NIC are initialized */
4223 rc = gaudi_enable_msi(hdev);
4225 goto disable_queues;
4227 /* Must be called after MSI is enabled */
4228 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4230 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4235 /* Perform read from the device to flush all configuration */
4241 gaudi_disable_msi(hdev);
4243 gaudi_disable_mme_qmans(hdev);
4244 gaudi_disable_pci_dma_qmans(hdev);
4249 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4251 struct cpu_dyn_regs *dyn_regs =
4252 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4253 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4254 struct gaudi_device *gaudi = hdev->asic_specific;
4255 bool driver_performs_reset;
4258 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4263 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4264 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4266 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4267 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4272 "Firmware performs HARD reset, going to wait %dms\n",
4278 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4279 !hdev->asic_prop.hard_reset_done_by_fw);
4281 /* Set device to handle FLR by H/W as we will put the device CPU to
4284 if (driver_performs_reset)
4285 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4286 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4288 /* If Linux is loaded on the device CPU we need to communicate with it
4289 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4290 * registers in case of old F/Ws
4292 if (hdev->fw_loader.linux_loaded) {
4293 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4294 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4295 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4297 WREG32(irq_handler_offset,
4298 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4300 if (hdev->asic_prop.hard_reset_done_by_fw)
4301 hl_fw_ask_hard_reset_without_linux(hdev);
4303 hl_fw_ask_halt_machine_without_linux(hdev);
4306 if (driver_performs_reset) {
4308 /* Configure the reset registers. Must be done as early as
4309 * possible in case we fail during H/W initialization
4311 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4312 (CFG_RST_H_DMA_MASK |
4313 CFG_RST_H_MME_MASK |
4315 CFG_RST_H_TPC_7_MASK));
4317 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4319 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4320 (CFG_RST_H_HBM_MASK |
4321 CFG_RST_H_TPC_7_MASK |
4322 CFG_RST_H_NIC_MASK |
4324 CFG_RST_H_DMA_MASK |
4325 CFG_RST_H_MME_MASK |
4326 CFG_RST_H_CPU_MASK |
4327 CFG_RST_H_MMU_MASK));
4329 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4330 (CFG_RST_L_IF_MASK |
4331 CFG_RST_L_PSOC_MASK |
4332 CFG_RST_L_TPC_MASK));
4334 msleep(cpu_timeout_ms);
4336 /* Tell ASIC not to re-initialize PCIe */
4337 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4339 /* Restart BTL/BLR upon hard-reset */
4340 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4342 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4343 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4346 "Issued HARD reset command, going to wait %dms\n",
4350 "Firmware performs HARD reset, going to wait %dms\n",
4356 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4357 * itself is in reset. We need to wait until the reset is deasserted
4359 msleep(reset_timeout_ms);
4361 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4362 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4364 "Timeout while waiting for device to reset 0x%x\n",
4368 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4369 HW_CAP_HBM | HW_CAP_PCI_DMA |
4370 HW_CAP_MME | HW_CAP_TPC_MASK |
4371 HW_CAP_HBM_DMA | HW_CAP_PLL |
4372 HW_CAP_NIC_MASK | HW_CAP_MMU |
4373 HW_CAP_SRAM_SCRAMBLER |
4374 HW_CAP_HBM_SCRAMBLER |
4377 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4379 hdev->device_cpu_is_halted = false;
4383 static int gaudi_suspend(struct hl_device *hdev)
4387 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4389 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4394 static int gaudi_resume(struct hl_device *hdev)
4396 return gaudi_init_iatu(hdev);
4399 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4400 void *cpu_addr, dma_addr_t dma_addr, size_t size)
4404 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4405 VM_DONTCOPY | VM_NORESERVE;
4407 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4408 (dma_addr - HOST_PHYS_BASE), size);
4410 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4415 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4417 struct cpu_dyn_regs *dyn_regs =
4418 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4419 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4420 struct gaudi_device *gaudi = hdev->asic_specific;
4421 bool invalid_queue = false;
4424 switch (hw_queue_id) {
4425 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4426 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4427 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4428 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4429 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4432 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4433 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4434 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4435 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4436 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4439 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4440 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4441 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4442 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4443 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4446 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4447 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4448 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4449 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4450 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4453 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4454 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4455 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4456 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4457 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4460 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4461 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4462 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4463 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4464 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4467 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4468 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4469 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4470 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4471 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4474 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4475 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4476 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4477 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4478 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4481 case GAUDI_QUEUE_ID_CPU_PQ:
4482 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4483 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4485 invalid_queue = true;
4488 case GAUDI_QUEUE_ID_MME_0_0:
4489 db_reg_offset = mmMME2_QM_PQ_PI_0;
4492 case GAUDI_QUEUE_ID_MME_0_1:
4493 db_reg_offset = mmMME2_QM_PQ_PI_1;
4496 case GAUDI_QUEUE_ID_MME_0_2:
4497 db_reg_offset = mmMME2_QM_PQ_PI_2;
4500 case GAUDI_QUEUE_ID_MME_0_3:
4501 db_reg_offset = mmMME2_QM_PQ_PI_3;
4504 case GAUDI_QUEUE_ID_MME_1_0:
4505 db_reg_offset = mmMME0_QM_PQ_PI_0;
4508 case GAUDI_QUEUE_ID_MME_1_1:
4509 db_reg_offset = mmMME0_QM_PQ_PI_1;
4512 case GAUDI_QUEUE_ID_MME_1_2:
4513 db_reg_offset = mmMME0_QM_PQ_PI_2;
4516 case GAUDI_QUEUE_ID_MME_1_3:
4517 db_reg_offset = mmMME0_QM_PQ_PI_3;
4520 case GAUDI_QUEUE_ID_TPC_0_0:
4521 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4524 case GAUDI_QUEUE_ID_TPC_0_1:
4525 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4528 case GAUDI_QUEUE_ID_TPC_0_2:
4529 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4532 case GAUDI_QUEUE_ID_TPC_0_3:
4533 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4536 case GAUDI_QUEUE_ID_TPC_1_0:
4537 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4540 case GAUDI_QUEUE_ID_TPC_1_1:
4541 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4544 case GAUDI_QUEUE_ID_TPC_1_2:
4545 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4548 case GAUDI_QUEUE_ID_TPC_1_3:
4549 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4552 case GAUDI_QUEUE_ID_TPC_2_0:
4553 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4556 case GAUDI_QUEUE_ID_TPC_2_1:
4557 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4560 case GAUDI_QUEUE_ID_TPC_2_2:
4561 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4564 case GAUDI_QUEUE_ID_TPC_2_3:
4565 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4568 case GAUDI_QUEUE_ID_TPC_3_0:
4569 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4572 case GAUDI_QUEUE_ID_TPC_3_1:
4573 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4576 case GAUDI_QUEUE_ID_TPC_3_2:
4577 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4580 case GAUDI_QUEUE_ID_TPC_3_3:
4581 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4584 case GAUDI_QUEUE_ID_TPC_4_0:
4585 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4588 case GAUDI_QUEUE_ID_TPC_4_1:
4589 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4592 case GAUDI_QUEUE_ID_TPC_4_2:
4593 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4596 case GAUDI_QUEUE_ID_TPC_4_3:
4597 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4600 case GAUDI_QUEUE_ID_TPC_5_0:
4601 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4604 case GAUDI_QUEUE_ID_TPC_5_1:
4605 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4608 case GAUDI_QUEUE_ID_TPC_5_2:
4609 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4612 case GAUDI_QUEUE_ID_TPC_5_3:
4613 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4616 case GAUDI_QUEUE_ID_TPC_6_0:
4617 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4620 case GAUDI_QUEUE_ID_TPC_6_1:
4621 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4624 case GAUDI_QUEUE_ID_TPC_6_2:
4625 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4628 case GAUDI_QUEUE_ID_TPC_6_3:
4629 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4632 case GAUDI_QUEUE_ID_TPC_7_0:
4633 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4636 case GAUDI_QUEUE_ID_TPC_7_1:
4637 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4640 case GAUDI_QUEUE_ID_TPC_7_2:
4641 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4644 case GAUDI_QUEUE_ID_TPC_7_3:
4645 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4648 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4649 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4650 invalid_queue = true;
4652 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4653 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4656 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4657 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4658 invalid_queue = true;
4660 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4661 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4664 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4665 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4666 invalid_queue = true;
4668 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4669 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4672 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4673 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4674 invalid_queue = true;
4676 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4677 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4680 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4681 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4682 invalid_queue = true;
4684 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4685 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4688 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4689 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4690 invalid_queue = true;
4692 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4693 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4696 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4697 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4698 invalid_queue = true;
4700 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4701 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4704 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4705 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4706 invalid_queue = true;
4708 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4709 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4712 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4713 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4714 invalid_queue = true;
4716 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4717 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4720 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4721 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4722 invalid_queue = true;
4724 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4725 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4729 invalid_queue = true;
4732 if (invalid_queue) {
4733 /* Should never get here */
4734 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4741 /* ring the doorbell */
4742 WREG32(db_reg_offset, db_value);
4744 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4745 /* make sure device CPU will read latest data from host */
4748 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4749 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4750 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4752 WREG32(irq_handler_offset,
4753 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
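/*
 * Worked example (illustrative): the per-stream PQ_PI registers are 4 bytes
 * apart, so for the DMA cases above
 *
 *	q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
 *
 * selects PQ_PI_0..PQ_PI_3 of the QMAN chosen by dma_qm_offset. The "- 1"
 * compensates for GAUDI_QUEUE_ID_CPU_PQ occupying the queue id between
 * DMA_1_3 and DMA_2_0 (an assumption about the enum layout), which is why
 * the DMA_0/DMA_1 cases use hw_queue_id & 0x3 without the adjustment.
 */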
4757 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4760 __le64 *pbd = (__le64 *) bd;
4762 /* The QMANs are in host memory, so a simple copy suffices */
4767 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4768 dma_addr_t *dma_handle, gfp_t flags)
4770 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4773 /* Shift to the device's base physical address of host memory */
4775 *dma_handle += HOST_PHYS_BASE;
4780 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4781 void *cpu_addr, dma_addr_t dma_handle)
4783 /* Cancel the device's base physical address of host memory */
4784 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4786 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
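/*
 * Minimal sketch (an assumption, not part of the original driver): the
 * host-memory address translation invariant used by the alloc/free pair
 * above. Addresses handed to the device are shifted up by HOST_PHYS_BASE,
 * and the shift is cancelled before returning them to the DMA API.
 */
static inline dma_addr_t gaudi_host_dma_addr_example(dma_addr_t host_addr)
{
	/* device-visible address of a host DMA buffer */
	return host_addr + HOST_PHYS_BASE;
}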
4789 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4791 struct asic_fixed_properties *prop = &hdev->asic_prop;
4792 u64 cur_addr = DRAM_BASE_ADDR_USER;
4797 while (cur_addr < prop->dram_end_address) {
4798 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4799 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4802 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4805 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4806 cur_addr, cur_addr + chunk_size);
4808 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
4809 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
4810 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4811 lower_32_bits(cur_addr));
4812 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4813 upper_32_bits(cur_addr));
4814 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4816 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4817 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4818 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4820 cur_addr += chunk_size;
4822 if (cur_addr == prop->dram_end_address)
4826 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4827 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4829 rc = hl_poll_timeout(
4831 mmDMA0_CORE_STS0 + dma_offset,
4833 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4835 HBM_SCRUBBING_TIMEOUT_US);
4839 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4849 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4851 struct asic_fixed_properties *prop = &hdev->asic_prop;
4852 struct gaudi_device *gaudi = hdev->asic_specific;
4856 if (!hdev->memory_scrub)
4859 if (!addr && !size) {
4860 /* Wait till device is idle */
4861 rc = hl_poll_timeout(
4863 mmDMA0_CORE_STS0/* dummy */,
4865 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4868 HBM_SCRUBBING_TIMEOUT_US);
4870 dev_err(hdev->dev, "waiting for idle timeout\n");
4875 addr = prop->sram_user_base_address;
4876 size = hdev->pldm ? 0x10000 :
4877 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4878 val = 0x7777777777777777ull;
4880 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4883 "Failed to clear SRAM in mem scrub all\n");
4887 mutex_lock(&gaudi->clk_gate_mutex);
4888 hdev->asic_funcs->disable_clock_gating(hdev);
4890 /* Scrub HBM using all DMA channels in parallel */
4891 rc = gaudi_hbm_scrubbing(hdev);
4894 "Failed to clear HBM in mem scrub all\n");
4896 hdev->asic_funcs->set_clock_gating(hdev);
4897 mutex_unlock(&gaudi->clk_gate_mutex);
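/*
 * Sizing note (illustrative): each scrubbing iteration in
 * gaudi_hbm_scrubbing() hands every DMA channel a chunk of at most SZ_2G,
 * so with the eight DMA engines programmed in this file (DMA0..DMA7) a
 * single pass of the loop can cover up to 16GB of HBM before the engines
 * are polled idle.
 */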
4903 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4904 u32 queue_id, dma_addr_t *dma_handle,
4907 struct gaudi_device *gaudi = hdev->asic_specific;
4908 struct gaudi_internal_qman_info *q;
4910 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4911 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4912 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4916 q = &gaudi->internal_qmans[queue_id];
4917 *dma_handle = q->pq_dma_addr;
4918 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4920 return q->pq_kernel_addr;
4923 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4924 u16 len, u32 timeout, u64 *result)
4926 struct gaudi_device *gaudi = hdev->asic_specific;
4928 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4935 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4937 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4941 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4943 struct packet_msg_prot *fence_pkt;
4944 dma_addr_t pkt_dma_addr;
4945 u32 fence_val, tmp, timeout_usec;
4946 dma_addr_t fence_dma_addr;
4951 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4953 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4955 fence_val = GAUDI_QMAN0_FENCE_VAL;
4957 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4961 "Failed to allocate memory for H/W queue %d testing\n",
4968 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4969 sizeof(struct packet_msg_prot),
4970 GFP_KERNEL, &pkt_dma_addr);
4973 "Failed to allocate packet for H/W queue %d testing\n",
4976 goto free_fence_ptr;
4979 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4980 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4981 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4983 fence_pkt->ctl = cpu_to_le32(tmp);
4984 fence_pkt->value = cpu_to_le32(fence_val);
4985 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4987 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4988 sizeof(struct packet_msg_prot),
4992 "Failed to send fence packet to H/W queue %d\n",
4997 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4998 1000, timeout_usec, true);
5000 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
5002 if (rc == -ETIMEDOUT) {
5004 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
5005 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
5010 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
5013 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
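/*
 * Handshake summary (illustrative): the test allocates a 4-byte scratch
 * buffer from the DMA pool, sends a single MSG_PROT packet instructing the
 * QMAN to write GAUDI_QMAN0_FENCE_VAL to that buffer, and then polls the
 * buffer from the host. The value showing up before the timeout proves the
 * queue's fetch, parse and write-back paths are all functional.
 */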
5018 static int gaudi_test_cpu_queue(struct hl_device *hdev)
5020 struct gaudi_device *gaudi = hdev->asic_specific;
5023 * Check the capability here because send_cpu_message() won't update
5024 * the result value if the capability is not initialized
5026 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
5029 return hl_fw_test_cpu_queue(hdev);
5032 static int gaudi_test_queues(struct hl_device *hdev)
5034 int i, rc, ret_val = 0;
5036 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
5037 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
5038 rc = gaudi_test_queue(hdev, i);
5044 rc = gaudi_test_cpu_queue(hdev);
5051 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5052 gfp_t mem_flags, dma_addr_t *dma_handle)
5056 if (size > GAUDI_DMA_POOL_BLK_SIZE)
5059 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5061 /* Shift to the device's base physical address of host memory */
5063 *dma_handle += HOST_PHYS_BASE;
5068 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
5069 dma_addr_t dma_addr)
5071 /* Cancel the device's base physical address of host memory */
5072 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5074 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5077 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
5078 size_t size, dma_addr_t *dma_handle)
5080 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5083 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
5084 size_t size, void *vaddr)
5086 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5089 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5090 int nents, enum dma_data_direction dir)
5092 struct scatterlist *sg;
5095 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5098 /* Shift to the device's base physical address of host memory */
5099 for_each_sg(sgl, sg, nents, i)
5100 sg->dma_address += HOST_PHYS_BASE;
5105 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5106 int nents, enum dma_data_direction dir)
5108 struct scatterlist *sg;
5111 /* Cancel the device's base physical address of host memory */
5112 for_each_sg(sgl, sg, nents, i)
5113 sg->dma_address -= HOST_PHYS_BASE;
5115 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5118 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5119 struct sg_table *sgt)
5121 struct scatterlist *sg, *sg_next_iter;
5122 u32 count, dma_desc_cnt;
5124 dma_addr_t addr, addr_next;
5128 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5130 len = sg_dma_len(sg);
5131 addr = sg_dma_address(sg);
5136 while ((count + 1) < sgt->nents) {
5137 sg_next_iter = sg_next(sg);
5138 len_next = sg_dma_len(sg_next_iter);
5139 addr_next = sg_dma_address(sg_next_iter);
5144 if ((addr + len == addr_next) &&
5145 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5157 return dma_desc_cnt * sizeof(struct packet_lin_dma);
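/*
 * Worked example (illustrative): for three DMA-mapped sg entries
 * [0x1000, len 0x1000], [0x2000, len 0x1000] and [0x8000, len 0x1000], the
 * first two are merged because 0x1000 + 0x1000 == 0x2000 and the combined
 * length is within DMA_MAX_TRANSFER_SIZE, while the third starts a new
 * descriptor. The result is dma_desc_cnt == 2, i.e. room for two LIN_DMA
 * packets in the patched CB.
 */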
5160 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5161 struct hl_cs_parser *parser,
5162 struct packet_lin_dma *user_dma_pkt,
5163 u64 addr, enum dma_data_direction dir)
5165 struct hl_userptr *userptr;
5168 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5169 parser->job_userptr_list, &userptr))
5170 goto already_pinned;
5172 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5176 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5181 list_add_tail(&userptr->job_node, parser->job_userptr_list);
5183 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5184 userptr->sgt->nents, dir);
5186 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5190 userptr->dma_mapped = true;
5194 parser->patched_cb_size +=
5195 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5200 list_del(&userptr->job_node);
5201 hl_unpin_host_memory(hdev, userptr);
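/*
 * Note on the already_pinned path above (illustrative): hl_userptr_is_pinned()
 * searches parser->job_userptr_list, so when several LIN_DMA packets in the
 * same job reference one host range, the range is pinned and DMA-mapped only
 * once and subsequent packets reuse the existing mapping.
 */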
5207 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5208 struct hl_cs_parser *parser,
5209 struct packet_lin_dma *user_dma_pkt,
5212 enum dma_data_direction dir;
5213 bool skip_host_mem_pin = false, user_memset;
5217 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5218 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5219 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5223 skip_host_mem_pin = true;
5225 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5226 dir = DMA_TO_DEVICE;
5227 addr = le64_to_cpu(user_dma_pkt->src_addr);
5229 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5230 dir = DMA_FROM_DEVICE;
5231 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5232 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5233 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5236 if (skip_host_mem_pin)
5237 parser->patched_cb_size += sizeof(*user_dma_pkt);
5239 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5245 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5246 struct hl_cs_parser *parser,
5247 struct packet_lin_dma *user_dma_pkt)
5249 bool src_in_host = false;
5250 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5251 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5252 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5254 dev_dbg(hdev->dev, "DMA packet details:\n");
5255 dev_dbg(hdev->dev, "source == 0x%llx\n",
5256 le64_to_cpu(user_dma_pkt->src_addr));
5257 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5258 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5261 * Special handling for DMA with size 0. Bypass all validations
5262 * because no transactions will be done except for WR_COMP, which
5263 * is not a security issue
5265 if (!le32_to_cpu(user_dma_pkt->tsize)) {
5266 parser->patched_cb_size += sizeof(*user_dma_pkt);
5270 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5273 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5277 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5278 struct hl_cs_parser *parser,
5279 struct packet_load_and_exe *user_pkt)
5283 cfg = le32_to_cpu(user_pkt->cfg);
5285 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5287 "User not allowed to use Load and Execute\n");
5291 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5296 static int gaudi_validate_cb(struct hl_device *hdev,
5297 struct hl_cs_parser *parser, bool is_mmu)
5299 u32 cb_parsed_length = 0;
5302 parser->patched_cb_size = 0;
5304 /* user_cb_size is greater than 0, so the loop will always execute */
5305 while (cb_parsed_length < parser->user_cb_size) {
5306 enum packet_id pkt_id;
5308 struct gaudi_packet *user_pkt;
5310 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5312 pkt_id = (enum packet_id) (
5313 (le64_to_cpu(user_pkt->header) &
5314 PACKET_HEADER_PACKET_ID_MASK) >>
5315 PACKET_HEADER_PACKET_ID_SHIFT);
5317 if (!validate_packet_id(pkt_id)) {
5318 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5323 pkt_size = gaudi_packet_sizes[pkt_id];
5324 cb_parsed_length += pkt_size;
5325 if (cb_parsed_length > parser->user_cb_size) {
5327 "packet 0x%x is out of CB boundary\n", pkt_id);
5333 case PACKET_MSG_PROT:
5335 "User not allowed to use MSG_PROT\n");
5340 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5345 dev_err(hdev->dev, "User not allowed to use STOP\n");
5349 case PACKET_WREG_BULK:
5351 "User not allowed to use WREG_BULK\n");
5355 case PACKET_LOAD_AND_EXE:
5356 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5357 (struct packet_load_and_exe *) user_pkt);
5360 case PACKET_LIN_DMA:
5361 parser->contains_dma_pkt = true;
5363 parser->patched_cb_size += pkt_size;
5365 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5366 (struct packet_lin_dma *) user_pkt);
5369 case PACKET_WREG_32:
5370 case PACKET_MSG_LONG:
5371 case PACKET_MSG_SHORT:
5375 case PACKET_ARB_POINT:
5376 parser->patched_cb_size += pkt_size;
5380 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5391 * The new CB should have space at the end for two MSG_PROT packets:
5392 * 1. A packet that will act as a completion packet
5393 * 2. A packet that will generate MSI-X interrupt
5395 if (parser->completion)
5396 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
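/*
 * Size accounting example (illustrative): for a completion CS whose user CB
 * contains a single WREG_32 packet, the loop above adds
 * gaudi_packet_sizes[PACKET_WREG_32] to patched_cb_size, and the statement
 * above then reserves 2 * sizeof(struct packet_msg_prot) for the completion
 * and interrupt packets that are appended later.
 */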
5401 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5402 struct hl_cs_parser *parser,
5403 struct packet_lin_dma *user_dma_pkt,
5404 struct packet_lin_dma *new_dma_pkt,
5405 u32 *new_dma_pkt_size)
5407 struct hl_userptr *userptr;
5408 struct scatterlist *sg, *sg_next_iter;
5409 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5411 dma_addr_t dma_addr, dma_addr_next;
5412 u64 device_memory_addr, addr;
5413 enum dma_data_direction dir;
5414 struct sg_table *sgt;
5415 bool src_in_host = false;
5416 bool skip_host_mem_pin = false;
5419 ctl = le32_to_cpu(user_dma_pkt->ctl);
5421 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5424 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5425 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5428 addr = le64_to_cpu(user_dma_pkt->src_addr);
5429 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5430 dir = DMA_TO_DEVICE;
5432 skip_host_mem_pin = true;
5434 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5435 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5436 dir = DMA_FROM_DEVICE;
5439 if ((!skip_host_mem_pin) &&
5440 (!hl_userptr_is_pinned(hdev, addr,
5441 le32_to_cpu(user_dma_pkt->tsize),
5442 parser->job_userptr_list, &userptr))) {
5443 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5444 addr, le32_to_cpu(user_dma_pkt->tsize));
5448 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5449 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5450 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5454 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5459 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5460 len = sg_dma_len(sg);
5461 dma_addr = sg_dma_address(sg);
5466 while ((count + 1) < sgt->nents) {
5467 sg_next_iter = sg_next(sg);
5468 len_next = sg_dma_len(sg_next_iter);
5469 dma_addr_next = sg_dma_address(sg_next_iter);
5474 if ((dma_addr + len == dma_addr_next) &&
5475 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5484 ctl = le32_to_cpu(user_dma_pkt->ctl);
5485 if (likely(dma_desc_cnt))
5486 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5487 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5488 new_dma_pkt->ctl = cpu_to_le32(ctl);
5489 new_dma_pkt->tsize = cpu_to_le32(len);
5491 if (dir == DMA_TO_DEVICE) {
5492 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5493 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5495 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5496 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5500 device_memory_addr += len;
5505 if (!dma_desc_cnt) {
5507 "Error of 0 SG entries when patching DMA packet\n");
5511 /* Fix the last DMA packet - wrcomp must be as the user set it */
5513 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5515 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
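/*
 * Illustrative outcome: a single user LIN_DMA packet whose host buffer maps
 * to three non-mergeable sg entries is expanded into three LIN_DMA packets.
 * The loop strips WR_COMP from every descriptor and the statement above then
 * restores it on the last one only, so the user still gets exactly one
 * write-completion event, as originally requested.
 */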
5520 static int gaudi_patch_cb(struct hl_device *hdev,
5521 struct hl_cs_parser *parser)
5523 u32 cb_parsed_length = 0;
5524 u32 cb_patched_cur_length = 0;
5527 /* user_cb_size is greater than 0, so the loop will always execute */
5528 while (cb_parsed_length < parser->user_cb_size) {
5529 enum packet_id pkt_id;
5531 u32 new_pkt_size = 0;
5532 struct gaudi_packet *user_pkt, *kernel_pkt;
5534 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5535 kernel_pkt = parser->patched_cb->kernel_address +
5536 cb_patched_cur_length;
5538 pkt_id = (enum packet_id) (
5539 (le64_to_cpu(user_pkt->header) &
5540 PACKET_HEADER_PACKET_ID_MASK) >>
5541 PACKET_HEADER_PACKET_ID_SHIFT);
5543 if (!validate_packet_id(pkt_id)) {
5544 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5549 pkt_size = gaudi_packet_sizes[pkt_id];
5550 cb_parsed_length += pkt_size;
5551 if (cb_parsed_length > parser->user_cb_size) {
5553 "packet 0x%x is out of CB boundary\n", pkt_id);
5559 case PACKET_LIN_DMA:
5560 rc = gaudi_patch_dma_packet(hdev, parser,
5561 (struct packet_lin_dma *) user_pkt,
5562 (struct packet_lin_dma *) kernel_pkt,
5564 cb_patched_cur_length += new_pkt_size;
5567 case PACKET_MSG_PROT:
5569 "User not allowed to use MSG_PROT\n");
5574 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5579 dev_err(hdev->dev, "User not allowed to use STOP\n");
5583 case PACKET_WREG_32:
5584 case PACKET_WREG_BULK:
5585 case PACKET_MSG_LONG:
5586 case PACKET_MSG_SHORT:
5590 case PACKET_ARB_POINT:
5591 case PACKET_LOAD_AND_EXE:
5592 memcpy(kernel_pkt, user_pkt, pkt_size);
5593 cb_patched_cur_length += pkt_size;
5597 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5610 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5611 struct hl_cs_parser *parser)
5613 u64 patched_cb_handle;
5614 u32 patched_cb_size;
5615 struct hl_cb *user_cb;
5619 * The new CB should have space at the end for two MSG_PROT packets:
5620 * 1. A packet that will act as a completion packet
5621 * 2. A packet that will generate MSI interrupt
5623 if (parser->completion)
5624 parser->patched_cb_size = parser->user_cb_size +
5625 sizeof(struct packet_msg_prot) * 2;
5627 parser->patched_cb_size = parser->user_cb_size;
5629 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5630 parser->patched_cb_size, false, false,
5631 &patched_cb_handle);
5635 "Failed to allocate patched CB for DMA CS %d\n",
5640 patched_cb_handle >>= PAGE_SHIFT;
5641 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5642 (u32) patched_cb_handle);
5643 /* hl_cb_get should never fail */
5644 if (!parser->patched_cb) {
5645 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5646 (u32) patched_cb_handle);
5652 * The check that parser->user_cb_size <= parser->user_cb->size was done
5653 * in validate_queue_index().
5655 memcpy(parser->patched_cb->kernel_address,
5656 parser->user_cb->kernel_address,
5657 parser->user_cb_size);
5659 patched_cb_size = parser->patched_cb_size;
5661 /* Validate patched CB instead of user CB */
5662 user_cb = parser->user_cb;
5663 parser->user_cb = parser->patched_cb;
5664 rc = gaudi_validate_cb(hdev, parser, true);
5665 parser->user_cb = user_cb;
5668 hl_cb_put(parser->patched_cb);
5672 if (patched_cb_size != parser->patched_cb_size) {
5673 dev_err(hdev->dev, "user CB size mismatch\n");
5674 hl_cb_put(parser->patched_cb);
5681 * Always call cb destroy here because we still hold one reference
5682 * to it from the earlier cb_get. After the job is completed,
5683 * cb_put will release it, but here we want to remove it from the
5686 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5687 patched_cb_handle << PAGE_SHIFT);
5692 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5693 struct hl_cs_parser *parser)
5695 u64 patched_cb_handle;
5698 rc = gaudi_validate_cb(hdev, parser, false);
5703 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5704 parser->patched_cb_size, false, false,
5705 &patched_cb_handle);
5708 "Failed to allocate patched CB for DMA CS %d\n", rc);
5712 patched_cb_handle >>= PAGE_SHIFT;
5713 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5714 (u32) patched_cb_handle);
5715 /* hl_cb_get should never fail here */
5716 if (!parser->patched_cb) {
5717 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5718 (u32) patched_cb_handle);
5723 rc = gaudi_patch_cb(hdev, parser);
5726 hl_cb_put(parser->patched_cb);
5730 * Always call cb destroy here because we still hold one reference
5731 * to it from the earlier cb_get. After the job is completed,
5732 * cb_put will release it, but here we want to remove it from the
5735 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5736 patched_cb_handle << PAGE_SHIFT);
5740 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5744 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5745 struct hl_cs_parser *parser)
5747 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5748 struct gaudi_device *gaudi = hdev->asic_specific;
5749 u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5750 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5752 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5753 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5754 (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5755 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5756 parser->hw_queue_id);
5760 /* For internal queue jobs just check if CB address is valid */
5761 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5762 parser->user_cb_size,
5763 asic_prop->sram_user_base_address,
5764 asic_prop->sram_end_address))
5767 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5768 parser->user_cb_size,
5769 asic_prop->dram_user_base_address,
5770 asic_prop->dram_end_address))
5773 /* PMMU and HPMMU addresses are equal, check only one of them */
5774 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5775 parser->user_cb_size,
5776 asic_prop->pmmu.start_addr,
5777 asic_prop->pmmu.end_addr))
5781 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5782 parser->user_cb, parser->user_cb_size);
5787 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5789 struct gaudi_device *gaudi = hdev->asic_specific;
5791 if (parser->queue_type == QUEUE_TYPE_INT)
5792 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5794 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5795 return gaudi_parse_cb_mmu(hdev, parser);
5797 return gaudi_parse_cb_no_mmu(hdev, parser);
5800 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5801 void *kernel_address, u32 len,
5802 u64 cq_addr, u32 cq_val, u32 msi_vec,
5805 struct gaudi_device *gaudi = hdev->asic_specific;
5806 struct packet_msg_prot *cq_pkt;
5810 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5812 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5813 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5816 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5818 cq_pkt->ctl = cpu_to_le32(tmp);
5819 cq_pkt->value = cpu_to_le32(cq_val);
5820 cq_pkt->addr = cpu_to_le64(cq_addr);
5824 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5825 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5826 cq_pkt->ctl = cpu_to_le32(tmp);
5827 cq_pkt->value = cpu_to_le32(1);
5829 if (gaudi->multi_msi_mode)
5830 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5832 msi_addr = mmPCIE_CORE_MSI_REQ;
5834 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
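/*
 * Layout recap (illustrative): the two packets built above occupy the last
 * 2 * sizeof(struct packet_msg_prot) bytes that gaudi_validate_cb() and
 * gaudi_parse_cb_mmu() reserved at the end of the CB. The first writes
 * cq_val to cq_addr with EB set, so prior engine writes are flushed before
 * the completion is visible; the second writes 1 to the MSI address to
 * raise the interrupt.
 */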
5837 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5839 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5842 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5845 struct packet_lin_dma *lin_dma_pkt;
5846 struct hl_cs_job *job;
5847 u32 cb_size, ctl, err_cause;
5852 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5856 lin_dma_pkt = cb->kernel_address;
5857 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5858 cb_size = sizeof(*lin_dma_pkt);
5860 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5861 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5862 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5863 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5864 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5866 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5867 lin_dma_pkt->src_addr = cpu_to_le64(val);
5868 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5869 lin_dma_pkt->tsize = cpu_to_le32(size);
5871 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5873 dev_err(hdev->dev, "Failed to allocate a new job\n");
5878 /* Verify DMA is OK */
5879 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5880 if (err_cause && !hdev->init_done) {
5882 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5884 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5889 atomic_inc(&job->user_cb->cs_cnt);
5890 job->user_cb_size = cb_size;
5891 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5892 job->patched_cb = job->user_cb;
5893 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5895 hl_debugfs_add_job(hdev, job);
5897 rc = gaudi_send_job_on_qman0(hdev, job);
5898 hl_debugfs_remove_job(hdev, job);
5900 atomic_dec(&cb->cs_cnt);
5902 /* Verify DMA is OK */
5903 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5905 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5907 if (!hdev->init_done) {
5909 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5911 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5918 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
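/*
 * Usage note (illustrative): with the MEMSET bit set in ctl, the LIN_DMA
 * packet built above treats src_addr as a 64-bit fill pattern rather than a
 * source address. This is how the SRAM scrub path uses it, e.g.:
 *
 *	gaudi_memset_device_memory(hdev, addr, size, 0x7777777777777777ull);
 */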
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val)
{
	struct packet_msg_long *pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl;
	struct hl_cb *cb;
	int i, rc;

	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);

	if (cb_size > SZ_2M) {
		dev_err(hdev->dev, "CB size must be smaller than %u bytes", SZ_2M);
		return -ENOMEM;
	}

	cb = hl_cb_kernel_create(hdev, cb_size, false);
	if (!cb)
		return -EFAULT;

	pkt = cb->kernel_address;

	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	for (i = 0; i < num_regs ; i++, pkt++) {
		pkt->ctl = cpu_to_le32(ctl);
		pkt->value = cpu_to_le32(val);
		pkt->addr = cpu_to_le64(reg_base + (i * 4));
	}

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = cb_size;

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}

static int gaudi_restore_sm_registers(struct hl_device *hdev)
{
	u64 base_addr;
	u32 num_regs;
	int rc;

	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
	num_regs = NUM_OF_SOB_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
	num_regs = NUM_OF_SOB_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
	num_regs = NUM_OF_SOB_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
	num_regs = NUM_OF_MONITORS_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
	num_regs = NUM_OF_MONITORS_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
	num_regs = NUM_OF_MONITORS_IN_BLOCK;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}

	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
	if (rc) {
		dev_err(hdev->dev, "failed resetting SM registers");
		return rc;
	}

	return 0;
}

static void gaudi_restore_dma_registers(struct hl_device *hdev)
{
	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
	int i;

	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
		u64 sob_addr = CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
				(i * sob_delta);
		u32 dma_offset = i * DMA_CORE_OFFSET;

		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
				lower_32_bits(sob_addr));
		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
				upper_32_bits(sob_addr));
		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);

		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
		 * modified by the user for SRAM reduction
		 */
		if (i > 1)
			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
				0xffff8000);
	}
}

static void gaudi_restore_qm_registers(struct hl_device *hdev)
{
	u32 qman_offset;
	int i;

	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
		qman_offset = i * DMA_QMAN_OFFSET;
		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
	}

	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
	}

	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		qman_offset = i * TPC_QMAN_OFFSET;
		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
	}

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
	}
}

static int gaudi_restore_user_registers(struct hl_device *hdev)
{
	int rc;

	rc = gaudi_restore_sm_registers(hdev);
	if (rc)
		return rc;

	gaudi_restore_dma_registers(hdev);
	gaudi_restore_qm_registers(hdev);

	return 0;
}

static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
{
	return 0;
}

static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 addr = prop->mmu_pgt_addr;
	u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return gaudi_memset_device_memory(hdev, addr, size, 0);
}

static void gaudi_restore_phase_topology(struct hl_device *hdev)
{

}

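/*
 * The debugfs access helpers below dispatch on the target address range:
 * CFG space goes through RREG32/WREG32 (and is refused while clock gating
 * is enabled on the debugfs-blocked engines), SRAM is accessed directly
 * through its PCI BAR, HBM is reached by sliding the HBM BAR to the region
 * containing the address, and host physical addresses are only touched for
 * user addresses when no IOMMU is present.
 */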
static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
			bool user_address, u32 *val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hbm_bar_addr, host_phys_end;
	int rc = 0;

	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {

		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
				(hdev->clock_gating_mask &
						GAUDI_CLK_GATE_DEBUGFS_MASK)) {

			dev_err_ratelimited(hdev->dev,
				"Can't read register - clock gating is enabled!\n");
			rc = -EFAULT;
		} else {
			*val = RREG32(addr - CFG_BASE);
		}

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
		*val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
				(addr - SRAM_BASE_ADDR));
	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
		if (hbm_bar_addr != U64_MAX) {
			*val = readl(hdev->pcie_bar[HBM_BAR_ID] +
					(addr - bar_base_addr));

			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
					hbm_bar_addr);
		}
		if (hbm_bar_addr == U64_MAX)
			rc = -EIO;
	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
			user_address && !iommu_present(&pci_bus_type)) {
		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
	} else {
		rc = -EFAULT;
	}

	return rc;
}

static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
			bool user_address, u32 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hbm_bar_addr, host_phys_end;
	int rc = 0;

	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {

		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
				(hdev->clock_gating_mask &
						GAUDI_CLK_GATE_DEBUGFS_MASK)) {

			dev_err_ratelimited(hdev->dev,
				"Can't write register - clock gating is enabled!\n");
			rc = -EFAULT;
		} else {
			WREG32(addr - CFG_BASE, val);
		}

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
		writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
				(addr - SRAM_BASE_ADDR));
	} else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
		if (hbm_bar_addr != U64_MAX) {
			writel(val, hdev->pcie_bar[HBM_BAR_ID] +
					(addr - bar_base_addr));

			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
					hbm_bar_addr);
		}
		if (hbm_bar_addr == U64_MAX)
			rc = -EIO;
	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
			user_address && !iommu_present(&pci_bus_type)) {
		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
	} else {
		rc = -EFAULT;
	}

	return rc;
}

static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
				bool user_address, u64 *val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hbm_bar_addr, host_phys_end;
	int rc = 0;

	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;

	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {

		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
				(hdev->clock_gating_mask &
						GAUDI_CLK_GATE_DEBUGFS_MASK)) {

			dev_err_ratelimited(hdev->dev,
				"Can't read register - clock gating is enabled!\n");
			rc = -EFAULT;
		} else {
			u32 val_l = RREG32(addr - CFG_BASE);
			u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);

			*val = (((u64) val_h) << 32) | val_l;
		}

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
		*val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
				(addr - SRAM_BASE_ADDR));
	} else if (addr <=
		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
		if (hbm_bar_addr != U64_MAX) {
			*val = readq(hdev->pcie_bar[HBM_BAR_ID] +
					(addr - bar_base_addr));

			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
					hbm_bar_addr);
		}
		if (hbm_bar_addr == U64_MAX)
			rc = -EIO;
	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
			user_address && !iommu_present(&pci_bus_type)) {
		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
	} else {
		rc = -EFAULT;
	}

	return rc;
}

static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
				bool user_address, u64 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hbm_bar_addr, host_phys_end;
	int rc = 0;

	host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;

	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {

		if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
				(hdev->clock_gating_mask &
						GAUDI_CLK_GATE_DEBUGFS_MASK)) {

			dev_err_ratelimited(hdev->dev,
				"Can't write register - clock gating is enabled!\n");
			rc = -EFAULT;
		} else {
			WREG32(addr - CFG_BASE, lower_32_bits(val));
			WREG32(addr + sizeof(u32) - CFG_BASE,
				upper_32_bits(val));
		}

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
		writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
				(addr - SRAM_BASE_ADDR));
	} else if (addr <=
		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
		if (hbm_bar_addr != U64_MAX) {
			writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
					(addr - bar_base_addr));

			hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
					hbm_bar_addr);
		}
		if (hbm_bar_addr == U64_MAX)
			rc = -EIO;
	} else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
			user_address && !iommu_present(&pci_bus_type)) {
		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
	} else {
		rc = -EFAULT;
	}

	return rc;
}

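/*
 * Program one DMA core engine directly (no QMAN involvement) to copy
 * size_to_dma bytes from a device address to a host DMA address, commit the
 * linear transfer, then poll STS0 until the engine goes idle and check
 * ERR_CAUSE for any latched error.
 */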
static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
					u32 size_to_dma, dma_addr_t dma_addr)
{
	u32 err_cause, val;
	u64 dma_offset;
	int rc;

	dma_offset = dma_id * DMA_CORE_OFFSET;

	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));

	rc = hl_poll_timeout(
		hdev,
		mmDMA0_CORE_STS0 + dma_offset,
		val,
		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
		0,
		1000000);

	if (rc) {
		dev_err(hdev->dev,
			"DMA %d timed-out during reading of 0x%llx\n",
			dma_id, addr);
		return -EIO;
	}

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);

		return -EIO;
	}

	return 0;
}

static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
				void *blob_addr)
{
	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 dma_offset, qm_offset;
	dma_addr_t dma_addr;
	void *kernel_addr;
	bool is_eng_idle;
	int rc = 0, dma_id;

	kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
						hdev, SZ_2M,
						&dma_addr,
						GFP_KERNEL | __GFP_ZERO);

	if (!kernel_addr)
		return -ENOMEM;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	hdev->asic_funcs->hw_queues_lock(hdev);

	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
	dma_offset = dma_id * DMA_CORE_OFFSET;
	qm_offset = dma_id * DMA_QMAN_OFFSET;
	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
	is_eng_idle = IS_DMA_IDLE(dma_core_sts0);

	if (!is_eng_idle) {
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
		dma_offset = dma_id * DMA_CORE_OFFSET;
		qm_offset = dma_id * DMA_QMAN_OFFSET;
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
		is_eng_idle = IS_DMA_IDLE(dma_core_sts0);

		if (!is_eng_idle) {
			dev_err_ratelimited(hdev->dev,
				"Can't read via DMA because it is BUSY\n");
			rc = -EAGAIN;
			goto out;
		}
	}

	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
	 * ASID
	 */
	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
	}

	pos = 0;
	size_left = size;
	size_to_dma = SZ_2M;

	while (size_left > 0) {

		if (size_left < SZ_2M)
			size_to_dma = size_left;

		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
						dma_addr);
		if (rc)
			break;

		memcpy(blob_addr + pos, kernel_addr, size_to_dma);

		if (size_left <= SZ_2M)
			break;

		pos += SZ_2M;
		addr += SZ_2M;
		size_left -= SZ_2M;
	}

	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
	 * ASID
	 */
	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
			~BIT(DMA0_CORE_PROT_VAL_SHIFT));

	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
						dma_addr);

	return rc;
}

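/*
 * MMU page-table entries live in HBM and are accessed through the HBM BAR,
 * relative to its current base address. Accesses are refused while a hard
 * reset is pending.
 */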
static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->hard_reset_pending)
		return U64_MAX;

	return readq(hdev->pcie_bar[HBM_BAR_ID] +
			(addr - gaudi->hbm_bar_cur_addr));
}

static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->hard_reset_pending)
		return;

	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
			(addr - gaudi->hbm_bar_cur_addr));
}

void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}

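/*
 * Stamp the given ASID into the NON_SECURE_PROPS registers of every
 * initiator on the chip (DMA QMANs and cores, TPCs, MMEs, any active NIC
 * QMANs and the PSOC trace unit) so their transactions are translated by
 * the MMU under that context. Clock gating is disabled for the duration of
 * the register writes.
 */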
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
		dev_crit(hdev->dev, "asid %u is too big\n", asid);
		return;
	}

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}
	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);
}

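/*
 * Synchronously run a driver-internal job on the DMA0 QMAN: a MSG_PROT
 * fence packet is appended at the end of the patched CB, the CB is sent
 * without a completion entry, and the driver polls the fence buffer in
 * host memory until the expected value lands or the timeout expires.
 */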
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout, dma_offset;
	int rc;

	if (hdev->pldm)
		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
		dev_err_ratelimited(hdev->dev,
			"Can't send driver job on QMAN0 because the device is not idle\n");
		return -EBUSY;
	}

	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	cb = job->patched_cb;

	fence_pkt = cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot);

	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;

	WREG32(mmDMA0_CORE_PROT + dma_offset,
		BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

free_fence_ptr:
	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));

	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);
	return rc;
}

static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
{
	if (event_type >= GAUDI_EVENT_SIZE)
		goto event_not_supported;

	if (!gaudi_irq_map_table[event_type].valid)
		goto event_not_supported;

	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);

	return;

event_not_supported:
	snprintf(desc, size, "N/A");
}

static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
							u32 x_y, bool is_write)
{
	u32 dma_id[2], dma_offset, err_cause[2], mask, i;

	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
			DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		dma_id[0] = 0;
		dma_id[1] = 2;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		dma_id[0] = 1;
		dma_id[1] = 3;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		dma_id[0] = 4;
		dma_id[1] = 6;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		dma_id[0] = 5;
		dma_id[1] = 7;
		break;
	default:
		goto unknown_initiator;
	}

	for (i = 0 ; i < 2 ; i++) {
		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	}

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA0";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA2";
		else
			return "DMA0 or DMA2";
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA1";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA3";
		else
			return "DMA1 or DMA3";
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA4";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA6";
		else
			return "DMA4 or DMA6";
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA5";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA7";
		else
			return "DMA5 or DMA7";
	default:
		break;
	}

unknown_initiator:
	return "unknown initiator";
}

static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
							bool is_write)
{
	u32 val, x_y, axi_id;

	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
				RREG32(mmMMU_UP_RAZWI_READ_ID);
	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
			RAZWI_INITIATOR_AXI_ID_SHIFT);

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC0";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
			return "NIC0";
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC1:
		return "TPC1";
	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
		return "MME0";
	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
		return "MME1";
	case RAZWI_INITIATOR_ID_X_Y_TPC2:
		return "TPC2";
	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC3";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
			return "PCI";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
			return "CPU";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
			return "PSOC";
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC4";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
			return "NIC1";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
			return "NIC2";
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC5:
		return "TPC5";
	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
		return "MME2";
	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
		return "MME3";
	case RAZWI_INITIATOR_ID_X_Y_TPC6:
		return "TPC6";
	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC7";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
			return "NIC4";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
			return "NIC5";
		break;
	default:
		break;
	}

	dev_err(hdev->dev,
		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
		val,
		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
			RAZWI_INITIATOR_AXI_ID_MASK);

	return "unknown initiator";
}

static void gaudi_print_razwi_info(struct hl_device *hdev)
{
	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal write of %s\n",
			gaudi_get_razwi_initiator_name(hdev, true));
		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
	}

	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal read of %s\n",
			gaudi_get_razwi_initiator_name(hdev, false));
		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
	}
}

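/*
 * The MMU error capture registers latch the upper VA bits together with a
 * valid bit; the full virtual address is reassembled by shifting the
 * captured VA[49:32] bits up by 32 and OR-ing in the low VA register, and
 * the capture is then cleared so the next fault can be latched.
 */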
static void gaudi_print_mmu_error_info(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 addr;
	u32 val;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		addr <<= 32;
		addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
					addr);

		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
	}

	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
		addr <<= 32;
		addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev,
				"MMU access error on va 0x%llx\n", addr);

		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
	}
}

/*
 * +-------------------+------------------------------------------------------+
 * | Configuration Reg |                     Description                      |
 * |      Address      |                                                      |
 * +-------------------+------------------------------------------------------+
 * |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
 * |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
 * |                   |0xF34 memory wrappers 63:32                           |
 * |                   |0xF38 memory wrappers 95:64                           |
 * |                   |0xF3C memory wrappers 127:96                          |
 * +-------------------+------------------------------------------------------+
 * |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
 * |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
 * |                   |0xF44 memory wrappers 63:32                           |
 * |                   |0xF48 memory wrappers 95:64                           |
 * |                   |0xF4C memory wrappers 127:96                          |
 * +-------------------+------------------------------------------------------+
 */
static int gaudi_extract_ecc_info(struct hl_device *hdev,
		struct ecc_info_extract_params *params, u64 *ecc_address,
		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 i, num_mem_regs, reg, err_bit;
	u64 err_addr, err_word = 0;
	int rc = 0;

	num_mem_regs = params->num_memories / 32 +
			((params->num_memories % 32) ? 1 : 0);

	if (params->block_address >= CFG_BASE)
		params->block_address -= CFG_BASE;

	if (params->derr)
		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
	else
		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;

	if (params->disable_clock_gating) {
		mutex_lock(&gaudi->clk_gate_mutex);
		hdev->asic_funcs->disable_clock_gating(hdev);
	}

	/* Set invalid wrapper index */
	*memory_wrapper_idx = 0xFF;

	/* Iterate through memory wrappers, a single bit must be set */
	for (i = 0 ; i < num_mem_regs ; i++) {
		err_word = RREG32(err_addr + i * 4);
		if (err_word) {
			err_bit = __ffs(err_word);
			*memory_wrapper_idx = err_bit + (32 * i);
			break;
		}
	}

	if (*memory_wrapper_idx == 0xFF) {
		dev_err(hdev->dev, "ECC error information cannot be found\n");
		rc = -EINVAL;
		goto enable_clk_gate;
	}

	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
			*memory_wrapper_idx);

	*ecc_address =
		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
	*ecc_syndrom =
		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);

	/* Clear error indication */
	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
	if (params->derr)
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
	else
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);

	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);

enable_clk_gate:
	if (params->disable_clock_gating) {
		hdev->asic_funcs->set_clock_gating(hdev);

		mutex_unlock(&gaudi->clk_gate_mutex);
	}

	return rc;
}

/**
 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
 *
 * @idx: the current pi/ci value
 * @q_len: the queue length (power of 2)
 *
 * @return the cyclically decremented index
 */
static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
{
	u32 mask = q_len - 1;

	/*
	 * modular decrement is equivalent to adding (queue_size - 1);
	 * later we take the LSBs to make sure the value is in the
	 * range [0, queue_len - 1]
	 */
	return (idx + q_len - 1) & mask;
}

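/*
 * Example: with q_len = 1024 (mask = 1023), gaudi_queue_idx_dec(0, 1024)
 * yields (0 + 1023) & 1023 = 1023 and gaudi_queue_idx_dec(5, 1024) yields
 * (5 + 1023) & 1023 = 4, i.e. a branch-free wrap-around decrement.
 */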
/**
 * gaudi_print_sw_config_stream_data - print SW config stream data
 *
 * @hdev: pointer to the habanalabs device structure
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 */
static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
						u64 qman_base)
{
	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
	u32 cq_ptr_lo_off, size;

	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;

	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
			stream * cq_ptr_lo_off;
	cq_ptr_hi = cq_ptr_lo +
			(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
	cq_tsize = cq_ptr_lo +
			(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);

	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
	size = RREG32(cq_tsize);
	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
			stream, cq_ptr, size);
}

/**
 * gaudi_print_last_pqes_on_err - print last PQEs on error
 *
 * @hdev: pointer to the habanalabs device structure
 * @qid_base: first QID of the QMAN (out of 4 streams)
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
 */
static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
						u32 stream, u64 qman_base,
						bool pr_sw_conf)
{
	u32 ci, qm_ci_stream_off, queue_len;
	struct hl_hw_queue *q;
	u64 pq_ci;
	int i;

	q = &hdev->kernel_queues[qid_base + stream];

	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
			stream * qm_ci_stream_off;

	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
			q->int_queue_len : HL_QUEUE_LENGTH;

	hdev->asic_funcs->hw_queues_lock(hdev);

	if (pr_sw_conf)
		gaudi_print_sw_config_stream_data(hdev, stream, qman_base);

	ci = RREG32(pq_ci);

	/* we should start printing from ci - 1 */
	ci = gaudi_queue_idx_dec(ci, queue_len);

	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
		struct hl_bd *bd;
		u64 addr;
		u32 len;

		bd = q->kernel_address;
		bd += ci;

		len = le32_to_cpu(bd->len);
		/* len 0 means an uninitialized entry - break */
		if (!len)
			break;

		addr = le64_to_cpu(bd->ptr);

		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
				stream, ci, addr, len);

		/* get previous ci, wrap if needed */
		ci = gaudi_queue_idx_dec(ci, queue_len);
	}

	hdev->asic_funcs->hw_queues_unlock(hdev);
}

/**
 * print_qman_data_on_err - extract QMAN data on error
 *
 * @hdev: pointer to the habanalabs device structure
 * @qid_base: first QID of the QMAN (out of 4 streams)
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 *
 * This function attempts to extract as much data as possible on a QMAN
 * error. On an upper CP, print the SW config stream data and the last 8
 * PQEs. On the lower CP, print the SW config data and the last PQEs of all
 * 4 upper CPs.
 */
static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
						u32 stream, u64 qman_base)
{
	u32 i;

	if (stream != QMAN_STREAMS) {
		gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
				true);
		return;
	}

	gaudi_print_sw_config_stream_data(hdev, stream, qman_base);

	for (i = 0; i < QMAN_STREAMS; i++)
		gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
				false);
}

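/*
 * Each QMAN exposes one GLBL_STS1 register per stream plus one for the
 * lower CP; latched error bits are write-1-to-clear. When stop_on_err is
 * not set the handler clears the bits so the QMAN can keep running; when
 * it is set, the PQ/CQ state is dumped instead for post-mortem analysis.
 */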
static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
					  const char *qm_name,
					  u64 qman_base,
					  u32 qid_base)
{
	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
	u64 glbl_sts_addr, arb_err_addr;
	char reg_desc[32];

	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);

	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
		glbl_sts_clr_val = 0;
		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);

		if (!glbl_sts_val)
			continue;

		if (i == QMAN_STREAMS)
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
		else
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);

		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
			if (glbl_sts_val & BIT(j)) {
				dev_err_ratelimited(hdev->dev,
						"%s %s. err cause: %s\n",
						qm_name, reg_desc,
						gaudi_qman_error_cause[j]);
				glbl_sts_clr_val |= BIT(j);
			}
		}

		/* Write 1 clear errors */
		if (!hdev->stop_on_err)
			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
		else
			print_qman_data_on_err(hdev, qid_base, i, qman_base);
	}

	arb_err_val = RREG32(arb_err_addr);

	if (!arb_err_val)
		return;

	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
		if (arb_err_val & BIT(j)) {
			dev_err_ratelimited(hdev->dev,
					"%s ARB_ERR. err cause: %s\n",
					qm_name,
					gaudi_qman_arb_error_cause[j]);
		}
	}
}

static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
		struct hl_eq_sm_sei_data *sei_data)
{
	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;

	/* Flip the bits as the enum is ordered in the opposite way */
	index = (index ^ 0x3) & 0x3;

	switch (sei_data->sei_cause) {
	case SM_SEI_SO_OVERFLOW:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: SOB Group %u overflow/underflow",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	case SM_SEI_LBW_4B_UNALIGNED:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	case SM_SEI_AXI_RESPONSE_ERR:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: AXI ID %u response error",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	default:
		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
				le32_to_cpu(sei_data->sei_log));
		break;
	}
}

static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
		struct hl_eq_ecc_data *ecc_data)
{
	struct ecc_info_extract_params params;
	u64 ecc_address = 0, ecc_syndrom = 0;
	u8 index, memory_wrapper_idx = 0;
	bool extract_info_from_fw;
	int rc;

	if (hdev->asic_prop.fw_security_enabled) {
		extract_info_from_fw = true;
		goto extract_ecc_info;
	}

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
		extract_info_from_fw = true;
		break;
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
		index = event_type - GAUDI_EVENT_TPC0_SERR;
		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = false;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
		index = event_type - GAUDI_EVENT_TPC0_DERR;
		params.block_address =
			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = false;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = false;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	default:
		return;
	}

extract_ecc_info:
	if (extract_info_from_fw) {
		ecc_address = le64_to_cpu(ecc_data->ecc_address);
		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
	} else {
		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
				&ecc_syndrom, &memory_wrapper_idx);
		if (rc)
			return;
	}

	dev_err(hdev->dev,
		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
		ecc_address, ecc_syndrom, memory_wrapper_idx);
}

static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
{
	u64 qman_base;
	char desc[32];
	u32 qid_base;
	u8 index;

	switch (event_type) {
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		index = event_type - GAUDI_EVENT_TPC0_QM;
		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
		break;
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
		index = event_type - GAUDI_EVENT_MME0_QM;
		qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
		break;
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		index = event_type - GAUDI_EVENT_DMA0_QM;
		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
		if (index > 1)
			qid_base++;
		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
		break;
	case GAUDI_EVENT_NIC0_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
		qman_base = mmNIC0_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
		break;
	case GAUDI_EVENT_NIC0_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
		qman_base = mmNIC0_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
		break;
	case GAUDI_EVENT_NIC1_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
		qman_base = mmNIC1_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
		break;
	case GAUDI_EVENT_NIC1_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
		qman_base = mmNIC1_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
		break;
	case GAUDI_EVENT_NIC2_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
		qman_base = mmNIC2_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
		break;
	case GAUDI_EVENT_NIC2_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
		qman_base = mmNIC2_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
		break;
	case GAUDI_EVENT_NIC3_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
		qman_base = mmNIC3_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
		break;
	case GAUDI_EVENT_NIC3_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
		qman_base = mmNIC3_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
		break;
	case GAUDI_EVENT_NIC4_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
		qman_base = mmNIC4_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
		break;
	case GAUDI_EVENT_NIC4_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
		qman_base = mmNIC4_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
		break;
	default:
		return;
	}

	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
}

static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
					bool razwi)
{
	char desc[64] = "";

	gaudi_get_event_desc(event_type, desc, sizeof(desc));
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);

	if (razwi) {
		gaudi_print_razwi_info(hdev);
		gaudi_print_mmu_error_info(hdev);
	}
}

static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
			struct cpucp_pkt_sync_err *sync_err)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];

	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
			sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
}

static void gaudi_print_fw_alive_info(struct hl_device *hdev,
			struct hl_eq_fw_alive *fw_alive)
{
	dev_err(hdev->dev,
		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
		"Minor" : "Critical", fw_alive->process_id,
		fw_alive->thread_id, fw_alive->uptime_seconds);
}

static int gaudi_soft_reset_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	/* Unmask all IRQs since some could have been received
	 * during the soft reset
	 */
	return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
}

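/*
 * HBM ECC/parity status is gathered through one of two paths: when the FW
 * reports HBM ECC data (CPU_BOOT_DEV_STS0_HBM_ECC_EN) the information
 * arrives pre-parsed in the event entry; otherwise, and only when FW
 * security is disabled, the driver reads and clears the memory-controller
 * registers of every HBM channel directly.
 */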
static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
{
	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
	int rc = 0;

	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
		if (!hbm_ecc_data) {
			dev_err(hdev->dev, "No FW ECC data");
			return 0;
		}

		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));

		dev_err(hdev->dev,
			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
			device, ch, wr_par, rd_par, ca_par, serr, derr);
		dev_err(hdev->dev,
			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
			device, ch, hbm_ecc_data->first_addr, type,
			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
			hbm_ecc_data->dec_cnt);
		return 0;
	}

	if (hdev->asic_prop.fw_security_enabled) {
		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
		return 0;
	}

	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			rc = -EIO;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x060);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
				device, ch * 2,
				RREG32(base + ch * 0x1000 + 0x064),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			rc = -EIO;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x070);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
				device, ch * 2 + 1,
				RREG32(base + ch * 0x1000 + 0x074),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		/* Clear interrupts */
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
	}

	val = RREG32(base + 0x8F30);
	val2 = RREG32(base + 0x8F34);
	if (val | val2) {
		rc = -EIO;
		dev_err(hdev->dev,
			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
			device, val, val2);
	}
	val = RREG32(base + 0x8F40);
	val2 = RREG32(base + 0x8F44);
	if (val | val2) {
		rc = -EIO;
		dev_err(hdev->dev,
			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
			device, val, val2);
	}

	return rc;
}

static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
{
	switch (hbm_event_type) {
	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM0_SPI_1:
		return 0;
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_1:
		return 1;
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_1:
		return 2;
	case GAUDI_EVENT_HBM3_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_1:
		return 3;
	default:
		break;
	}

	/* Should never happen */
	return 0;
}

7843 /* Should never happen */
static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
		char *interrupt_name)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
	bool soft_reset_required = false;

	/* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
	 * gating, and thus cannot be done in CPU-CP and should be done instead
	 * by the driver.
	 */

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;

	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
		if (tpc_interrupts_cause & BIT(i)) {
			dev_err_ratelimited(hdev->dev,
					"TPC%d_%s interrupt cause: %s\n",
					tpc_id, interrupt_name,
					gaudi_tpc_interrupts_cause[i]);
			/* If this is QM error, we need to soft-reset */
			if (i == 15)
				soft_reset_required = true;
		}

	/* Clear interrupts */
	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);

	return soft_reset_required;
}
static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
}

static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
}
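/*
 * Editorial worked example for the two helpers above (inferred from the
 * arithmetic, not from the original comments): DEC events appear to come
 * in pairs per TPC, so the engine index is the event distance divided by
 * two, while KRN_ERR events are spaced six entries apart:
 *
 *	tpc_dec_event_to_tpc_id(GAUDI_EVENT_TPC0_DEC + 6) == 3
 *	tpc_krn_event_to_tpc_id(GAUDI_EVENT_TPC0_KRN_ERR + 12) == 2
 */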
static void gaudi_print_clk_change_info(struct hl_device *hdev,
					u16 event_type)
{
	switch (event_type) {
	case GAUDI_EVENT_FIX_POWER_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}
}
static void gaudi_handle_eqe(struct hl_device *hdev,
				struct hl_eq_entry *eq_entry)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u32 fw_fatal_err_flag = 0;
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
			>> EQ_CTL_EVENT_TYPE_SHIFT);
	bool reset_required;
	u8 cause;
	int rc;

	if (event_type >= GAUDI_EVENT_SIZE) {
		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
				event_type, GAUDI_EVENT_SIZE - 1);
		return;
	}

	gaudi->events_stat[event_type]++;
	gaudi->events_stat_aggregate[event_type]++;

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_DERR:
	case GAUDI_EVENT_PCIE_IF_DERR:
	case GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
		fallthrough;
	case GAUDI_EVENT_CPU_IF_ECC_DERR:
	case GAUDI_EVENT_PSOC_MEM_DERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
	case GAUDI_EVENT_MMU_DERR:
	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
		goto reset_device;

	case GAUDI_EVENT_GIC500:
	case GAUDI_EVENT_AXI_ECC:
	case GAUDI_EVENT_L2_RAM_ECC:
	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
		gaudi_print_irq_info(hdev, event_type, false);
		fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
		goto reset_device;

	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_0:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
		goto reset_device;

	case GAUDI_EVENT_HBM0_SPI_1:
	case GAUDI_EVENT_HBM1_SPI_1:
	case GAUDI_EVENT_HBM2_SPI_1:
	case GAUDI_EVENT_HBM3_SPI_1:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_TPC0_DEC:
	case GAUDI_EVENT_TPC1_DEC:
	case GAUDI_EVENT_TPC2_DEC:
	case GAUDI_EVENT_TPC3_DEC:
	case GAUDI_EVENT_TPC4_DEC:
	case GAUDI_EVENT_TPC5_DEC:
	case GAUDI_EVENT_TPC6_DEC:
	case GAUDI_EVENT_TPC7_DEC:
		gaudi_print_irq_info(hdev, event_type, true);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_dec_event_to_tpc_id(event_type),
					"AXI_SLV_DEC_Error");
		if (reset_required) {
			dev_err(hdev->dev, "reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			hl_device_reset(hdev, 0);
		} else {
			hl_fw_unmask_irq(hdev, event_type);
		}
		break;

	case GAUDI_EVENT_TPC0_KRN_ERR:
	case GAUDI_EVENT_TPC1_KRN_ERR:
	case GAUDI_EVENT_TPC2_KRN_ERR:
	case GAUDI_EVENT_TPC3_KRN_ERR:
	case GAUDI_EVENT_TPC4_KRN_ERR:
	case GAUDI_EVENT_TPC5_KRN_ERR:
	case GAUDI_EVENT_TPC6_KRN_ERR:
	case GAUDI_EVENT_TPC7_KRN_ERR:
		gaudi_print_irq_info(hdev, event_type, true);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_krn_event_to_tpc_id(event_type),
					"KRN_ERR");
		if (reset_required) {
			dev_err(hdev->dev, "reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			hl_device_reset(hdev, 0);
		} else {
			hl_fw_unmask_irq(hdev, event_type);
		}
		break;

	case GAUDI_EVENT_PCIE_CORE_SERR:
	case GAUDI_EVENT_PCIE_IF_SERR:
	case GAUDI_EVENT_PCIE_PHY_SERR:
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
	case GAUDI_EVENT_CPU_IF_ECC_SERR:
	case GAUDI_EVENT_PSOC_MEM_SERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
		fallthrough;
	case GAUDI_EVENT_MMU_SERR:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PCIE_DEC:
	case GAUDI_EVENT_MME0_WBC_RSP:
	case GAUDI_EVENT_MME0_SBAB0_RSP:
	case GAUDI_EVENT_MME1_WBC_RSP:
	case GAUDI_EVENT_MME1_SBAB0_RSP:
	case GAUDI_EVENT_MME2_WBC_RSP:
	case GAUDI_EVENT_MME2_SBAB0_RSP:
	case GAUDI_EVENT_MME3_WBC_RSP:
	case GAUDI_EVENT_MME3_SBAB0_RSP:
	case GAUDI_EVENT_CPU_AXI_SPLITTER:
	case GAUDI_EVENT_PSOC_AXI_DEC:
	case GAUDI_EVENT_PSOC_PRSTN_FALL:
	case GAUDI_EVENT_MMU_PAGE_FAULT:
	case GAUDI_EVENT_MMU_WR_PERM:
	case GAUDI_EVENT_RAZWI_OR_ADC:
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		fallthrough;
	case GAUDI_EVENT_NIC0_QM0:
	case GAUDI_EVENT_NIC0_QM1:
	case GAUDI_EVENT_NIC1_QM0:
	case GAUDI_EVENT_NIC1_QM1:
	case GAUDI_EVENT_NIC2_QM0:
	case GAUDI_EVENT_NIC2_QM1:
	case GAUDI_EVENT_NIC3_QM0:
	case GAUDI_EVENT_NIC3_QM1:
	case GAUDI_EVENT_NIC4_QM0:
	case GAUDI_EVENT_NIC4_QM1:
	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_qman_err(hdev, event_type);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
		gaudi_print_irq_info(hdev, event_type, true);
		hl_device_reset(hdev, 0);
		break;

	case GAUDI_EVENT_TPC0_BMON_SPMU:
	case GAUDI_EVENT_TPC1_BMON_SPMU:
	case GAUDI_EVENT_TPC2_BMON_SPMU:
	case GAUDI_EVENT_TPC3_BMON_SPMU:
	case GAUDI_EVENT_TPC4_BMON_SPMU:
	case GAUDI_EVENT_TPC5_BMON_SPMU:
	case GAUDI_EVENT_TPC6_BMON_SPMU:
	case GAUDI_EVENT_TPC7_BMON_SPMU:
	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
		gaudi_print_irq_info(hdev, event_type, false);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_print_sm_sei_info(hdev, event_type,
					&eq_entry->sm_sei_data);
		rc = hl_state_dump(hdev);
		if (rc)
			dev_err(hdev->dev,
				"Error during system state dump %d\n", rc);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
		gaudi_print_clk_change_info(hdev, event_type);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PSOC_GPIO_U16_0:
		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
		dev_err(hdev->dev,
			"Received high temp H/W interrupt %d (cause %d)\n",
			event_type, cause);
		break;

	case GAUDI_EVENT_DEV_RESET_REQ:
		gaudi_print_irq_info(hdev, event_type, false);
		goto reset_device;

	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
		goto reset_device;

	case GAUDI_EVENT_FW_ALIVE_S:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
		goto reset_device;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}

	return;

reset_device:
	if (hdev->asic_prop.fw_security_enabled)
		hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW | fw_fatal_err_flag);
	else if (hdev->hard_reset_on_fw_events)
		hl_device_reset(hdev, HL_RESET_HARD | fw_fatal_err_flag);
	else
		hl_fw_unmask_irq(hdev, event_type);
}
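/*
 * Editorial summary of the dispatch policy above (derived from the flow):
 * fatal paths jump to reset_device with fw_fatal_err_flag set to
 * HL_RESET_FW_FATAL_ERR, so the eventual hl_device_reset() call carries
 * both the hard-reset request and the fatal-error indication. Non-fatal
 * events only print, update statistics and unmask the interrupt in the
 * firmware so it can fire again.
 */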
static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
					u32 *size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (aggregate) {
		*size = (u32) sizeof(gaudi->events_stat_aggregate);
		return gaudi->events_stat_aggregate;
	}

	*size = (u32) sizeof(gaudi->events_stat);
	return gaudi->events_stat;
}

static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
					u32 flags)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_PS, 3);
	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
	WREG32(mmSTLB_INV_PS, 2);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_PS,
		status,
		!status,
		1000,
		timeout_usec);

	WREG32(mmSTLB_INV_SET, 0);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, HL_RESET_HARD);
	}

	return rc;
}
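/*
 * Editorial sketch of the invalidation handshake above, as read from the
 * register accesses (not from a hardware spec): the 3 and 2 writes to
 * mmSTLB_INV_PS bracket the producer-index write to mmSTLB_CACHE_INV,
 * completion is detected by polling mmSTLB_INV_PS until it drops to zero,
 * and a timeout is treated as unrecoverable, escalating to a hard reset.
 */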
static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
					bool is_hard, u32 flags,
					u32 asid, u64 va, u64 size)
{
	/* Treat as invalidate all because there is no range invalidation
	 * in Gaudi
	 */
	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
}
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
					u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(MMU_ASID, asid);
	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_BUSY, 0x80000000);

	rc = hl_poll_timeout(
		hdev,
		MMU_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}
static int gaudi_send_heartbeat(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_heartbeat(hdev);
}
static int gaudi_cpucp_info_get(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1);
	if (rc)
		return rc;

	if (!strlen(prop->cpucp_info.card_name))
		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
				CARD_NAME_MAX_LEN);

	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);

	set_default_power_values(hdev);

	hdev->max_power = prop->max_power_default;

	return 0;
}
static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
					u8 mask_len, struct seq_file *s)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
	unsigned long *mask = (unsigned long *)mask_arr;
	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
	bool is_idle = true, is_eng_idle, is_slave;
	u64 offset;
	int i, dma_id, port;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	if (s)
		seq_puts(s,
			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
			"---  -------  ------------  ----------  -------------\n");

	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		offset = dma_id * DMA_QMAN_OFFSET;

		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
		if (s)
			seq_printf(s, fmt, dma_id,
				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				qm_cgm_sts, dma_core_sts0);
	}

	if (s)
		seq_puts(s,
			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
			"---  -------  ------------  ----------  ----------\n");

	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		offset = i * TPC_QMAN_OFFSET;
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
		if (s)
			seq_printf(s, fmt, i,
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
	}

	if (s)
		seq_puts(s,
			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
			"---  -------  ------------  ----------  -----------\n");

	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
		offset = i * MME_QMAN_OFFSET;
		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
		is_eng_idle = IS_MME_IDLE(mme_arch_sts);

		/* MME 1 & 3 are slaves, no need to check their QMANs */
		is_slave = i % 2;
		if (!is_slave) {
			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
		}

		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
		if (s) {
			if (!is_slave)
				seq_printf(s, fmt, i,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
			else
				seq_printf(s, mme_slave_fmt, i,
					is_eng_idle ? "Y" : "N", "-",
					"-", mme_arch_sts);
		}
	}

	if (s)
		seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
				"---  -------  ------------  ----------\n");

	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
		offset = i * NIC_MACRO_QMAN_OFFSET;
		port = 2 * i;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (s)
				seq_printf(s, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}

		port = 2 * i + 1;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (s)
				seq_printf(s, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}
	}

	if (s)
		seq_puts(s, "\n");

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);

	return is_idle;
}
static void gaudi_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_lock(&gaudi->hw_queues_lock);
}

static void gaudi_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_unlock(&gaudi->hw_queues_lock);
}
static u32 gaudi_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}
static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
				size_t max_size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_eeprom_data(hdev, data, max_size);
}
/*
 * this function should be used only during initialization and/or after reset,
 * when there are no active users.
 */
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 kernel_timeout;
	u32 status, offset;
	int rc = 0;

	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);

	if (hdev->pldm)
		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
	else
		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
			upper_32_bits(tpc_kernel));
	/* set a valid LUT pointer, content is of no significance */
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
			lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));

	WREG32(mmTPC0_CFG_TPC_CMD + offset,
			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d icache prefetch\n",
			tpc_id);
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);

	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d vector pipe\n",
			tpc_id);
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
		status,
		(status == 0),
		1000,
		kernel_timeout);

	hdev->asic_funcs->set_clock_gating(hdev);
	mutex_unlock(&gaudi->clk_gate_mutex);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d kernel to execute\n",
			tpc_id);
		return -EIO;
	}

	return 0;
}
static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int min_alloc_order, rc, collective_cb_size;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	hdev->internal_cb_pool_virt_addr =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					HOST_SPACE_INTERNAL_CB_SZ,
					&hdev->internal_cb_pool_dma_addr,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
			sizeof(struct packet_fence);
	min_alloc_order = ilog2(collective_cb_size);

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev,
			"Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool,
				(uintptr_t) hdev->internal_cb_pool_virt_addr,
				HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base) {
		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}

	mutex_lock(&ctx->mmu_lock);
	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
			hdev->internal_cb_pool_dma_addr,
			HOST_SPACE_INTERNAL_CB_SZ);
	hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
	mutex_unlock(&ctx->mmu_lock);

	if (rc)
		goto unreserve_internal_cb_pool;

	return 0;

unreserve_internal_cb_pool:
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HOST_SPACE_INTERNAL_CB_SZ,
			hdev->internal_cb_pool_virt_addr,
			hdev->internal_cb_pool_dma_addr);

	return rc;
}
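/*
 * Editorial arithmetic for the pool geometry above, assuming the 8-byte
 * packet_msg_short and packet_fence layouts used elsewhere in this file:
 * collective_cb_size = 5 * 8 + 8 = 48 bytes, ilog2(48) == 5, so the
 * gen_pool hands out memory in 32-byte chunks. This is a reading of the
 * code, not a documented requirement.
 */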
static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	mutex_lock(&ctx->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
	mutex_unlock(&ctx->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HOST_SPACE_INTERNAL_CB_SZ,
			hdev->internal_cb_pool_virt_addr,
			hdev->internal_cb_pool_dma_addr);
}
static int gaudi_ctx_init(struct hl_ctx *ctx)
{
	int rc;

	if (ctx->asid == HL_KERNEL_ASID_ID)
		return 0;

	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
	if (rc)
		return rc;

	rc = gaudi_restore_user_registers(ctx->hdev);
	if (rc)
		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);

	return rc;
}

static void gaudi_ctx_fini(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return;

	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
}
static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return gaudi_cq_assignment[cq_idx];
}

static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) +
			sizeof(struct packet_msg_prot) * 2;
}

static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 +
			sizeof(struct packet_fence) +
			sizeof(struct packet_msg_prot) * 2;
}

static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
}
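/*
 * Editorial arithmetic for the CB sizes above, assuming 8-byte
 * MSG_SHORT/FENCE/MSG_PROT packets: the signal CB is 8 + 2 * 8 = 24 bytes
 * and the wait CB is 4 * 8 + 8 + 2 * 8 = 48 bytes. The wait budget lines
 * up with gaudi_gen_wait_cb() below: three monitor-setup MSG_SHORTs, one
 * ARM MSG_SHORT and one FENCE, with the two MSG_PROT packets presumably
 * appended by the end-of-CB packets for completion signaling.
 */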
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = cb->kernel_address + size;
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}
static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
				u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
		u16 sob_val, u16 mon_id)
{
	u64 monitor_base;
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u16 msg_addr_offset;
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev,
				"sob_base %u (mask %#x) is not valid\n",
				sob_base, sob_mask);
		return 0;
	}

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;

	memset(pkt, 0, pkt_size);

	/* Monitor config packet: bind the monitor to a sync object */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL*/
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
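/*
 * Editorial note tying the fence packet above to the lookup below: the
 * packet waits for fence counter ID 2 to reach a target value of one and
 * then decrements it by one, which is why gaudi_get_fence_addr() resolves
 * queue IDs to the matching *_QM_CP_FENCE2_RDATA_* registers.
 */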
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
		struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}
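/*
 * Editorial summary of the wait CB assembled above: gaudi_add_mon_pkts()
 * emits three MSG_SHORTs (payload address low/high and payload data 1),
 * gaudi_add_arm_monitor_pkt() emits a fourth MSG_SHORT that arms the
 * monitor on the SOB group, and gaudi_add_fence_pkt() appends the FENCE
 * that blocks the stream until the monitor fires and writes 1 to the
 * fence counter resolved by gaudi_get_fence_addr().
 */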
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}
static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
{
	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
							HL_POWER9_HOST_MAGIC) {
		hdev->power9_64bit_dma_enable = 1;
		hdev->dma_mask = 64;
	} else {
		hdev->power9_64bit_dma_enable = 0;
		hdev->dma_mask = 48;
	}
}

static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}
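/*
 * Editorial caveat (an observation, not a documented guarantee): the
 * 64-bit device time above is composed from two 32-bit reads, high word
 * (CNTCVU) first and low word (CNTCVL) second, so a low-word wrap between
 * the two reads could skew a sample by one high-word increment.
 */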
static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}
static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}
static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}
static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* Reg value represents a partial address of sync object,
	 * it is used as unique identifier. For this we need to
	 * clear the cutoff cfg base bits from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= (u32)CFG_BASE;

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}
static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
		/* TPC registered must be accessed with clock gating disabled */
		mutex_lock(&gaudi->clk_gate_mutex);
		hdev->asic_funcs->disable_clock_gating(hdev);

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		/* We can reenable clock_gating */
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
			/* MME registered must be accessed with clock gating
			 * disabled
			 */
			mutex_lock(&gaudi->clk_gate_mutex);
			hdev->asic_funcs->disable_clock_gating(hdev);

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			/* We can reenable clock_gating */
			hdev->asic_funcs->set_clock_gating(hdev);
			mutex_unlock(&gaudi->clk_gate_mutex);

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}
static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}

static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}
static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}
static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			 sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
			sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}
static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};
static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}
static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sg = gaudi_dma_map_sg,
	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read32 = gaudi_debugfs_read32,
	.debugfs_write32 = gaudi_debugfs_write32,
	.debugfs_read64 = gaudi_debugfs_read64,
	.debugfs_write64 = gaudi_debugfs_write64,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = hl_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.set_pll_profile = hl_set_pll_profile,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.send_heartbeat = gaudi_send_heartbeat,
	.set_clock_gating = gaudi_set_clock_gating,
	.disable_clock_gating = gaudi_disable_clock_gating,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.soft_reset_late_init = gaudi_soft_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.get_clk_rate = hl_get_clk_rate,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
	.get_device_time = gaudi_get_device_time,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
};
/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}