// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>
/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is always not
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 */
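
/*
 * Illustrative flow of the secured DMA usage described above (a sketch;
 * the driver's actual implementation is gaudi_send_job_on_qman0()):
 *
 *   1. Verify the device is idle (no compute jobs are running).
 *   2. Configure DMA channel 0 as secured.
 *   3. Submit the LIN_DMA job on QMAN0 and wait for its fence.
 *   4. Restore DMA channel 0 to the non-secured state.
 */
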
#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT		0x1000000

#define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
		BIT(GAUDI_ENGINE_ID_MME_0) |\
		BIT(GAUDI_ENGINE_ID_MME_2) |\
		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define GAUDI_PLL_MAX 10
static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
		"gaudi cpu eq"
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

static const u32 gaudi_pll_base_addresses[GAUDI_PLL_MAX] = {
	[CPU_PLL] = mmPSOC_CPU_PLL_NR,
	[PCI_PLL] = mmPSOC_PCI_PLL_NR,
	[SRAM_PLL] = mmSRAM_W_PLL_NR,
	[HBM_PLL] = mmPSOC_HBM_PLL_NR,
	[NIC_PLL] = mmNIC0_PLL_NR,
	[DMA_PLL] = mmDMA_W_PLL_NR,
	[MESH_PLL] = mmMESH_W_PLL_NR,
	[MME_PLL] = mmPSOC_MME_PLL_NR,
	[TPC_PLL] = mmPSOC_TPC_PLL_NR,
	[IF_PLL] = mmIF_W_PLL_NR
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
	gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
	gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
	gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};
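
/*
 * Note: each engine QMAN exposes four streams, so in the table above the
 * queue id is <engine base> + <stream 0-3>. Only the eight external PCI
 * DMA queues are paired with completion queues (see gaudi_cq_assignment);
 * internal queues are synchronized through sync objects instead.
 */
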
struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
	bool disable_clock_gating;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
								u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);

static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

static int gaudi_get_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;
		}

		prop->hw_queues_props[i].collective_mode =
						get_collective_mode(hdev, i);
	}

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream is reserved for collective;
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);
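
	/*
	 * Worked example for the reservation above, assuming
	 * NUMBER_OF_SOBS_IN_GRP = 11 (ten NIC engines plus one reduction
	 * engine) and HL_MAX_SOBS_PER_MONITOR = 8: ALIGN(11, 8) rounds each
	 * group up to 16 SOBs, so QMAN_STREAMS * HL_RSVD_SOBS groups of 16
	 * SOBs are set aside before the first sync-stream SOB.
	 */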

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
					SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop0_shift = HOP0_SHIFT;
	prop->pmmu.hop1_shift = HOP1_SHIFT;
	prop->pmmu.hop2_shift = HOP2_SHIFT;
	prop->pmmu.hop3_shift = HOP3_SHIFT;
	prop->pmmu.hop4_shift = HOP4_SHIFT;
	prop->pmmu.hop0_mask = HOP0_MASK;
	prop->pmmu.hop1_mask = HOP1_MASK;
	prop->pmmu.hop2_mask = HOP2_MASK;
	prop->pmmu.hop3_mask = HOP3_MASK;
	prop->pmmu.hop4_mask = HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

	/* PMMU and HPMMU are the same except for the page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	prop->max_power_default = MAX_POWER_DEFAULT_PCI;

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	/* disable fw security for now, set it in a later stage */
	prop->fw_security_disabled = true;
	prop->fw_security_status_valid = false;
	prop->hard_reset_done_by_fw = false;

	return 0;
}

static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}
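
/*
 * Typical (illustrative) use of gaudi_set_hbm_bar_base(): move the HBM BAR
 * window over the target address before a BAR-mapped access and restore it
 * afterwards:
 *
 *	old = gaudi_set_hbm_bar_base(hdev, addr & ~(bar_size - 1));
 *	... access hdev->pcie_bar[HBM_BAR_ID] + (addr & (bar_size - 1)) ...
 *	gaudi_set_hbm_bar_base(hdev, old);
 *
 * where bar_size stands for hdev->asic_prop.dram_pci_bar_size.
 */
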
static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	int rc;

	rc = gaudi_get_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_BAR_ID),
			SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);

	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_info(hdev->dev,
			"H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true);
	}

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
			mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
			GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true);
		goto pci_fini;
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_pll_frequency - Fetch PLL frequency values
 *
 * @hdev: pointer to hl_device structure
 * @pll_index: index of the pll to fetch frequency from
 * @pll_freq_arr: pointer to store the pll frequency in MHz in each of the
 *                available outputs. If a certain output is not available,
 *                0 will be set
 *
 */
static int gaudi_fetch_pll_frequency(struct hl_device *hdev,
				enum gaudi_pll_index pll_index,
				u16 *pll_freq_arr)
{
	u32 nr = 0, nf = 0, od = 0, pll_clk = 0, div_fctr, div_sel,
		pll_base_addr = gaudi_pll_base_addresses[pll_index];
	u16 freq = 0;
	int i, rc;

	if (hdev->asic_prop.fw_security_status_valid &&
			(hdev->asic_prop.fw_app_security_map &
					CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
		rc = hl_fw_cpucp_pll_info_get(hdev, pll_index, pll_freq_arr);

		if (rc)
			return rc;
	} else if (hdev->asic_prop.fw_security_disabled) {
		/* Backward compatibility */
		nr = RREG32(pll_base_addr + PLL_NR_OFFSET);
		nf = RREG32(pll_base_addr + PLL_NF_OFFSET);
		od = RREG32(pll_base_addr + PLL_OD_OFFSET);

		for (i = 0; i < HL_PLL_NUM_OUTPUTS; i++) {
			div_fctr = RREG32(pll_base_addr +
					PLL_DIV_FACTOR_0_OFFSET + i * 4);
			div_sel = RREG32(pll_base_addr +
					PLL_DIV_SEL_0_OFFSET + i * 4);

			if (div_sel == DIV_SEL_REF_CLK ||
					div_sel == DIV_SEL_DIVIDED_REF) {
				if (div_sel == DIV_SEL_REF_CLK)
					freq = PLL_REF_CLK;
				else
					freq = PLL_REF_CLK / (div_fctr + 1);
			} else if (div_sel == DIV_SEL_PLL_CLK ||
					div_sel == DIV_SEL_DIVIDED_PLL) {
				pll_clk = PLL_REF_CLK * (nf + 1) /
						((nr + 1) * (od + 1));
				if (div_sel == DIV_SEL_PLL_CLK)
					freq = pll_clk;
				else
					freq = pll_clk / (div_fctr + 1);
			} else {
				dev_warn(hdev->dev,
					"Received invalid div select value: %d",
					div_sel);
			}

			pll_freq_arr[i] = freq;
		}
	} else {
		dev_err(hdev->dev, "Failed to fetch PLL frequency values\n");
		return -EIO;
	}

	return 0;
}
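
/*
 * Worked example for the PLL math above (illustrative numbers): with
 * PLL_REF_CLK = 50 MHz, nr = 0, nf = 39, od = 1:
 *
 *	pll_clk = 50 * (39 + 1) / ((0 + 1) * (1 + 1)) = 1000 MHz
 *
 * and an output with div_sel == DIV_SEL_DIVIDED_PLL and div_fctr = 1 is
 * reported as 1000 / (1 + 1) = 500 MHz.
 */
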
/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u16 pll_freq[HL_PLL_NUM_OUTPUTS];
	int rc;

	rc = gaudi_fetch_pll_frequency(hdev, CPU_PLL, pll_freq);
	if (rc)
		return rc;

	prop->psoc_timestamp_frequency = pll_freq[2];
	prop->psoc_pci_pll_nr = 0;
	prop->psoc_pci_pll_nf = 0;
	prop->psoc_pci_pll_od = 0;
	prop->psoc_pci_pll_div_factor = 0;

	return 0;
}
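
/*
 * Note: the timestamp frequency is taken from CPU PLL output 2 above; the
 * psoc_pci_pll_* fields are not used on Gaudi and are therefore zeroed.
 */
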
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
	dst_addr = (prop->sram_user_base_address &
			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc;

	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc) {
		dev_err(hdev->dev, "Firmware file %s is not found!\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
			&dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
			dma_handle);

out:
	release_firmware(fw);
	return rc;
}

static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}
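
	/*
	 * Resulting layout for this group (illustrative, assuming
	 * NIC_NUMBER_OF_ENGINES = 10): NIC engine i waits on
	 * base_sob_id + i (i = 0..9) and the reduction engine mapped
	 * below (DMA5 or TPC7) uses base_sob_id + 10.
	 */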

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}

static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	int i;

	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
		WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(hw_sob_group->base_sob_id + i) * 4, 0);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, master_monitor_sobs, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	/* Master monitor 0 covers the first HL_MAX_SOBS_PER_MONITOR NICs */
	prop->mstr_sob_mask[0] = 0;
	master_monitor_sobs = HL_MAX_SOBS_PER_MONITOR;
	for (i = 0 ; i < master_monitor_sobs ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[0] |= BIT(i);

	/* Master monitor 1 covers the NICs above the first eight */
	prop->mstr_sob_mask[1] = 0;
	master_monitor_sobs =
		NIC_NUMBER_OF_ENGINES - HL_MAX_SOBS_PER_MONITOR;
	for (i = 0 ; i < master_monitor_sobs; i++) {
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i +
				HL_MAX_SOBS_PER_MONITOR))
			prop->mstr_sob_mask[1] |= BIT(i);
	}

	/* Set collective engine bit */
	prop->mstr_sob_mask[1] |= BIT(i);

	return 0;
}
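
/*
 * Worked example for the master monitor masks built above (illustrative,
 * all ten NICs functional): monitor 0 watches NICs 0-7, so
 * mstr_sob_mask[0] = 0xFF; monitor 1 watches NICs 8-9 plus the collective
 * (reduction) engine, so mstr_sob_mask[1] = 0x3 | BIT(2) = 0x7.
 */
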
static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}

static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size);
}

static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	/* copy the SOB id and value of the signal CS */
	cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
	cs_cmpl->sob_val = signal_cs_cmpl->sob_val;

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
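
		/*
		 * Illustrative rotation: with HL_RSVD_SOBS = 2 groups per
		 * stream (see the "only two SOBs" note above), the index
		 * simply alternates between 0 and 1, so a freshly reset SOB
		 * group is mapped while the exhausted one is released (and
		 * hw-reset) through its kref.
		 */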
	}

	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);
	/*
	 * Must put the signal fence after the SOB refcnt increment so
	 * the SOB refcnt won't turn 0 and reset the SOB before the
	 * wait CS was submitted.
	 */
	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;
}

static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI-X
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size,
			hdev->mmu_enable && !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/*
	 * There is no need for parsing; the user CB is the patched CB.
	 * We call hl_cb_destroy() for two reasons - we don't need
	 * the CB in the CB idr anymore, and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
		u32 collective_engine_id)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * The rest of the jobs go to the collective slave queues, which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
					BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
		}

		if (rc)
			return rc;
	}

	return rc;
}

static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);

	return rc;
}

static void gaudi_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	int i = 0;

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}

static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bit addresses, while bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical across the allocated range.
	 */

	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_CPU_ACCESSIBLE_MEM_SIZE,
						&dma_addr_arr[i],
						GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	if (hdev->asic_prop.fw_security_disabled)
		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	for (j = 0 ; j < i ; j++)
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
						HL_CPU_ACCESSIBLE_MEM_SIZE,
						virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}
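
/*
 * Example of the MSB constraint handled above: GAUDI_CPU_PCI_MSB_ADDR()
 * extracts bits 49:39, i.e. the 512GB-aligned window of the address. An
 * allocation that happens to straddle a 2^39 boundary would report
 * different MSB values for its first and last byte, so it is dropped and
 * retried (up to GAUDI_ALLOC_CPU_MEM_RETRY_CNT times).
 */
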
static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		q = &gaudi->internal_qmans[i];
		if (!q->pq_kernel_addr)
			continue;
		hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
					q->pq_kernel_addr, q->pq_dma_addr);
	}
}

static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	int rc;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
			continue;

		q = &gaudi->internal_qmans[i];

		switch (i) {
		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
			break;
		default:
			dev_err(hdev->dev, "Bad internal queue index %d", i);
			rc = -EINVAL;
			goto free_internal_qmans_pq_mem;
		}

		q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
						hdev, q->pq_size,
						&q->pq_dma_addr,
						GFP_KERNEL | __GFP_ZERO);
		if (!q->pq_kernel_addr) {
			rc = -ENOMEM;
			goto free_internal_qmans_pq_mem;
		}
	}

	return 0;

free_internal_qmans_pq_mem:
	gaudi_free_internal_qmans_pq_mem(hdev);
	return rc;
}

static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;

	/* Allocate device structure */
	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
	if (!gaudi)
		return -ENOMEM;

	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
		if (gaudi_irq_map_table[i].valid) {
			if (event_id == GAUDI_EVENT_SIZE) {
				dev_err(hdev->dev,
					"Event array exceeds the limit of %u events\n",
					GAUDI_EVENT_SIZE);
				rc = -EINVAL;
				goto free_gaudi_device;
			}

			gaudi->events[event_id++] =
					gaudi_irq_map_table[i].fc_id;
		}
	}

	gaudi->cpucp_info_get = gaudi_cpucp_info_get;

	gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;

	hdev->asic_specific = gaudi;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);
	mutex_init(&gaudi->clk_gate_mutex);

	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	if (hdev->asic_prop.fw_security_disabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}

static int gaudi_sw_fini(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	gaudi_free_internal_qmans_pq_mem(hdev);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	if (hdev->asic_prop.fw_security_disabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	mutex_destroy(&gaudi->clk_gate_mutex);

	kfree(gaudi);

	return 0;
}

static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
{
	struct hl_device *hdev = arg;
	int i;

	if (hdev->disabled)
		return IRQ_NONE;

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);

	hl_irq_handler_eq(irq, &hdev->event_queue);

	return IRQ_HANDLED;
}

/*
 * For backward compatibility, new MSI interrupts should be set after the
 * existing CPU and NIC interrupts.
 */
static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
				bool cpu_eq)
{
	int msi_vec;

	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
				GAUDI_EVENT_QUEUE_MSI_IDX);

	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
			(nr + NIC_NUMBER_OF_ENGINES + 1);

	return pci_irq_vector(hdev->pdev, msi_vec);
}
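
/*
 * Illustrative mapping performed above: CQ interrupts below
 * GAUDI_EVENT_QUEUE_MSI_IDX map 1:1 to MSI vectors, the CPU event queue
 * uses vector GAUDI_EVENT_QUEUE_MSI_IDX, and any newer interrupt number is
 * shifted up by NIC_NUMBER_OF_ENGINES + 1 vectors so it lands after the
 * preallocated CPU and NIC vectors.
 */
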
static int gaudi_enable_msi_single(struct hl_device *hdev)
{
	int rc, irq;

	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");

	irq = gaudi_pci_irq_vector(hdev, 0, false);
	rc = request_irq(irq, gaudi_irq_handler_single, 0,
			"gaudi single msi", hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to request single MSI IRQ\n");

	return rc;
}

static int gaudi_enable_msi_multi(struct hl_device *hdev)
{
	int cq_cnt = hdev->asic_prop.completion_queues_count;
	int rc, i, irq_cnt_init, irq;

	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
		irq = gaudi_pci_irq_vector(hdev, i, false);
		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
				&hdev->completion_queue[i]);
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_irqs;
		}
	}

	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
				&hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
		goto free_irqs;
	}

	return 0;

free_irqs:
	for (i = 0 ; i < irq_cnt_init ; i++)
		free_irq(gaudi_pci_irq_vector(hdev, i, false),
				&hdev->completion_queue[i]);
	return rc;
}

static int gaudi_enable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
		return 0;

	rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
					PCI_IRQ_MSI);
	if (rc < 0) {
		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
		return rc;
	}

	if (rc < NUMBER_OF_INTERRUPTS) {
		gaudi->multi_msi_mode = false;
		rc = gaudi_enable_msi_single(hdev);
	} else {
		gaudi->multi_msi_mode = true;
		rc = gaudi_enable_msi_multi(hdev);
	}

	if (rc)
		goto free_pci_irq_vectors;

	gaudi->hw_cap_initialized |= HW_CAP_MSI;

	return 0;

free_pci_irq_vectors:
	pci_free_irq_vectors(hdev->pdev);
	return rc;
}

static void gaudi_sync_irqs(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, cq_cnt = hdev->asic_prop.completion_queues_count;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	/* Wait for all pending IRQs to be finished */
	if (gaudi->multi_msi_mode) {
		for (i = 0 ; i < cq_cnt ; i++)
			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));

		synchronize_irq(gaudi_pci_irq_vector(hdev,
						GAUDI_EVENT_QUEUE_MSI_IDX,
						true));
	} else {
		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
	}
}

static void gaudi_disable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	gaudi_sync_irqs(hdev);

	if (gaudi->multi_msi_mode) {
		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
						true);
		free_irq(irq, &hdev->event_queue);

		for (i = 0 ; i < cq_cnt ; i++) {
			irq = gaudi_pci_irq_vector(hdev, i, false);
			free_irq(irq, &hdev->completion_queue[i]);
		}
	} else {
		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
	}

	pci_free_irq_vectors(hdev->pdev);

	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
}

static void gaudi_init_scrambler_sram(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!hdev->asic_prop.fw_security_disabled)
		return;

	if (hdev->asic_prop.fw_security_status_valid &&
			(hdev->asic_prop.fw_app_security_map &
					CPU_BOOT_DEV_STS0_SRAM_SCR_EN))
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
		return;

	if (!hdev->sram_scrambler_enable)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
}
static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!hdev->asic_prop.fw_security_disabled)
		return;

	if (hdev->asic_prop.fw_security_status_valid &&
			(hdev->asic_prop.fw_boot_cpu_security_map &
					CPU_BOOT_DEV_STS0_DRAM_SCR_EN))
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
		return;

	if (!hdev->dram_scrambler_enable)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
}
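
/*
 * Program the end-to-end credits (HBM and PCI read/write sizes) of every
 * SIF/NIF router and DMA interface down-channel, then enable them. The
 * per-router values are magic numbers, presumably tuned per link; the
 * ">> 3" presumably converts byte counts into the 8-byte units the H/W
 * expects (an inference from the shift, not documented here).
 */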
static void gaudi_init_e2e(struct hl_device *hdev)
{
	if (!hdev->asic_prop.fw_security_disabled)
		return;

	if (hdev->asic_prop.fw_security_status_valid &&
			(hdev->asic_prop.fw_boot_cpu_security_map &
					CPU_BOOT_DEV_STS0_E2E_CRED_EN))
		return;

	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);

	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	if (!hdev->dram_scrambler_enable) {
		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);

		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
		WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
	}

	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
}
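
/*
 * Set per-HBM-channel read/write credit counts in each DMA interface and
 * enable credit-based arbitration on both HBM interfaces (CRED_EN_0/1).
 * Skipped when a security-enabled F/W owns this configuration.
 */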
static void gaudi_init_hbm_cred(struct hl_device *hdev)
{
	uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;

	if (!hdev->asic_prop.fw_security_disabled)
		return;

	if (hdev->asic_prop.fw_security_status_valid &&
			(hdev->asic_prop.fw_boot_cpu_security_map &
					CPU_BOOT_DEV_STS0_HBM_CRED_EN))
		return;

	hbm0_wr = 0x33333333;
	hbm0_rd = 0x77777777;
	hbm1_wr = 0x55555555;
	hbm1_rd = 0xDDDDDDDD;

	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));

	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
}
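
/*
 * One-time "golden" register initialization: E2E credits, HBM credits, TPC
 * interrupt masking and I-cache fetch width, zeroing the first SRAM bytes
 * for Tensor DMA, and the MME rollup counters.
 */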
static void gaudi_init_golden_registers(struct hl_device *hdev)
{
	u32 tpc_offset;
	int tpc_id, i;

	gaudi_init_e2e(hdev);
	gaudi_init_hbm_cred(hdev);

	hdev->asic_funcs->disable_clock_gating(hdev);

	for (tpc_id = 0, tpc_offset = 0;
			tpc_id < TPC_NUMBER_OF_ENGINES;
			tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
		/* Mask all arithmetic interrupts from TPC */
		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
		/* Set 16 cache lines */
		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
				ICACHE_FETCH_LINE_NUM, 2);
	}

	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
	for (i = 0 ; i < 128 ; i += 8)
		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);

	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
}
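
/*
 * Configure a single stream (qman_id) of a PCI DMA QMAN whose PQ lives in
 * host memory at qman_pq_addr. Per-stream register copies are laid out 4
 * bytes apart, e.g. stream 2 of the QMAN of DMA channel 1 is addressed at:
 *
 *	q_off = 1 * DMA_QMAN_OFFSET + 2 * 4;
 *
 * The RAZWI/arbitration error configuration further down is per-QMAN and is
 * therefore done only when qman_id is 0.
 */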
static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
				int qman_id, dma_addr_t qman_pq_addr)
{
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 q_off, dma_qm_offset;
	u32 dma_qm_err_cfg;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = dma_qm_offset + qman_id * 4;

	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));

	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_SRC_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_DST_OFFSET);

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);

	/* The following configuration is needed only once per QMAN */
	if (qman_id == 0) {
		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err) {
			dma_qm_err_cfg |=
				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
		}

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE +
					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE +
					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Increase ARB WDT to support streams architecture */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
				GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_EXTERNAL_MAKE_TRUSTED);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
	}
}
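
/*
 * Enable a DMA core channel: writing 0 to RD_MAX_OUTSTAND/RD_MAX_SIZE
 * apparently selects the maximum allowed by the physical size (per the
 * in-line comment below), errors are reported towards the GIC, and the
 * channel is put in MMU bypass since a secured channel works on physical
 * addresses.
 */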
static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
{
	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;

	/* Set to maximum possible according to physical size */
	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);

	/* WA for H/W bug H3-2116 */
	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);

	/* STOP_ON bit implies no completion to operation in case of RAZWI */
	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
	WREG32(mmDMA0_CORE_PROT + dma_offset,
			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
	/* If the channel is secured, it should be in MMU bypass mode */
	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
}
static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
			u32 enable_mask)
{
	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
}
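
/*
 * Initialize all PCI DMA QMANs (the external, host-facing queues). Each
 * stream gets a CQ index and an MSI vector; the cpu_skip/nic_skip terms
 * account for the CPU queue and the NIC IRQ vectors that precede the DMA
 * ones in the numbering.
 */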
static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;

	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
		return;

	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		/*
		 * For queues after the CPU Q need to add 1 to get the correct
		 * queue. In addition, need to add the CPU EQ and NIC IRQs in
		 * order to get the correct MSI register.
		 */
		if (dma_id) {
			cpu_skip = 1;
			nic_skip = NIC_NUMBER_OF_ENGINES;
		} else {
			cpu_skip = 0;
			nic_skip = 0;
		}

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			q_idx = 4 * dma_id + j + cpu_skip;
			q = &hdev->kernel_queues[q_idx];
			q->cq_id = cq_id++;
			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
			gaudi_init_pci_dma_qman(hdev, dma_id, j,
						q->bus_address);
		}

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
}
static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
					int qman_id, u64 qman_base_addr)
{
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 q_off, dma_qm_offset;
	u32 dma_qm_err_cfg;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = dma_qm_offset + qman_id * 4;

	if (qman_id < 4) {
		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_DST_OFFSET);
	} else {
		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err) {
			dma_qm_err_cfg |=
				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
		}
		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE +
					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE +
					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Increase ARB WDT to support streams architecture */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
				GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}
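
/*
 * Initialize the HBM (compute) DMA QMANs. Streams 0-3 take their PQs from
 * internal queues in host memory; an extra pass with qman_id == 4 and a
 * zero PQ address initializes the lower CP, which has no PQ of its own.
 */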
static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	int i, j, dma_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
		return;

	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			 /*
			  * Add the CPU queue in order to get the correct queue
			  * number as all internal queue are placed after it
			  */
			internal_q_index = dma_id * QMAN_STREAMS + j + 1;

			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
						qman_base_addr);
		}

		/* Initializing lower CP for HBM DMA QMAN */
		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
}
static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
					int qman_id, u64 qman_base_addr)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 q_off, mme_id;
	u32 mme_qm_err_cfg;

	mtr_base_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = mme_offset + qman_id * 4;

	if (qman_id < 4) {
		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_DST_OFFSET);
	} else {
		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		mme_id = mme_offset /
				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);

		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err) {
			mme_qm_err_cfg |=
				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
		}
		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
			lower_32_bits(CFG_BASE +
					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
			upper_32_bits(CFG_BASE +
					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
									mme_id);

		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Increase ARB WDT to support streams architecture */
		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
				GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
}
static void gaudi_init_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 mme_offset;
	int i, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_MME)
		return;

	/*
	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
	 */

	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;

	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
		q = &gaudi->internal_qmans[internal_q_index];
		qman_base_addr = (u64) q->pq_dma_addr;
		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
					qman_base_addr);
		if (i == 3)
			mme_offset = 0;
	}

	/* Initializing lower CP for MME QMANs */
	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
	gaudi_init_mme_qman(hdev, 0, 4, 0);

	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);

	gaudi->hw_cap_initialized |= HW_CAP_MME;
}
static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
				int qman_id, u64 qman_base_addr)
{
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 q_off, tpc_id;
	u32 tpc_qm_err_cfg;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = tpc_offset + qman_id * 4;

	tpc_id = tpc_offset /
			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);

	if (qman_id < 4) {
		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_CPDMA_DST_OFFSET);
	} else {
		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
				QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
				QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err) {
			tpc_qm_err_cfg |=
				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
		}

		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
			lower_32_bits(CFG_BASE +
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
			upper_32_bits(CFG_BASE +
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
									tpc_id);

		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Increase ARB WDT to support streams architecture */
		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
				GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
	if (tpc_id == 6) {
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}
static void gaudi_init_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 so_base_hi, tpc_offset = 0;
	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
	int i, tpc_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
		return;

	so_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
						tpc_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_tpc_qman(hdev, tpc_offset, i,
						qman_base_addr);

			if (i == 3) {
				/* Initializing lower CP for TPC QMAN */
				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);

				/* Enable the QMAN and TPC channel */
				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
						QMAN_TPC_ENABLE);
			}
		}

		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
				so_base_hi);

		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;

		gaudi->hw_cap_initialized |=
				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
	}
}
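
/*
 * Configure one stream of a NIC QMAN. Unlike the DMA/MME/TPC QMANs there is
 * no lower-CP (qman_id == 4) pass here; the driver only sets up the four
 * upper streams. The one-time error configuration is done for stream 0.
 */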
static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
				int qman_id, u64 qman_base_addr, int nic_id)
{
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 q_off;
	u32 nic_qm_err_cfg;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	q_off = nic_offset + qman_id * 4;

	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));

	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);

	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
			QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_SRC_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
			QMAN_LDMA_DST_OFFSET);

	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	if (qman_id == 0) {
		/* Configure RAZWI IRQ */
		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err) {
			nic_qm_err_cfg |=
				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
		}

		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
			lower_32_bits(CFG_BASE +
					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
			upper_32_bits(CFG_BASE +
					mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
									nic_id);

		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Increase ARB WDT to support streams architecture */
		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
				GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}
}
static void gaudi_init_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int i, nic_id, internal_q_index;

	if (!hdev->nic_ports_mask)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
		return;

	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
			nic_offset += nic_delta_between_qmans;
			if (nic_id & 1) {
				nic_offset -= (nic_delta_between_qmans * 2);
				nic_offset += nic_delta_between_nics;
			}
			continue;
		}

		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
						nic_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
						qman_base_addr, nic_id);
		}

		/* Enable the QMAN */
		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);

		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}

		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
	}
}
static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
}
static void gaudi_disable_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	WREG32(mmMME2_QM_GLBL_CFG0, 0);
	WREG32(mmMME0_QM_GLBL_CFG0, 0);
}

static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 tpc_offset = 0;
	int tpc_id;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
	}
}
static void gaudi_disable_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 nic_mask, nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int nic_id;

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);

		if (gaudi->hw_cap_initialized & nic_mask)
			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);

		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}
	}
}
static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	/* Stop CPs of HBM DMA QMANs */

	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	/* Stop CPs of MME QMANs */
	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}

static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}
static void gaudi_stop_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	/* Stop upper CPs of QMANs */

	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
		WREG32(mmNIC0_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
		WREG32(mmNIC0_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
		WREG32(mmNIC1_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
		WREG32(mmNIC1_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
		WREG32(mmNIC2_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
		WREG32(mmNIC2_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
		WREG32(mmNIC3_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
		WREG32(mmNIC3_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
		WREG32(mmNIC4_QM0_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
		WREG32(mmNIC4_QM1_GLBL_CFG1,
				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
}
static void gaudi_pci_dma_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
}

static void gaudi_hbm_dma_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
}

static void gaudi_mme_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
}

static void gaudi_tpc_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
}
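
/*
 * Enable clock gating per the user-supplied mask: every DMA/MME/TPC engine
 * whose bit is set in hdev->clock_gating_mask gets its QMAN CGM turned on.
 * Note that the PCI DMA QMANs gate only the upper CP
 * (QMAN_UPPER_CP_CGM_PWR_GATE_EN), presumably because the driver itself
 * uses their lower CP.
 */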
static void gaudi_set_clock_gating(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 qman_offset;
	bool enable;
	int i;

	/* In case we are during debug session, don't enable the clock gate
	 * as it may interfere
	 */
	if (hdev->in_debug)
		return;

	for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
		enable = !!(hdev->clock_gating_mask &
				(BIT_ULL(gaudi_dma_assignment[i])));

		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
				enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
	}

	for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
		enable = !!(hdev->clock_gating_mask &
				(BIT_ULL(gaudi_dma_assignment[i])));

		qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
		WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
	}

	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
	WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
	WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);

	enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
	WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
	WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);

	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		enable = !!(hdev->clock_gating_mask &
				(BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));

		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
				enable ? QMAN_CGM1_PWR_GATE_EN : 0);
		WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
				enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);

		qman_offset += TPC_QMAN_OFFSET;
	}

	gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
}
static void gaudi_disable_clock_gating(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 qman_offset;
	int i;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
		return;

	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);

		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
	}

	WREG32(mmMME0_QM_CGM_CFG, 0);
	WREG32(mmMME0_QM_CGM_CFG1, 0);
	WREG32(mmMME2_QM_CGM_CFG, 0);
	WREG32(mmMME2_QM_CGM_CFG1, 0);

	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);

		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
	}

	gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
}
static void gaudi_enable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);

	/* Zero the lower/upper parts of the 64-bit counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);

	/* Enable the counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
}

static void gaudi_disable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
}
static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
{
	u32 wait_timeout_ms;

	dev_info(hdev->dev,
		"Halting compute engines and disabling interrupts\n");

	if (hdev->pldm)
		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
	else
		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;

	gaudi_stop_nic_qmans(hdev);
	gaudi_stop_mme_qmans(hdev);
	gaudi_stop_tpc_qmans(hdev);
	gaudi_stop_hbm_dma_qmans(hdev);
	gaudi_stop_pci_dma_qmans(hdev);

	hdev->asic_funcs->disable_clock_gating(hdev);

	msleep(wait_timeout_ms);

	gaudi_pci_dma_stall(hdev);
	gaudi_hbm_dma_stall(hdev);
	gaudi_tpc_stall(hdev);
	gaudi_mme_stall(hdev);

	msleep(wait_timeout_ms);

	gaudi_disable_nic_qmans(hdev);
	gaudi_disable_mme_qmans(hdev);
	gaudi_disable_tpc_qmans(hdev);
	gaudi_disable_hbm_dma_qmans(hdev);
	gaudi_disable_pci_dma_qmans(hdev);

	gaudi_disable_timestamp(hdev);

	gaudi_disable_msi(hdev);
}
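
/*
 * Enable the device MMU: register the hop-0 page table address of every
 * possible ASID, invalidate the MMU cache and program the STLB hop
 * configuration (optionally with the huge-page optimization).
 */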
static int gaudi_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hop0_addr;
	int rc, i;

	if (!hdev->mmu_enable)
		return 0;

	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	for (i = 0 ; i < prop->max_asid ; i++) {
		hop0_addr = prop->mmu_pgt_addr +
				(i * prop->mmu_hop_table_size);

		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
		if (rc) {
			dev_err(hdev->dev,
				"failed to set hop0 addr for asid %d\n", i);
			goto err;
		}
	}

	/* init MMU cache manage page */
	WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);

	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);

	WREG32(mmMMU_UP_MMU_ENABLE, 1);
	WREG32(mmMMU_UP_SPI_MASK, 0xF);

	WREG32(mmSTLB_HOP_CONFIGURATION,
			hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);

	/*
	 * The H/W expects the first PI after init to be 1. After wraparound
	 * we'll write 0.
	 */
	gaudi->mmu_cache_inv_pi = 1;

	gaudi->hw_cap_initialized |= HW_CAP_MMU;

	return 0;

err:
	return rc;
}
static int gaudi_load_firmware_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	/* HBM scrambler must be initialized before pushing F/W to HBM */
	gaudi_init_scrambler_hbm(hdev);

	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
}

static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
{
	void __iomem *dst;

	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;

	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
}
3672 static int gaudi_read_device_fw_version(struct hl_device *hdev,
3673 enum hl_fw_component fwc)
3681 ver_off = RREG32(mmUBOOT_VER_OFFSET);
3682 dest = hdev->asic_prop.uboot_ver;
3685 case FW_COMP_PREBOOT:
3686 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
3687 dest = hdev->asic_prop.preboot_ver;
3691 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
3695 ver_off &= ~((u32)SRAM_BASE_ADDR);
3697 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
3698 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
3701 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
3703 strcpy(dest, "unavailable");
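/*
 * The *_VER_OFFSET registers hold an absolute SRAM address, which is why
 * SRAM_BASE_ADDR is masked off before the offset is used relative to the
 * SRAM BAR. The bounds check guards the memcpy_fromio() above so that a
 * bogus offset cannot make the version read run past the end of SRAM.
 */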
3710 static int gaudi_init_cpu(struct hl_device *hdev)
3712 struct gaudi_device *gaudi = hdev->asic_specific;
3715 if (!hdev->cpu_enable)
3718 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3722 * The device CPU works with 40-bit addresses.
3723 * This register extends them to 50 bits.
3724 */
3725 if (hdev->asic_prop.fw_security_disabled)
3726 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3728 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
3729 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
3730 mmCPU_CMD_STATUS_TO_HOST,
3731 mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
3732 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
3733 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
3738 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3743 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3745 struct gaudi_device *gaudi = hdev->asic_specific;
3748 struct hl_hw_queue *cpu_pq =
3749 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3752 if (!hdev->cpu_queues_enable)
3755 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3758 eq = &hdev->event_queue;
3760 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3761 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3763 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3764 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3766 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3767 lower_32_bits(hdev->cpu_accessible_dma_address));
3768 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3769 upper_32_bits(hdev->cpu_accessible_dma_address));
3771 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3772 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3773 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3775 /* Used for EQ CI */
3776 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3778 WREG32(mmCPU_IF_PF_PQ_PI, 0);
3780 if (gaudi->multi_msi_mode)
3781 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3783 WREG32(mmCPU_IF_QUEUE_INIT,
3784 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3786 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
3788 err = hl_poll_timeout(
3790 mmCPU_IF_QUEUE_INIT,
3792 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3798 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3802 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
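/*
 * Summary of the CPU queues handshake performed above: the driver programs
 * the PQ/EQ/CQ base addresses and lengths, zeroes the EQ CI and PQ PI,
 * writes a READY_FOR_CP status (the SINGLE_MSI variant tells CPU-CP that
 * multi-MSI mode is not in use), and kicks the device CPU via a GIC SPI.
 * It then polls mmCPU_IF_QUEUE_INIT until CPU-CP flips it to
 * PQ_INIT_STATUS_READY_FOR_HOST, or fails with a timeout.
 */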
3806 static void gaudi_pre_hw_init(struct hl_device *hdev)
3808 /* Perform a read from the device to make sure the device is up */
3809 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
3811 if (hdev->asic_prop.fw_security_disabled) {
3812 /* Set the access through PCI bars (Linux driver only) as
3813 * secured
3814 */
3815 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3816 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3817 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3819 /* Perform a read to flush the pending writes and ensure the
3820 * configuration was set in the device
3821 */
3822 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3826 * Let's mark in the H/W that we have reached this point. We check
3827 * this value in the reset_before_init function to understand whether
3828 * we need to reset the chip before doing H/W init. This register is
3829 * cleared by the H/W upon H/W reset
3831 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3834 static int gaudi_hw_init(struct hl_device *hdev)
3838 gaudi_pre_hw_init(hdev);
3840 gaudi_init_pci_dma_qmans(hdev);
3842 gaudi_init_hbm_dma_qmans(hdev);
3844 rc = gaudi_init_cpu(hdev);
3846 dev_err(hdev->dev, "failed to initialize CPU\n");
3850 /* SRAM scrambler must be initialized after CPU is running from HBM */
3851 gaudi_init_scrambler_sram(hdev);
3853 /* This is here just in case we are working without CPU */
3854 gaudi_init_scrambler_hbm(hdev);
3856 gaudi_init_golden_registers(hdev);
3858 rc = gaudi_mmu_init(hdev);
3862 gaudi_init_security(hdev);
3864 gaudi_init_mme_qmans(hdev);
3866 gaudi_init_tpc_qmans(hdev);
3868 gaudi_init_nic_qmans(hdev);
3870 hdev->asic_funcs->set_clock_gating(hdev);
3872 gaudi_enable_timestamp(hdev);
3874 /* MSI must be enabled before CPU queues and NIC are initialized */
3875 rc = gaudi_enable_msi(hdev);
3877 goto disable_queues;
3879 /* Must be called after MSI is enabled */
3880 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3882 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3887 /* Perform read from the device to flush all configuration */
3888 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
3893 gaudi_disable_msi(hdev);
3895 gaudi_disable_mme_qmans(hdev);
3896 gaudi_disable_pci_dma_qmans(hdev);
3901 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3903 struct gaudi_device *gaudi = hdev->asic_specific;
3904 u32 status, reset_timeout_ms, cpu_timeout_ms;
3907 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3912 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3913 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3915 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3916 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3919 /* Set device to handle FLR by H/W as we will put the device CPU to
3920 * halt mode
3921 */
3922 if (hdev->asic_prop.fw_security_disabled &&
3923 !hdev->asic_prop.hard_reset_done_by_fw)
3924 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3925 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3927 /* The state of the CPU is unknown at this point, so make sure it is
3928 * stopped by any means necessary
3929 */
3930 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
3932 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3934 if (hdev->asic_prop.fw_security_disabled &&
3935 !hdev->asic_prop.hard_reset_done_by_fw) {
3937 /* Configure the reset registers. Must be done as early as
3938 * possible in case we fail during H/W initialization
3940 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
3941 (CFG_RST_H_DMA_MASK |
3942 CFG_RST_H_MME_MASK |
3944 CFG_RST_H_TPC_7_MASK));
3946 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
3948 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
3949 (CFG_RST_H_HBM_MASK |
3950 CFG_RST_H_TPC_7_MASK |
3951 CFG_RST_H_NIC_MASK |
3953 CFG_RST_H_DMA_MASK |
3954 CFG_RST_H_MME_MASK |
3955 CFG_RST_H_CPU_MASK |
3956 CFG_RST_H_MMU_MASK));
3958 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
3959 (CFG_RST_L_IF_MASK |
3960 CFG_RST_L_PSOC_MASK |
3961 CFG_RST_L_TPC_MASK));
3963 msleep(cpu_timeout_ms);
3965 /* Tell ASIC not to re-initialize PCIe */
3966 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3968 /* Restart BTL/BLR upon hard-reset */
3969 if (hdev->asic_prop.fw_security_disabled)
3970 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3972 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3973 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3977 "Issued HARD reset command, going to wait %dms\n",
3981 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3982 * itself is in reset. We need to wait until the reset is deasserted
3983 */
3984 msleep(reset_timeout_ms);
3986 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3987 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3989 "Timeout while waiting for device to reset 0x%x\n",
3993 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3994 HW_CAP_HBM | HW_CAP_PCI_DMA |
3995 HW_CAP_MME | HW_CAP_TPC_MASK |
3996 HW_CAP_HBM_DMA | HW_CAP_PLL |
3997 HW_CAP_NIC_MASK | HW_CAP_MMU |
3998 HW_CAP_SRAM_SCRAMBLER |
3999 HW_CAP_HBM_SCRAMBLER |
4002 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4006 static int gaudi_suspend(struct hl_device *hdev)
4010 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4012 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4017 static int gaudi_resume(struct hl_device *hdev)
4019 return gaudi_init_iatu(hdev);
4022 static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4023 void *cpu_addr, dma_addr_t dma_addr, size_t size)
4027 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4028 VM_DONTCOPY | VM_NORESERVE;
4030 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
4032 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4037 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4039 struct gaudi_device *gaudi = hdev->asic_specific;
4040 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
4042 bool invalid_queue = false;
4044 switch (hw_queue_id) {
4045 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4046 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4047 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4048 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4049 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4052 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4053 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4054 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4055 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4056 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4059 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4060 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4061 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4062 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4063 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4066 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4067 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4068 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4069 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4070 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4073 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4074 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4075 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4076 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4077 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4080 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4081 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4082 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4083 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4084 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4087 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4088 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4089 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4090 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4091 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4094 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4095 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4096 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4097 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4098 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
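/*
 * The HBM DMA cases above use ((hw_queue_id - 1) & 0x3) rather than
 * (hw_queue_id & 0x3) because GAUDI_QUEUE_ID_CPU_PQ sits between
 * DMA_1_3 and DMA_2_0 in the queue-ID enumeration, shifting every later
 * DMA queue ID by one; subtracting 1 re-aligns them to a 0-3 PQ index.
 */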
4101 case GAUDI_QUEUE_ID_CPU_PQ:
4102 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4103 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4105 invalid_queue = true;
4108 case GAUDI_QUEUE_ID_MME_0_0:
4109 db_reg_offset = mmMME2_QM_PQ_PI_0;
4112 case GAUDI_QUEUE_ID_MME_0_1:
4113 db_reg_offset = mmMME2_QM_PQ_PI_1;
4116 case GAUDI_QUEUE_ID_MME_0_2:
4117 db_reg_offset = mmMME2_QM_PQ_PI_2;
4120 case GAUDI_QUEUE_ID_MME_0_3:
4121 db_reg_offset = mmMME2_QM_PQ_PI_3;
4124 case GAUDI_QUEUE_ID_MME_1_0:
4125 db_reg_offset = mmMME0_QM_PQ_PI_0;
4128 case GAUDI_QUEUE_ID_MME_1_1:
4129 db_reg_offset = mmMME0_QM_PQ_PI_1;
4132 case GAUDI_QUEUE_ID_MME_1_2:
4133 db_reg_offset = mmMME0_QM_PQ_PI_2;
4136 case GAUDI_QUEUE_ID_MME_1_3:
4137 db_reg_offset = mmMME0_QM_PQ_PI_3;
4140 case GAUDI_QUEUE_ID_TPC_0_0:
4141 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4144 case GAUDI_QUEUE_ID_TPC_0_1:
4145 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4148 case GAUDI_QUEUE_ID_TPC_0_2:
4149 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4152 case GAUDI_QUEUE_ID_TPC_0_3:
4153 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4156 case GAUDI_QUEUE_ID_TPC_1_0:
4157 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4160 case GAUDI_QUEUE_ID_TPC_1_1:
4161 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4164 case GAUDI_QUEUE_ID_TPC_1_2:
4165 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4168 case GAUDI_QUEUE_ID_TPC_1_3:
4169 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4172 case GAUDI_QUEUE_ID_TPC_2_0:
4173 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4176 case GAUDI_QUEUE_ID_TPC_2_1:
4177 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4180 case GAUDI_QUEUE_ID_TPC_2_2:
4181 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4184 case GAUDI_QUEUE_ID_TPC_2_3:
4185 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4188 case GAUDI_QUEUE_ID_TPC_3_0:
4189 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4192 case GAUDI_QUEUE_ID_TPC_3_1:
4193 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4196 case GAUDI_QUEUE_ID_TPC_3_2:
4197 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4200 case GAUDI_QUEUE_ID_TPC_3_3:
4201 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4204 case GAUDI_QUEUE_ID_TPC_4_0:
4205 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4208 case GAUDI_QUEUE_ID_TPC_4_1:
4209 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4212 case GAUDI_QUEUE_ID_TPC_4_2:
4213 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4216 case GAUDI_QUEUE_ID_TPC_4_3:
4217 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4220 case GAUDI_QUEUE_ID_TPC_5_0:
4221 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4224 case GAUDI_QUEUE_ID_TPC_5_1:
4225 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4228 case GAUDI_QUEUE_ID_TPC_5_2:
4229 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4232 case GAUDI_QUEUE_ID_TPC_5_3:
4233 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4236 case GAUDI_QUEUE_ID_TPC_6_0:
4237 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4240 case GAUDI_QUEUE_ID_TPC_6_1:
4241 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4244 case GAUDI_QUEUE_ID_TPC_6_2:
4245 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4248 case GAUDI_QUEUE_ID_TPC_6_3:
4249 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4252 case GAUDI_QUEUE_ID_TPC_7_0:
4253 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4256 case GAUDI_QUEUE_ID_TPC_7_1:
4257 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4260 case GAUDI_QUEUE_ID_TPC_7_2:
4261 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4264 case GAUDI_QUEUE_ID_TPC_7_3:
4265 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4268 case GAUDI_QUEUE_ID_NIC_0_0:
4269 db_reg_offset = mmNIC0_QM0_PQ_PI_0;
4272 case GAUDI_QUEUE_ID_NIC_0_1:
4273 db_reg_offset = mmNIC0_QM0_PQ_PI_1;
4276 case GAUDI_QUEUE_ID_NIC_0_2:
4277 db_reg_offset = mmNIC0_QM0_PQ_PI_2;
4280 case GAUDI_QUEUE_ID_NIC_0_3:
4281 db_reg_offset = mmNIC0_QM0_PQ_PI_3;
4284 case GAUDI_QUEUE_ID_NIC_1_0:
4285 db_reg_offset = mmNIC0_QM1_PQ_PI_0;
4288 case GAUDI_QUEUE_ID_NIC_1_1:
4289 db_reg_offset = mmNIC0_QM1_PQ_PI_1;
4292 case GAUDI_QUEUE_ID_NIC_1_2:
4293 db_reg_offset = mmNIC0_QM1_PQ_PI_2;
4296 case GAUDI_QUEUE_ID_NIC_1_3:
4297 db_reg_offset = mmNIC0_QM1_PQ_PI_3;
4300 case GAUDI_QUEUE_ID_NIC_2_0:
4301 db_reg_offset = mmNIC1_QM0_PQ_PI_0;
4304 case GAUDI_QUEUE_ID_NIC_2_1:
4305 db_reg_offset = mmNIC1_QM0_PQ_PI_1;
4308 case GAUDI_QUEUE_ID_NIC_2_2:
4309 db_reg_offset = mmNIC1_QM0_PQ_PI_2;
4312 case GAUDI_QUEUE_ID_NIC_2_3:
4313 db_reg_offset = mmNIC1_QM0_PQ_PI_3;
4316 case GAUDI_QUEUE_ID_NIC_3_0:
4317 db_reg_offset = mmNIC1_QM1_PQ_PI_0;
4320 case GAUDI_QUEUE_ID_NIC_3_1:
4321 db_reg_offset = mmNIC1_QM1_PQ_PI_1;
4324 case GAUDI_QUEUE_ID_NIC_3_2:
4325 db_reg_offset = mmNIC1_QM1_PQ_PI_2;
4328 case GAUDI_QUEUE_ID_NIC_3_3:
4329 db_reg_offset = mmNIC1_QM1_PQ_PI_3;
4332 case GAUDI_QUEUE_ID_NIC_4_0:
4333 db_reg_offset = mmNIC2_QM0_PQ_PI_0;
4336 case GAUDI_QUEUE_ID_NIC_4_1:
4337 db_reg_offset = mmNIC2_QM0_PQ_PI_1;
4340 case GAUDI_QUEUE_ID_NIC_4_2:
4341 db_reg_offset = mmNIC2_QM0_PQ_PI_2;
4344 case GAUDI_QUEUE_ID_NIC_4_3:
4345 db_reg_offset = mmNIC2_QM0_PQ_PI_3;
4348 case GAUDI_QUEUE_ID_NIC_5_0:
4349 db_reg_offset = mmNIC2_QM1_PQ_PI_0;
4352 case GAUDI_QUEUE_ID_NIC_5_1:
4353 db_reg_offset = mmNIC2_QM1_PQ_PI_1;
4356 case GAUDI_QUEUE_ID_NIC_5_2:
4357 db_reg_offset = mmNIC2_QM1_PQ_PI_2;
4360 case GAUDI_QUEUE_ID_NIC_5_3:
4361 db_reg_offset = mmNIC2_QM1_PQ_PI_3;
4364 case GAUDI_QUEUE_ID_NIC_6_0:
4365 db_reg_offset = mmNIC3_QM0_PQ_PI_0;
4368 case GAUDI_QUEUE_ID_NIC_6_1:
4369 db_reg_offset = mmNIC3_QM0_PQ_PI_1;
4372 case GAUDI_QUEUE_ID_NIC_6_2:
4373 db_reg_offset = mmNIC3_QM0_PQ_PI_2;
4376 case GAUDI_QUEUE_ID_NIC_6_3:
4377 db_reg_offset = mmNIC3_QM0_PQ_PI_3;
4380 case GAUDI_QUEUE_ID_NIC_7_0:
4381 db_reg_offset = mmNIC3_QM1_PQ_PI_0;
4384 case GAUDI_QUEUE_ID_NIC_7_1:
4385 db_reg_offset = mmNIC3_QM1_PQ_PI_1;
4388 case GAUDI_QUEUE_ID_NIC_7_2:
4389 db_reg_offset = mmNIC3_QM1_PQ_PI_2;
4392 case GAUDI_QUEUE_ID_NIC_7_3:
4393 db_reg_offset = mmNIC3_QM1_PQ_PI_3;
4396 case GAUDI_QUEUE_ID_NIC_8_0:
4397 db_reg_offset = mmNIC4_QM0_PQ_PI_0;
4400 case GAUDI_QUEUE_ID_NIC_8_1:
4401 db_reg_offset = mmNIC4_QM0_PQ_PI_1;
4404 case GAUDI_QUEUE_ID_NIC_8_2:
4405 db_reg_offset = mmNIC4_QM0_PQ_PI_2;
4408 case GAUDI_QUEUE_ID_NIC_8_3:
4409 db_reg_offset = mmNIC4_QM0_PQ_PI_3;
4412 case GAUDI_QUEUE_ID_NIC_9_0:
4413 db_reg_offset = mmNIC4_QM1_PQ_PI_0;
4416 case GAUDI_QUEUE_ID_NIC_9_1:
4417 db_reg_offset = mmNIC4_QM1_PQ_PI_1;
4420 case GAUDI_QUEUE_ID_NIC_9_2:
4421 db_reg_offset = mmNIC4_QM1_PQ_PI_2;
4424 case GAUDI_QUEUE_ID_NIC_9_3:
4425 db_reg_offset = mmNIC4_QM1_PQ_PI_3;
4429 invalid_queue = true;
4432 if (invalid_queue) {
4433 /* Should never get here */
4434 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4441 /* ring the doorbell */
4442 WREG32(db_reg_offset, db_value);
4444 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
4445 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
4446 GAUDI_EVENT_PI_UPDATE);
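/*
 * The CPU PQ has no QMAN doorbell register; its PI lives in
 * mmCPU_IF_PF_PQ_PI, so after updating it the driver must also raise a
 * GIC SPI (GAUDI_EVENT_PI_UPDATE) to tell the device CPU that new work
 * is pending.
 */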
4449 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4452 __le64 *pbd = (__le64 *) bd;
4454 /* The QMANs are in host memory so a simple copy suffices */
4459 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4460 dma_addr_t *dma_handle, gfp_t flags)
4462 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4465 /* Shift to the device's base physical address of host memory */
4467 *dma_handle += HOST_PHYS_BASE;
4472 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4473 void *cpu_addr, dma_addr_t dma_handle)
4475 /* Cancel the device's base physical address of host memory */
4476 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4478 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
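/*
 * Host-memory addressing convention used by the alloc/free pair above
 * (and by the dma_map_sg/dma_unmap_sg wrappers below): the device sees
 * host memory through a window based at HOST_PHYS_BASE, so every bus
 * address handed to the device is biased by it. For illustration, if
 * dma_alloc_coherent() returns handle 0x8000_0000, the device must be
 * given HOST_PHYS_BASE + 0x8000_0000, and the bias must be removed again
 * before the handle is passed back to the kernel DMA API.
 */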
4481 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4483 struct asic_fixed_properties *prop = &hdev->asic_prop;
4484 u64 cur_addr = DRAM_BASE_ADDR_USER;
4489 while (cur_addr < prop->dram_end_address) {
4490 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4491 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4494 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4497 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4498 cur_addr, cur_addr + chunk_size);
4500 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4501 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4502 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4503 lower_32_bits(cur_addr));
4504 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4505 upper_32_bits(cur_addr));
4506 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4508 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4509 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4510 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4512 cur_addr += chunk_size;
4514 if (cur_addr == prop->dram_end_address)
4518 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4519 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4521 rc = hl_poll_timeout(
4523 mmDMA0_CORE_STS0 + dma_offset,
4525 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4527 HBM_SCRUBBING_TIMEOUT_US);
4531 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4541 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4543 struct asic_fixed_properties *prop = &hdev->asic_prop;
4544 struct gaudi_device *gaudi = hdev->asic_specific;
4549 if (!hdev->memory_scrub)
4552 if (!addr && !size) {
4553 /* Wait till device is idle */
4554 rc = hl_poll_timeout(
4556 mmDMA0_CORE_STS0/* dummy */,
4558 (hdev->asic_funcs->is_device_idle(hdev,
4561 HBM_SCRUBBING_TIMEOUT_US);
4563 dev_err(hdev->dev, "waiting for idle timeout\n");
4568 addr = prop->sram_user_base_address;
4569 size = hdev->pldm ? 0x10000 :
4570 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4571 val = 0x7777777777777777ull;
4573 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4576 "Failed to clear SRAM in mem scrub all\n");
4580 mutex_lock(&gaudi->clk_gate_mutex);
4581 hdev->asic_funcs->disable_clock_gating(hdev);
4583 /* Scrub HBM using all DMA channels in parallel */
4584 rc = gaudi_hbm_scrubbing(hdev);
4587 "Failed to clear HBM in mem scrub all\n");
4589 hdev->asic_funcs->set_clock_gating(hdev);
4590 mutex_unlock(&gaudi->clk_gate_mutex);
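/*
 * Clock gating is disabled around the HBM scrub (under clk_gate_mutex)
 * because the scrub pokes the DMA core registers directly; register
 * access to a clock-gated engine is presumably not reliable, so gating
 * is turned off first and restored once the scrub is done.
 */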
4596 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4597 u32 queue_id, dma_addr_t *dma_handle,
4600 struct gaudi_device *gaudi = hdev->asic_specific;
4601 struct gaudi_internal_qman_info *q;
4603 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4604 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4605 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4609 q = &gaudi->internal_qmans[queue_id];
4610 *dma_handle = q->pq_dma_addr;
4611 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4613 return q->pq_kernel_addr;
4616 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4617 u16 len, u32 timeout, u64 *result)
4619 struct gaudi_device *gaudi = hdev->asic_specific;
4621 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4628 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4630 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4634 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4636 struct packet_msg_prot *fence_pkt;
4637 dma_addr_t pkt_dma_addr;
4638 u32 fence_val, tmp, timeout_usec;
4639 dma_addr_t fence_dma_addr;
4644 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4646 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4648 fence_val = GAUDI_QMAN0_FENCE_VAL;
4650 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4654 "Failed to allocate memory for H/W queue %d testing\n",
4661 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4662 sizeof(struct packet_msg_prot),
4663 GFP_KERNEL, &pkt_dma_addr);
4666 "Failed to allocate packet for H/W queue %d testing\n",
4669 goto free_fence_ptr;
4672 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4673 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4674 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4676 fence_pkt->ctl = cpu_to_le32(tmp);
4677 fence_pkt->value = cpu_to_le32(fence_val);
4678 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4680 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4681 sizeof(struct packet_msg_prot),
4685 "Failed to send fence packet to H/W queue %d\n",
4690 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4691 1000, timeout_usec, true);
4693 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4695 if (rc == -ETIMEDOUT) {
4697 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4698 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4703 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4706 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4711 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4713 struct gaudi_device *gaudi = hdev->asic_specific;
4716 * check the capability here as send_cpu_message() won't update the
4717 * result value if the capability isn't initialized
4718 */
4719 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4722 return hl_fw_test_cpu_queue(hdev);
4725 static int gaudi_test_queues(struct hl_device *hdev)
4727 int i, rc, ret_val = 0;
4729 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4730 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4731 rc = gaudi_test_queue(hdev, i);
4737 rc = gaudi_test_cpu_queue(hdev);
4744 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4745 gfp_t mem_flags, dma_addr_t *dma_handle)
4749 if (size > GAUDI_DMA_POOL_BLK_SIZE)
4752 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4754 /* Shift to the device's base physical address of host memory */
4756 *dma_handle += HOST_PHYS_BASE;
4761 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4762 dma_addr_t dma_addr)
4764 /* Cancel the device's base physical address of host memory */
4765 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4767 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4770 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4771 size_t size, dma_addr_t *dma_handle)
4773 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4776 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4777 size_t size, void *vaddr)
4779 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4782 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
4783 int nents, enum dma_data_direction dir)
4785 struct scatterlist *sg;
4788 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
4791 /* Shift to the device's base physical address of host memory */
4792 for_each_sg(sgl, sg, nents, i)
4793 sg->dma_address += HOST_PHYS_BASE;
4798 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
4799 int nents, enum dma_data_direction dir)
4801 struct scatterlist *sg;
4804 /* Cancel the device's base physical address of host memory */
4805 for_each_sg(sgl, sg, nents, i)
4806 sg->dma_address -= HOST_PHYS_BASE;
4808 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
4811 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
4812 struct sg_table *sgt)
4814 struct scatterlist *sg, *sg_next_iter;
4815 u32 count, dma_desc_cnt;
4817 dma_addr_t addr, addr_next;
4821 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
4823 len = sg_dma_len(sg);
4824 addr = sg_dma_address(sg);
4829 while ((count + 1) < sgt->nents) {
4830 sg_next_iter = sg_next(sg);
4831 len_next = sg_dma_len(sg_next_iter);
4832 addr_next = sg_dma_address(sg_next_iter);
4837 if ((addr + len == addr_next) &&
4838 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4850 return dma_desc_cnt * sizeof(struct packet_lin_dma);
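/*
 * Worked example of the merge logic above (illustrative values): three
 * SG entries of 0x1000 bytes at bus addresses 0x1000, 0x2000 and 0x3000
 * are physically contiguous, so as long as the combined length stays
 * under DMA_MAX_TRANSFER_SIZE they collapse into a single descriptor of
 * 0x3000 bytes, and dma_desc_cnt is 1 instead of 3. The returned value
 * is thus the exact size of the LIN_DMA descriptors that patching will
 * emit, not nents * packet size.
 */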
4853 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4854 struct hl_cs_parser *parser,
4855 struct packet_lin_dma *user_dma_pkt,
4856 u64 addr, enum dma_data_direction dir)
4858 struct hl_userptr *userptr;
4861 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4862 parser->job_userptr_list, &userptr))
4863 goto already_pinned;
4865 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
4869 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4874 list_add_tail(&userptr->job_node, parser->job_userptr_list);
4876 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
4877 userptr->sgt->nents, dir);
4879 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4883 userptr->dma_mapped = true;
4887 parser->patched_cb_size +=
4888 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4893 hl_unpin_host_memory(hdev, userptr);
4899 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4900 struct hl_cs_parser *parser,
4901 struct packet_lin_dma *user_dma_pkt,
4904 enum dma_data_direction dir;
4905 bool skip_host_mem_pin = false, user_memset;
4909 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4910 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4911 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4915 skip_host_mem_pin = true;
4917 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4918 dir = DMA_TO_DEVICE;
4919 addr = le64_to_cpu(user_dma_pkt->src_addr);
4921 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4922 dir = DMA_FROM_DEVICE;
4923 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4924 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4925 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4928 if (skip_host_mem_pin)
4929 parser->patched_cb_size += sizeof(*user_dma_pkt);
4931 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4937 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4938 struct hl_cs_parser *parser,
4939 struct packet_lin_dma *user_dma_pkt)
4941 bool src_in_host = false;
4942 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4943 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4944 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4946 dev_dbg(hdev->dev, "DMA packet details:\n");
4947 dev_dbg(hdev->dev, "source == 0x%llx\n",
4948 le64_to_cpu(user_dma_pkt->src_addr));
4949 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4950 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4953 * Special handling for DMA with size 0. Bypass all validations
4954 * because no transactions will be done except for WR_COMP, which
4955 * is not a security issue
4957 if (!le32_to_cpu(user_dma_pkt->tsize)) {
4958 parser->patched_cb_size += sizeof(*user_dma_pkt);
4962 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4965 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
4969 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
4970 struct hl_cs_parser *parser,
4971 struct packet_load_and_exe *user_pkt)
4975 cfg = le32_to_cpu(user_pkt->cfg);
4977 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
4979 "User not allowed to use Load and Execute\n");
4983 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
4988 static int gaudi_validate_cb(struct hl_device *hdev,
4989 struct hl_cs_parser *parser, bool is_mmu)
4991 u32 cb_parsed_length = 0;
4994 parser->patched_cb_size = 0;
4996 /* user_cb_size is more than 0 so the loop will always be executed */
4997 while (cb_parsed_length < parser->user_cb_size) {
4998 enum packet_id pkt_id;
5000 struct gaudi_packet *user_pkt;
5002 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5004 pkt_id = (enum packet_id) (
5005 (le64_to_cpu(user_pkt->header) &
5006 PACKET_HEADER_PACKET_ID_MASK) >>
5007 PACKET_HEADER_PACKET_ID_SHIFT);
5009 if (!validate_packet_id(pkt_id)) {
5010 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5015 pkt_size = gaudi_packet_sizes[pkt_id];
5016 cb_parsed_length += pkt_size;
5017 if (cb_parsed_length > parser->user_cb_size) {
5019 "packet 0x%x is out of CB boundary\n", pkt_id);
5025 case PACKET_MSG_PROT:
5027 "User not allowed to use MSG_PROT\n");
5032 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5037 dev_err(hdev->dev, "User not allowed to use STOP\n");
5041 case PACKET_WREG_BULK:
5043 "User not allowed to use WREG_BULK\n");
5047 case PACKET_LOAD_AND_EXE:
5048 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5049 (struct packet_load_and_exe *) user_pkt);
5052 case PACKET_LIN_DMA:
5053 parser->contains_dma_pkt = true;
5055 parser->patched_cb_size += pkt_size;
5057 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5058 (struct packet_lin_dma *) user_pkt);
5061 case PACKET_WREG_32:
5062 case PACKET_MSG_LONG:
5063 case PACKET_MSG_SHORT:
5067 case PACKET_ARB_POINT:
5068 parser->patched_cb_size += pkt_size;
5072 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5083 * The new CB should have space at the end for two MSG_PROT packets:
5084 * 1. A packet that will act as a completion packet
5085 * 2. A packet that will generate an MSI interrupt
5086 */
5087 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5092 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5093 struct hl_cs_parser *parser,
5094 struct packet_lin_dma *user_dma_pkt,
5095 struct packet_lin_dma *new_dma_pkt,
5096 u32 *new_dma_pkt_size)
5098 struct hl_userptr *userptr;
5099 struct scatterlist *sg, *sg_next_iter;
5100 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5102 dma_addr_t dma_addr, dma_addr_next;
5103 u64 device_memory_addr, addr;
5104 enum dma_data_direction dir;
5105 struct sg_table *sgt;
5106 bool src_in_host = false;
5107 bool skip_host_mem_pin = false;
5110 ctl = le32_to_cpu(user_dma_pkt->ctl);
5112 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5115 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5116 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5119 addr = le64_to_cpu(user_dma_pkt->src_addr);
5120 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5121 dir = DMA_TO_DEVICE;
5123 skip_host_mem_pin = true;
5125 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5126 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5127 dir = DMA_FROM_DEVICE;
5130 if ((!skip_host_mem_pin) &&
5131 (!hl_userptr_is_pinned(hdev, addr,
5132 le32_to_cpu(user_dma_pkt->tsize),
5133 parser->job_userptr_list, &userptr))) {
5134 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5135 addr, user_dma_pkt->tsize);
5139 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5140 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5141 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5145 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5150 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5151 len = sg_dma_len(sg);
5152 dma_addr = sg_dma_address(sg);
5157 while ((count + 1) < sgt->nents) {
5158 sg_next_iter = sg_next(sg);
5159 len_next = sg_dma_len(sg_next_iter);
5160 dma_addr_next = sg_dma_address(sg_next_iter);
5165 if ((dma_addr + len == dma_addr_next) &&
5166 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5175 ctl = le32_to_cpu(user_dma_pkt->ctl);
5176 if (likely(dma_desc_cnt))
5177 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5178 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5179 new_dma_pkt->ctl = cpu_to_le32(ctl);
5180 new_dma_pkt->tsize = cpu_to_le32(len);
5182 if (dir == DMA_TO_DEVICE) {
5183 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5184 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5186 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5187 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5191 device_memory_addr += len;
5196 if (!dma_desc_cnt) {
5198 "Error of 0 SG entries when patching DMA packet\n");
5202 /* Fix the last DMA packet - WR_COMP must be as the user set it */
5204 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5206 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
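/*
 * Barrier/completion semantics of the patching above: only the first
 * generated descriptor keeps the user's engine-barrier (EB) bit, and
 * WR_COMP is stripped from every descriptor and restored (per the user's
 * mask) on the last one only, so a multi-descriptor expansion still
 * produces exactly one completion write for the original user packet.
 */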
5211 static int gaudi_patch_cb(struct hl_device *hdev,
5212 struct hl_cs_parser *parser)
5214 u32 cb_parsed_length = 0;
5215 u32 cb_patched_cur_length = 0;
5218 /* user_cb_size is more than 0 so the loop will always be executed */
5219 while (cb_parsed_length < parser->user_cb_size) {
5220 enum packet_id pkt_id;
5222 u32 new_pkt_size = 0;
5223 struct gaudi_packet *user_pkt, *kernel_pkt;
5225 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5226 kernel_pkt = parser->patched_cb->kernel_address +
5227 cb_patched_cur_length;
5229 pkt_id = (enum packet_id) (
5230 (le64_to_cpu(user_pkt->header) &
5231 PACKET_HEADER_PACKET_ID_MASK) >>
5232 PACKET_HEADER_PACKET_ID_SHIFT);
5234 if (!validate_packet_id(pkt_id)) {
5235 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5240 pkt_size = gaudi_packet_sizes[pkt_id];
5241 cb_parsed_length += pkt_size;
5242 if (cb_parsed_length > parser->user_cb_size) {
5244 "packet 0x%x is out of CB boundary\n", pkt_id);
5250 case PACKET_LIN_DMA:
5251 rc = gaudi_patch_dma_packet(hdev, parser,
5252 (struct packet_lin_dma *) user_pkt,
5253 (struct packet_lin_dma *) kernel_pkt,
5255 cb_patched_cur_length += new_pkt_size;
5258 case PACKET_MSG_PROT:
5260 "User not allowed to use MSG_PROT\n");
5265 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5270 dev_err(hdev->dev, "User not allowed to use STOP\n");
5274 case PACKET_WREG_32:
5275 case PACKET_WREG_BULK:
5276 case PACKET_MSG_LONG:
5277 case PACKET_MSG_SHORT:
5281 case PACKET_ARB_POINT:
5282 case PACKET_LOAD_AND_EXE:
5283 memcpy(kernel_pkt, user_pkt, pkt_size);
5284 cb_patched_cur_length += pkt_size;
5288 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5301 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5302 struct hl_cs_parser *parser)
5304 u64 patched_cb_handle;
5305 u32 patched_cb_size;
5306 struct hl_cb *user_cb;
5310 * The new CB should have space at the end for two MSG_PROT pkt:
5311 * 1. A packet that will act as a completion packet
5312 * 2. A packet that will generate an MSI interrupt
5313 */
5314 parser->patched_cb_size = parser->user_cb_size +
5315 sizeof(struct packet_msg_prot) * 2;
5317 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5318 parser->patched_cb_size, false, false,
5319 &patched_cb_handle);
5323 "Failed to allocate patched CB for DMA CS %d\n",
5328 patched_cb_handle >>= PAGE_SHIFT;
5329 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5330 (u32) patched_cb_handle);
5331 /* hl_cb_get should never fail here so use kernel WARN */
5332 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
5333 (u32) patched_cb_handle);
5334 if (!parser->patched_cb) {
5340 * The check that parser->user_cb_size <= parser->user_cb->size was done
5341 * in validate_queue_index().
5343 memcpy(parser->patched_cb->kernel_address,
5344 parser->user_cb->kernel_address,
5345 parser->user_cb_size);
5347 patched_cb_size = parser->patched_cb_size;
5349 /* Validate patched CB instead of user CB */
5350 user_cb = parser->user_cb;
5351 parser->user_cb = parser->patched_cb;
5352 rc = gaudi_validate_cb(hdev, parser, true);
5353 parser->user_cb = user_cb;
5356 hl_cb_put(parser->patched_cb);
5360 if (patched_cb_size != parser->patched_cb_size) {
5361 dev_err(hdev->dev, "user CB size mismatch\n");
5362 hl_cb_put(parser->patched_cb);
5369 * Always call cb destroy here because we still have 1 reference
5370 * to it from calling cb_get earlier. After the job completes,
5371 * cb_put will release it, but here we want to remove it from the
5372 * idr
5373 */
5374 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5375 patched_cb_handle << PAGE_SHIFT);
5380 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5381 struct hl_cs_parser *parser)
5383 u64 patched_cb_handle;
5386 rc = gaudi_validate_cb(hdev, parser, false);
5391 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5392 parser->patched_cb_size, false, false,
5393 &patched_cb_handle);
5396 "Failed to allocate patched CB for DMA CS %d\n", rc);
5400 patched_cb_handle >>= PAGE_SHIFT;
5401 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5402 (u32) patched_cb_handle);
5403 /* hl_cb_get should never fail here so use kernel WARN */
5404 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
5405 (u32) patched_cb_handle);
5406 if (!parser->patched_cb) {
5411 rc = gaudi_patch_cb(hdev, parser);
5414 hl_cb_put(parser->patched_cb);
5418 * Always call cb destroy here because we still have 1 reference
5419 * to it from calling cb_get earlier. After the job completes,
5420 * cb_put will release it, but here we want to remove it from the
5421 * idr
5422 */
5423 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5424 patched_cb_handle << PAGE_SHIFT);
5428 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5432 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5433 struct hl_cs_parser *parser)
5435 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5436 struct gaudi_device *gaudi = hdev->asic_specific;
5437 u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5438 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5440 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5441 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5442 (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5443 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5444 parser->hw_queue_id);
5448 /* For internal queue jobs just check if CB address is valid */
5449 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5450 parser->user_cb_size,
5451 asic_prop->sram_user_base_address,
5452 asic_prop->sram_end_address))
5455 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5456 parser->user_cb_size,
5457 asic_prop->dram_user_base_address,
5458 asic_prop->dram_end_address))
5461 /* PMMU and HPMMU addresses are equal, check only one of them */
5462 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5463 parser->user_cb_size,
5464 asic_prop->pmmu.start_addr,
5465 asic_prop->pmmu.end_addr))
5469 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5470 parser->user_cb, parser->user_cb_size);
5475 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5477 struct gaudi_device *gaudi = hdev->asic_specific;
5479 if (parser->queue_type == QUEUE_TYPE_INT)
5480 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5482 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5483 return gaudi_parse_cb_mmu(hdev, parser);
5485 return gaudi_parse_cb_no_mmu(hdev, parser);
5488 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5489 void *kernel_address, u32 len,
5490 u64 cq_addr, u32 cq_val, u32 msi_vec,
5493 struct gaudi_device *gaudi = hdev->asic_specific;
5494 struct packet_msg_prot *cq_pkt;
5497 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5499 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5500 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5503 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5505 cq_pkt->ctl = cpu_to_le32(tmp);
5506 cq_pkt->value = cpu_to_le32(cq_val);
5507 cq_pkt->addr = cpu_to_le64(cq_addr);
5511 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5512 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5513 cq_pkt->ctl = cpu_to_le32(tmp);
5514 cq_pkt->value = cpu_to_le32(1);
5516 if (!gaudi->multi_msi_mode)
5519 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5522 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5524 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5527 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5530 struct packet_lin_dma *lin_dma_pkt;
5531 struct hl_cs_job *job;
5532 u32 cb_size, ctl, err_cause;
5536 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5540 lin_dma_pkt = cb->kernel_address;
5541 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5542 cb_size = sizeof(*lin_dma_pkt);
5544 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5545 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5546 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5547 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5548 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5550 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5551 lin_dma_pkt->src_addr = cpu_to_le64(val);
5552 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5553 lin_dma_pkt->tsize = cpu_to_le32(size);
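/*
 * With the MEMSET control bit set, the LIN_DMA src_addr field is
 * evidently reinterpreted by the DMA engine as the 64-bit fill value
 * rather than a source pointer, which is why 'val' is written there
 * while 'addr' and 'size' describe the destination range.
 */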
5555 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5557 dev_err(hdev->dev, "Failed to allocate a new job\n");
5562 /* Verify DMA is OK */
5563 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5564 if (err_cause && !hdev->init_done) {
5566 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5568 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5573 atomic_inc(&job->user_cb->cs_cnt);
5574 job->user_cb_size = cb_size;
5575 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5576 job->patched_cb = job->user_cb;
5577 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5579 hl_debugfs_add_job(hdev, job);
5581 rc = gaudi_send_job_on_qman0(hdev, job);
5582 hl_debugfs_remove_job(hdev, job);
5584 atomic_dec(&cb->cs_cnt);
5586 /* Verify DMA is OK */
5587 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5589 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5591 if (!hdev->init_done) {
5593 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5595 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5601 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5606 static void gaudi_restore_sm_registers(struct hl_device *hdev)
5610 for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
5611 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5612 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5613 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5616 for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
5617 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5618 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5619 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5622 i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
5624 for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
5625 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5627 i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
5629 for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
5630 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5633 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5635 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5636 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5639 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5640 u64 sob_addr = CFG_BASE +
5641 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5643 u32 dma_offset = i * DMA_CORE_OFFSET;
5645 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5646 lower_32_bits(sob_addr));
5647 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5648 upper_32_bits(sob_addr));
5649 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5651 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5652 * modified by the user for SRAM reduction
5653 */
5654 if (i > 1)
5655 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5660 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5665 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5666 qman_offset = i * DMA_QMAN_OFFSET;
5667 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5670 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5671 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5672 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5675 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5676 qman_offset = i * TPC_QMAN_OFFSET;
5677 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5680 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5681 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5682 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5683 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5687 static void gaudi_restore_user_registers(struct hl_device *hdev)
5689 gaudi_restore_sm_registers(hdev);
5690 gaudi_restore_dma_registers(hdev);
5691 gaudi_restore_qm_registers(hdev);
5694 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5696 gaudi_restore_user_registers(hdev);
5701 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5703 struct asic_fixed_properties *prop = &hdev->asic_prop;
5704 struct gaudi_device *gaudi = hdev->asic_specific;
5705 u64 addr = prop->mmu_pgt_addr;
5706 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
5708 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5711 return gaudi_memset_device_memory(hdev, addr, size, 0);
5714 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5719 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
5721 struct asic_fixed_properties *prop = &hdev->asic_prop;
5722 struct gaudi_device *gaudi = hdev->asic_specific;
5726 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
5728 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5729 (hdev->clock_gating_mask &
5730 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5732 dev_err_ratelimited(hdev->dev,
5733 "Can't read register - clock gating is enabled!\n");
5736 *val = RREG32(addr - CFG_BASE);
5739 } else if ((addr >= SRAM_BASE_ADDR) &&
5740 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
5741 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
5742 (addr - SRAM_BASE_ADDR));
5743 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
5744 u64 bar_base_addr = DRAM_PHYS_BASE +
5745 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5747 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5748 if (hbm_bar_addr != U64_MAX) {
5749 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
5750 (addr - bar_base_addr));
5752 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5755 if (hbm_bar_addr == U64_MAX)
5757 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5758 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
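/*
 * Address decode shared by all four debugfs access helpers: the CFG
 * space is accessed through regular register reads/writes (refused while
 * the relevant engines may be clock gated), SRAM directly through its
 * PCI BAR, and DRAM through the HBM BAR, which is temporarily slid to
 * cover the target address and then moved back. Host addresses are
 * touched via phys_to_virt() only when no IOMMU is present, since with
 * an IOMMU the device-side address is an IOVA and subtracting
 * HOST_PHYS_BASE would presumably not yield a real physical address.
 */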
5766 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
5768 struct asic_fixed_properties *prop = &hdev->asic_prop;
5769 struct gaudi_device *gaudi = hdev->asic_specific;
5773 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
5775 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5776 (hdev->clock_gating_mask &
5777 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5779 dev_err_ratelimited(hdev->dev,
5780 "Can't write register - clock gating is enabled!\n");
5783 WREG32(addr - CFG_BASE, val);
5786 } else if ((addr >= SRAM_BASE_ADDR) &&
5787 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
5788 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
5789 (addr - SRAM_BASE_ADDR));
5790 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
5791 u64 bar_base_addr = DRAM_PHYS_BASE +
5792 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5794 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5795 if (hbm_bar_addr != U64_MAX) {
5796 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
5797 (addr - bar_base_addr));
5799 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5802 if (hbm_bar_addr == U64_MAX)
5804 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5805 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
5813 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
5815 struct asic_fixed_properties *prop = &hdev->asic_prop;
5816 struct gaudi_device *gaudi = hdev->asic_specific;
5820 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
5822 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5823 (hdev->clock_gating_mask &
5824 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5826 dev_err_ratelimited(hdev->dev,
5827 "Can't read register - clock gating is enabled!\n");
5830 u32 val_l = RREG32(addr - CFG_BASE);
5831 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
5833 *val = (((u64) val_h) << 32) | val_l;
5836 } else if ((addr >= SRAM_BASE_ADDR) &&
5837 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
5838 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
5839 (addr - SRAM_BASE_ADDR));
5841 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
5842 u64 bar_base_addr = DRAM_PHYS_BASE +
5843 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5845 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5846 if (hbm_bar_addr != U64_MAX) {
5847 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
5848 (addr - bar_base_addr));
5850 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5853 if (hbm_bar_addr == U64_MAX)
5855 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5856 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
5864 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
5866 struct asic_fixed_properties *prop = &hdev->asic_prop;
5867 struct gaudi_device *gaudi = hdev->asic_specific;
5871 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
5873 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5874 (hdev->clock_gating_mask &
5875 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5877 dev_err_ratelimited(hdev->dev,
5878 "Can't write register - clock gating is enabled!\n");
5881 WREG32(addr - CFG_BASE, lower_32_bits(val));
5882 WREG32(addr + sizeof(u32) - CFG_BASE,
5883 upper_32_bits(val));
5886 } else if ((addr >= SRAM_BASE_ADDR) &&
5887 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
5888 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
5889 (addr - SRAM_BASE_ADDR));
5891 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
5892 u64 bar_base_addr = DRAM_PHYS_BASE +
5893 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5895 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5896 if (hbm_bar_addr != U64_MAX) {
5897 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
5898 (addr - bar_base_addr));
5900 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5903 if (hbm_bar_addr == U64_MAX)
5905 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5906 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
5914 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
5916 struct gaudi_device *gaudi = hdev->asic_specific;
5918 if (hdev->hard_reset_pending)
5921 return readq(hdev->pcie_bar[HBM_BAR_ID] +
5922 (addr - gaudi->hbm_bar_cur_addr));
5925 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
5927 struct gaudi_device *gaudi = hdev->asic_specific;
5929 if (hdev->hard_reset_pending)
5932 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
5933 (addr - gaudi->hbm_bar_cur_addr));
5936 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
5938 /* mask to zero the MMBP and ASID bits */
5939 WREG32_AND(reg, ~0x7FF);
5940 WREG32_OR(reg, asid);
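/*
 * The 0x7FF mask appears to cover an 11-bit field holding the ASID
 * (bounded by DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK, checked by the
 * caller) plus the MMU-bypass (MMBP) bit above it. Clearing the whole
 * field and OR-ing just the ASID leaves MMBP at zero, so the engine's
 * transactions go through the MMU tagged with the given ASID.
 */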
5943 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
5945 struct gaudi_device *gaudi = hdev->asic_specific;
5947 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5950 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
5951 WARN(1, "asid %u is too big\n", asid);
5955 mutex_lock(&gaudi->clk_gate_mutex);
5957 hdev->asic_funcs->disable_clock_gating(hdev);
5959 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
5960 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
5961 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
5962 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
5963 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
5965 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
5966 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
5967 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
5968 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
5969 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
5971 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
5972 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
5973 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
5974 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
5975 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
5977 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
5978 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
5979 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
5980 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
5981 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
5983 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
5984 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
5985 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
5986 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
5987 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
5989 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
5990 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
5991 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
5992 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
5993 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
5995 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
5996 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
5997 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
5998 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
5999 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6001 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6002 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6003 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6004 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6005 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6007 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6008 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6009 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6010 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6011 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6012 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6013 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6014 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC0) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC1) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC2) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC3) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC4) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC5) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC6) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC7) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC8) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC9) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);
}

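/*
 * gaudi_send_job_on_qman0 - run a driver-internal job on the PCI DMA QMAN0.
 *
 * A MSG_PROT fence packet is appended at the end of the patched CB, the DMA
 * core protection is temporarily lifted so the kernel can use the queue, the
 * CB is sent without a completion entry, and the fence value is then polled
 * in host memory to detect completion.
 */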
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout, dma_offset;
	int rc;

	if (hdev->pldm)
		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
		dev_err_ratelimited(hdev->dev,
			"Can't send driver job on QMAN0 because the device is not idle\n");
		return -EBUSY;
	}

	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	cb = job->patched_cb;

	fence_pkt = cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot);

	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;

	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

free_fence_ptr:
	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
			~BIT(DMA0_CORE_PROT_VAL_SHIFT));

	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);
	return rc;
}

static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
{
	if (event_type >= GAUDI_EVENT_SIZE)
		goto event_not_supported;

	if (!gaudi_irq_map_table[event_type].valid)
		goto event_not_supported;

	/* Use an explicit "%s" format so a '%' in the table name is harmless */
	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
	return;

event_not_supported:
	snprintf(desc, size, "N/A");
}

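/*
 * RAZWI (Read-As-Zero/Write-Ignored) events report only the X/Y coordinates
 * of the initiator. Each DMA interface is shared by two DMA cores, so the
 * per-core ERR_CAUSE registers are read to tell which core of the pair
 * actually performed the offending access.
 */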
static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
							u32 x_y, bool is_write)
{
	u32 dma_id[2], dma_offset, err_cause[2], mask, i;

	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		dma_id[0] = 0;
		dma_id[1] = 2;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		dma_id[0] = 1;
		dma_id[1] = 3;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		dma_id[0] = 4;
		dma_id[1] = 6;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		dma_id[0] = 5;
		dma_id[1] = 7;
		break;
	default:
		goto unknown_initiator;
	}

	for (i = 0 ; i < 2 ; i++) {
		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	}

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA0";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA2";
		else
			return "DMA0 or DMA2";
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA1";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA3";
		else
			return "DMA1 or DMA3";
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA4";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA6";
		else
			return "DMA4 or DMA6";
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask))
			return "DMA5";
		else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
			return "DMA7";
		else
			return "DMA5 or DMA7";
	}

unknown_initiator:
	return "unknown initiator";
}

static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
							bool is_write)
{
	u32 val, x_y, axi_id;

	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
				RREG32(mmMMU_UP_RAZWI_READ_ID);
	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
			RAZWI_INITIATOR_AXI_ID_SHIFT);

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC0";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
			return "NIC0";
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC1:
		return "TPC1";
	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
		return "MME0";
	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
		return "MME1";
	case RAZWI_INITIATOR_ID_X_Y_TPC2:
		return "TPC2";
	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC3";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
			return "PCI";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
			return "CPU";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
			return "PSOC";
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC4";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
			return "NIC1";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
			return "NIC2";
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC5:
		return "TPC5";
	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
		return "MME2";
	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
		return "MME3";
	case RAZWI_INITIATOR_ID_X_Y_TPC6:
		return "TPC6";
	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
			return "TPC7";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
			return "NIC4";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
			return "NIC5";
		break;
	default:
		break;
	}

	dev_err(hdev->dev,
		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
		val,
		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
			RAZWI_INITIATOR_AXI_ID_MASK);

	return "unknown initiator";
}

static void gaudi_print_razwi_info(struct hl_device *hdev)
{
	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal write of %s\n",
			gaudi_get_razwi_initiator_name(hdev, true));
		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
	}

	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal read of %s\n",
			gaudi_get_razwi_initiator_name(hdev, false));
		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
	}
}

static void gaudi_print_mmu_error_info(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 addr;
	u32 val;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		addr <<= 32;
		addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
					addr);

		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
	}

	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
		addr <<= 32;
		addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev,
				"MMU access error on va 0x%llx\n", addr);

		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
	}
}

/*
 *  +-------------------+------------------------------------------------------+
 *  | Configuration Reg |                     Description                      |
 *  |                   |                                                      |
 *  +-------------------+------------------------------------------------------+
 *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
 *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
 *  |                   |0xF34 memory wrappers 63:32                           |
 *  |                   |0xF38 memory wrappers 95:64                           |
 *  |                   |0xF3C memory wrappers 127:96                          |
 *  +-------------------+------------------------------------------------------+
 *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
 *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
 *  |                   |0xF44 memory wrappers 63:32                           |
 *  |                   |0xF48 memory wrappers 95:64                           |
 *  |                   |0xF4C memory wrappers 127:96                          |
 *  +-------------------+------------------------------------------------------+
 */
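/*
 * Example: a single-bit error in memory wrapper 40 of a block shows up as
 * bit 8 of register 0xF34 (40 / 32 selects the second register, 40 % 32 the
 * bit), which is exactly how gaudi_extract_ecc_info() below recovers the
 * wrapper index with __ffs().
 */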
static int gaudi_extract_ecc_info(struct hl_device *hdev,
		struct ecc_info_extract_params *params, u64 *ecc_address,
		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 i, num_mem_regs, reg, err_bit;
	u64 err_addr, err_word = 0;
	int rc = 0;

	num_mem_regs = params->num_memories / 32 +
			((params->num_memories % 32) ? 1 : 0);

	if (params->block_address >= CFG_BASE)
		params->block_address -= CFG_BASE;

	if (params->derr)
		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
	else
		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;

	if (params->disable_clock_gating) {
		mutex_lock(&gaudi->clk_gate_mutex);
		hdev->asic_funcs->disable_clock_gating(hdev);
	}

	/* Set invalid wrapper index */
	*memory_wrapper_idx = 0xFF;

	/* Iterate through memory wrappers, a single bit must be set */
	for (i = 0 ; i < num_mem_regs ; i++) {
		err_addr += i * 4;
		err_word = RREG32(err_addr);
		if (err_word) {
			err_bit = __ffs(err_word);
			*memory_wrapper_idx = err_bit + (32 * i);
			break;
		}
	}

	if (*memory_wrapper_idx == 0xFF) {
		dev_err(hdev->dev, "ECC error information cannot be found\n");
		rc = -EINVAL;
		goto enable_clk_gate;
	}

	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
			*memory_wrapper_idx);

	*ecc_address =
		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
	*ecc_syndrom =
		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);

	/* Clear error indication */
	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
	if (params->derr)
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
	else
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);

	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);

enable_clk_gate:
	if (params->disable_clock_gating) {
		hdev->asic_funcs->set_clock_gating(hdev);

		mutex_unlock(&gaudi->clk_gate_mutex);
	}

	return rc;
}

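/*
 * QMAN errors are reported per stream (plus the lower CP) through the
 * GLBL_STS1 registers; each set bit is decoded against the cause tables and
 * then cleared back with a write-1-to-clear.
 */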
static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
					  const char *qm_name,
					  u64 glbl_sts_addr,
					  u64 arb_err_addr)
{
	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
	char reg_desc[32];

	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
		glbl_sts_clr_val = 0;
		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);

		if (!glbl_sts_val)
			continue;

		if (i == QMAN_STREAMS)
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
		else
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);

		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
			if (glbl_sts_val & BIT(j)) {
				dev_err_ratelimited(hdev->dev,
						"%s %s. err cause: %s\n",
						qm_name, reg_desc,
						gaudi_qman_error_cause[j]);
				glbl_sts_clr_val |= BIT(j);
			}
		}

		/* Write 1 clear errors */
		WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
	}

	arb_err_val = RREG32(arb_err_addr);

	if (!arb_err_val)
		return;

	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
		if (arb_err_val & BIT(j)) {
			dev_err_ratelimited(hdev->dev,
					"%s ARB_ERR. err cause: %s\n",
					qm_name,
					gaudi_qman_arb_error_cause[j]);
		}
	}
}

static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
		struct hl_eq_ecc_data *ecc_data)
{
	struct ecc_info_extract_params params;
	u64 ecc_address = 0, ecc_syndrom = 0;
	u8 index, memory_wrapper_idx = 0;
	bool extract_info_from_fw;
	int rc;

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
		extract_info_from_fw = true;
		break;
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
		index = event_type - GAUDI_EVENT_TPC0_SERR;
		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = false;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
		index = event_type - GAUDI_EVENT_TPC0_DERR;
		params.block_address =
			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = false;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = false;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = true;
		params.disable_clock_gating = true;
		extract_info_from_fw = false;
		break;
	default:
		return;
	}

	if (extract_info_from_fw) {
		ecc_address = le64_to_cpu(ecc_data->ecc_address);
		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
	} else {
		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
				&ecc_syndrom, &memory_wrapper_idx);
		if (rc)
			return;
	}

	dev_err(hdev->dev,
		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
		ecc_address, ecc_syndrom, memory_wrapper_idx);
}

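/*
 * Map a QMAN error event to the GLBL_STS1/ARB_ERR_CAUSE register bases of the
 * reporting engine and hand the decode off to the generic handler above.
 */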
static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
{
	u64 glbl_sts_addr, arb_err_addr;
	u8 index;
	char desc[32];

	switch (event_type) {
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		index = event_type - GAUDI_EVENT_TPC0_QM;
		glbl_sts_addr =
			mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
		arb_err_addr =
			mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
		break;
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
		index = event_type - GAUDI_EVENT_MME0_QM;
		glbl_sts_addr =
			mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
		arb_err_addr =
			mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
		break;
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		index = event_type - GAUDI_EVENT_DMA0_QM;
		glbl_sts_addr =
			mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
		arb_err_addr =
			mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
		break;
	case GAUDI_EVENT_NIC0_QM0:
		glbl_sts_addr = mmNIC0_QM0_GLBL_STS1_0;
		arb_err_addr = mmNIC0_QM0_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
		break;
	case GAUDI_EVENT_NIC0_QM1:
		glbl_sts_addr = mmNIC0_QM1_GLBL_STS1_0;
		arb_err_addr = mmNIC0_QM1_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
		break;
	case GAUDI_EVENT_NIC1_QM0:
		glbl_sts_addr = mmNIC1_QM0_GLBL_STS1_0;
		arb_err_addr = mmNIC1_QM0_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
		break;
	case GAUDI_EVENT_NIC1_QM1:
		glbl_sts_addr = mmNIC1_QM1_GLBL_STS1_0;
		arb_err_addr = mmNIC1_QM1_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
		break;
	case GAUDI_EVENT_NIC2_QM0:
		glbl_sts_addr = mmNIC2_QM0_GLBL_STS1_0;
		arb_err_addr = mmNIC2_QM0_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
		break;
	case GAUDI_EVENT_NIC2_QM1:
		glbl_sts_addr = mmNIC2_QM1_GLBL_STS1_0;
		arb_err_addr = mmNIC2_QM1_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
		break;
	case GAUDI_EVENT_NIC3_QM0:
		glbl_sts_addr = mmNIC3_QM0_GLBL_STS1_0;
		arb_err_addr = mmNIC3_QM0_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
		break;
	case GAUDI_EVENT_NIC3_QM1:
		glbl_sts_addr = mmNIC3_QM1_GLBL_STS1_0;
		arb_err_addr = mmNIC3_QM1_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
		break;
	case GAUDI_EVENT_NIC4_QM0:
		glbl_sts_addr = mmNIC4_QM0_GLBL_STS1_0;
		arb_err_addr = mmNIC4_QM0_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
		break;
	case GAUDI_EVENT_NIC4_QM1:
		glbl_sts_addr = mmNIC4_QM1_GLBL_STS1_0;
		arb_err_addr = mmNIC4_QM1_ARB_ERR_CAUSE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
		break;
	default:
		return;
	}

	gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
}

static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
					bool razwi)
{
	char desc[64] = "";

	gaudi_get_event_desc(event_type, desc, sizeof(desc));
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);

	if (razwi) {
		gaudi_print_razwi_info(hdev);
		gaudi_print_mmu_error_info(hdev);
	}
}

static int gaudi_soft_reset_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	/* Unmask all IRQs since some could have been received
	 * during the soft reset
	 */
	return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
}

static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
{
	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
	int err = 0;

	if (!hdev->asic_prop.fw_security_disabled) {
		if (!hbm_ecc_data) {
			dev_err(hdev->dev, "No FW ECC data");
			return 0;
		}

		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));

		dev_err(hdev->dev,
			"HBM%d pc%d ECC: TYPE=%d, WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
			device, ch, type, wr_par, rd_par, ca_par, serr, derr);

		return 0;
	}

	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			err = 1;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x060);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
				device, ch * 2,
				RREG32(base + ch * 0x1000 + 0x064),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			err = 1;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x070);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
				device, ch * 2 + 1,
				RREG32(base + ch * 0x1000 + 0x074),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		/* Clear interrupts */
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
	}

	val  = RREG32(base + 0x8F30);
	val2 = RREG32(base + 0x8F34);
	if (val | val2) {
		err = 1;
		dev_err(hdev->dev,
			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
			device, val, val2);
	}
	val  = RREG32(base + 0x8F40);
	val2 = RREG32(base + 0x8F44);
	if (val | val2) {
		err = 1;
		dev_err(hdev->dev,
			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
			device, val, val2);
	}

	return err;
}

static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
{
	switch (hbm_event_type) {
	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM0_SPI_1:
		return 0;
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_1:
		return 1;
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_1:
		return 2;
	case GAUDI_EVENT_HBM3_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_1:
		return 3;
	default:
		break;
	}

	/* Should never happen */
	return 0;
}

static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
					char *interrupt_name)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
	bool soft_reset_required = false;

	/* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
	 * gating, and thus cannot be done in CPU-CP and should be done instead
	 * by the driver.
	 */

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;

	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
		if (tpc_interrupts_cause & BIT(i)) {
			dev_err_ratelimited(hdev->dev,
					"TPC%d_%s interrupt cause: %s\n",
					tpc_id, interrupt_name,
					gaudi_tpc_interrupts_cause[i]);
			/* If this is QM error, we need to soft-reset */
			if (i == 15)
				soft_reset_required = true;
		}

	/* Clear interrupts */
	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);

	return soft_reset_required;
}

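/*
 * TPC DEC events are spaced two entries apart in the event table while
 * KRN_ERR events are six apart, hence the different dividers below.
 */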
static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
}

static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
}

static void gaudi_print_clk_change_info(struct hl_device *hdev,
					u16 event_type)
{
	switch (event_type) {
	case GAUDI_EVENT_FIX_POWER_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
		dev_info_ratelimited(hdev->dev,
			"Power envelope is safe, back to optimal clock\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
		hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
		hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelope is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}
}

static void gaudi_handle_eqe(struct hl_device *hdev,
				struct hl_eq_entry *eq_entry)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
			>> EQ_CTL_EVENT_TYPE_SHIFT);
	u8 cause;
	bool reset_required;

	gaudi->events_stat[event_type]++;
	gaudi->events_stat_aggregate[event_type]++;

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_DERR:
	case GAUDI_EVENT_PCIE_IF_DERR:
	case GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
		fallthrough;
	case GAUDI_EVENT_CPU_IF_ECC_DERR:
	case GAUDI_EVENT_PSOC_MEM_DERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
	case GAUDI_EVENT_MMU_DERR:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GAUDI_EVENT_GIC500:
	case GAUDI_EVENT_AXI_ECC:
	case GAUDI_EVENT_L2_RAM_ECC:
	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
		gaudi_print_irq_info(hdev, event_type, false);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_0:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GAUDI_EVENT_HBM0_SPI_1:
	case GAUDI_EVENT_HBM1_SPI_1:
	case GAUDI_EVENT_HBM2_SPI_1:
	case GAUDI_EVENT_HBM3_SPI_1:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_TPC0_DEC:
	case GAUDI_EVENT_TPC1_DEC:
	case GAUDI_EVENT_TPC2_DEC:
	case GAUDI_EVENT_TPC3_DEC:
	case GAUDI_EVENT_TPC4_DEC:
	case GAUDI_EVENT_TPC5_DEC:
	case GAUDI_EVENT_TPC6_DEC:
	case GAUDI_EVENT_TPC7_DEC:
		gaudi_print_irq_info(hdev, event_type, true);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_dec_event_to_tpc_id(event_type),
					"AXI_SLV_DEC_Error");
		if (reset_required) {
			dev_err(hdev->dev, "hard reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			if (hdev->hard_reset_on_fw_events)
				hl_device_reset(hdev, true, false);
		} else {
			hl_fw_unmask_irq(hdev, event_type);
		}
		break;

	case GAUDI_EVENT_TPC0_KRN_ERR:
	case GAUDI_EVENT_TPC1_KRN_ERR:
	case GAUDI_EVENT_TPC2_KRN_ERR:
	case GAUDI_EVENT_TPC3_KRN_ERR:
	case GAUDI_EVENT_TPC4_KRN_ERR:
	case GAUDI_EVENT_TPC5_KRN_ERR:
	case GAUDI_EVENT_TPC6_KRN_ERR:
	case GAUDI_EVENT_TPC7_KRN_ERR:
		gaudi_print_irq_info(hdev, event_type, true);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_krn_event_to_tpc_id(event_type),
					"KRN_ERR");
		if (reset_required) {
			dev_err(hdev->dev, "hard reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			if (hdev->hard_reset_on_fw_events)
				hl_device_reset(hdev, true, false);
		} else {
			hl_fw_unmask_irq(hdev, event_type);
		}
		break;

	case GAUDI_EVENT_PCIE_CORE_SERR:
	case GAUDI_EVENT_PCIE_IF_SERR:
	case GAUDI_EVENT_PCIE_PHY_SERR:
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
	case GAUDI_EVENT_CPU_IF_ECC_SERR:
	case GAUDI_EVENT_PSOC_MEM_SERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
		fallthrough;
	case GAUDI_EVENT_MMU_SERR:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PCIE_DEC:
	case GAUDI_EVENT_MME0_WBC_RSP:
	case GAUDI_EVENT_MME0_SBAB0_RSP:
	case GAUDI_EVENT_MME1_WBC_RSP:
	case GAUDI_EVENT_MME1_SBAB0_RSP:
	case GAUDI_EVENT_MME2_WBC_RSP:
	case GAUDI_EVENT_MME2_SBAB0_RSP:
	case GAUDI_EVENT_MME3_WBC_RSP:
	case GAUDI_EVENT_MME3_SBAB0_RSP:
	case GAUDI_EVENT_CPU_AXI_SPLITTER:
	case GAUDI_EVENT_PSOC_AXI_DEC:
	case GAUDI_EVENT_PSOC_PRSTN_FALL:
	case GAUDI_EVENT_MMU_PAGE_FAULT:
	case GAUDI_EVENT_MMU_WR_PERM:
	case GAUDI_EVENT_RAZWI_OR_ADC:
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		fallthrough;
	case GAUDI_EVENT_NIC0_QM0:
	case GAUDI_EVENT_NIC0_QM1:
	case GAUDI_EVENT_NIC1_QM0:
	case GAUDI_EVENT_NIC1_QM1:
	case GAUDI_EVENT_NIC2_QM0:
	case GAUDI_EVENT_NIC2_QM1:
	case GAUDI_EVENT_NIC3_QM0:
	case GAUDI_EVENT_NIC3_QM1:
	case GAUDI_EVENT_NIC4_QM0:
	case GAUDI_EVENT_NIC4_QM1:
	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_qman_err(hdev, event_type);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
		gaudi_print_irq_info(hdev, event_type, true);
		if (hdev->hard_reset_on_fw_events)
			hl_device_reset(hdev, true, false);
		break;

	case GAUDI_EVENT_TPC0_BMON_SPMU:
	case GAUDI_EVENT_TPC1_BMON_SPMU:
	case GAUDI_EVENT_TPC2_BMON_SPMU:
	case GAUDI_EVENT_TPC3_BMON_SPMU:
	case GAUDI_EVENT_TPC4_BMON_SPMU:
	case GAUDI_EVENT_TPC5_BMON_SPMU:
	case GAUDI_EVENT_TPC6_BMON_SPMU:
	case GAUDI_EVENT_TPC7_BMON_SPMU:
	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
		gaudi_print_irq_info(hdev, event_type, false);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
		gaudi_print_clk_change_info(hdev, event_type);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PSOC_GPIO_U16_0:
		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
		dev_err(hdev->dev,
			"Received high temp H/W interrupt %d (cause %d)\n",
			event_type, cause);
		break;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}
}

static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
					u32 *size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (aggregate) {
		*size = (u32) sizeof(gaudi->events_stat_aggregate);
		return gaudi->events_stat_aggregate;
	}

	*size = (u32) sizeof(gaudi->events_stat);
	return gaudi->events_stat;
}

static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
					u32 flags)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	mutex_lock(&hdev->mmu_cache_lock);

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_PS, 3);
	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
	WREG32(mmSTLB_INV_PS, 2);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_PS,
		status,
		!status,
		1000,
		timeout_usec);

	WREG32(mmSTLB_INV_SET, 0);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}

static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
				bool is_hard, u32 asid, u64 va, u64 size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec, inv_data, pi;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	mutex_lock(&hdev->mmu_cache_lock);

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/*
	 * TODO: currently invalidate entire L0 & L1 as in regular hard
	 * invalidation. Need to apply invalidation of specific cache
	 * lines with mask of ASID & VA & size.
	 * Note that L1 will be flushed entirely in any case.
	 */

	/* L0 & L1 invalidation */
	inv_data = RREG32(mmSTLB_CACHE_INV);
	/* PI is 8 bits wide, so wrap the producer index at 256 */
	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
	WREG32(mmSTLB_CACHE_INV,
			(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_CONSUMER_INDEX,
		status,
		status == pi,
		1000,
		timeout_usec);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, true, false);
	}

	return rc;
}

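/*
 * Register hop0 (the per-ASID page-table root) with the MMU: write the ASID
 * and the physical address split across the two HOP0_PA registers, kick
 * MMU_BUSY and poll until the hardware clears the busy bit.
 */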
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
					u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(MMU_ASID, asid);
	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_BUSY, 0x80000000);

	rc = hl_poll_timeout(
		hdev,
		MMU_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}

static int gaudi_send_heartbeat(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_heartbeat(hdev);
}

static int gaudi_cpucp_info_get(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0);
	if (rc)
		return rc;

	if (!strlen(prop->cpucp_info.card_name))
		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
				CARD_NAME_MAX_LEN);

	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);

	if (hdev->card_type == cpucp_card_type_pci)
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
	else if (hdev->card_type == cpucp_card_type_pmc)
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;

	hdev->max_power = prop->max_power_default;

	return 0;
}

static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
					struct seq_file *s)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
	bool is_idle = true, is_eng_idle, is_slave;
	u64 offset;
	int i, dma_id, port;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	if (s)
		seq_puts(s,
			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
			"---  -------  ------------  ----------  -------------\n");

	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		offset = dma_id * DMA_QMAN_OFFSET;

		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
					(GAUDI_ENGINE_ID_DMA_0 + dma_id);
		if (s)
			seq_printf(s, fmt, dma_id,
				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				qm_cgm_sts, dma_core_sts0);
	}

	if (s)
		seq_puts(s,
			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
			"---  -------  ------------  ----------  ----------\n");

	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		offset = i * TPC_QMAN_OFFSET;
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
					(GAUDI_ENGINE_ID_TPC_0 + i);
		if (s)
			seq_printf(s, fmt, i,
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
	}

	if (s)
		seq_puts(s,
			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
			"---  -------  ------------  ----------  -----------\n");

	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
		offset = i * MME_QMAN_OFFSET;
		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
		is_eng_idle = IS_MME_IDLE(mme_arch_sts);

		/* MME 1 & 3 are slaves, no need to check their QMANs */
		is_slave = i % 2;
		if (!is_slave) {
			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
		}

		is_idle &= is_eng_idle;

		if (mask)
			*mask |= ((u64) !is_eng_idle) <<
					(GAUDI_ENGINE_ID_MME_0 + i);
		if (s) {
			if (!is_slave)
				seq_printf(s, fmt, i,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
			else
				seq_printf(s, mme_slave_fmt, i,
					is_eng_idle ? "Y" : "N", "-",
					"-", mme_arch_sts);
		}
	}

	if (s)
		seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
				"---  -------  ------------  ----------\n");

	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
		offset = i * NIC_MACRO_QMAN_OFFSET;
		port = 2 * i;
		if (hdev->nic_ports_mask & BIT(port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask)
				*mask |= ((u64) !is_eng_idle) <<
						(GAUDI_ENGINE_ID_NIC_0 + port);
			if (s)
				seq_printf(s, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}

		port = 2 * i + 1;
		if (hdev->nic_ports_mask & BIT(port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask)
				*mask |= ((u64) !is_eng_idle) <<
						(GAUDI_ENGINE_ID_NIC_0 + port);
			if (s)
				seq_printf(s, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}
	}

	if (s)
		seq_puts(s, "\n");

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);

	return is_idle;
}

static void gaudi_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_lock(&gaudi->hw_queues_lock);
}

static void gaudi_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_unlock(&gaudi->hw_queues_lock);
}

static u32 gaudi_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}

static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
				size_t max_size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_eeprom_data(hdev, data, max_size);
}

/*
 * this function should be used only during initialization and/or after reset,
 * when there are no active users.
 */
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 kernel_timeout;
	u32 status, offset;
	int rc;

	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);

	if (hdev->pldm)
		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
	else
		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
			upper_32_bits(tpc_kernel));
	/* set a valid LUT pointer, content is of no significance */
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
		lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));

	WREG32(mmTPC0_CFG_TPC_CMD + offset,
			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d icache prefetch\n",
			tpc_id);
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);

	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d vector pipe\n",
			tpc_id);
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
		status,
		(status == 0),
		1000,
		kernel_timeout);

	hdev->asic_funcs->set_clock_gating(hdev);
	mutex_unlock(&gaudi->clk_gate_mutex);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d kernel to execute\n",
			tpc_id);
		return -EIO;
	}

	return 0;
}

static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int min_alloc_order, rc, collective_cb_size;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	hdev->internal_cb_pool_virt_addr =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					HOST_SPACE_INTERNAL_CB_SZ,
					&hdev->internal_cb_pool_dma_addr,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
			sizeof(struct packet_fence);
	min_alloc_order = ilog2(collective_cb_size);

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev,
			"Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool,
				(uintptr_t) hdev->internal_cb_pool_virt_addr,
				HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base) {
		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}

	mutex_lock(&ctx->mmu_lock);
	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
			hdev->internal_cb_pool_dma_addr,
			HOST_SPACE_INTERNAL_CB_SZ);

	hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
	mutex_unlock(&ctx->mmu_lock);

	if (rc)
		goto unreserve_internal_cb_pool;

	return 0;

unreserve_internal_cb_pool:
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HOST_SPACE_INTERNAL_CB_SZ,
			hdev->internal_cb_pool_virt_addr,
			hdev->internal_cb_pool_dma_addr);

	return rc;
}

static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	mutex_lock(&ctx->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
	mutex_unlock(&ctx->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HOST_SPACE_INTERNAL_CB_SZ,
			hdev->internal_cb_pool_virt_addr,
			hdev->internal_cb_pool_dma_addr);
}

static int gaudi_ctx_init(struct hl_ctx *ctx)
{
	gaudi_mmu_prepare(ctx->hdev, ctx->asid);
	return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
}

static void gaudi_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	/* Gaudi will NEVER support more than a single compute context.
	 * Therefore, don't clear anything unless it is the compute context
	 */
	if (hdev->compute_ctx != ctx)
		return;

	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
}

static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return gaudi_cq_assignment[cq_idx];
}

static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) +
			sizeof(struct packet_msg_prot) * 2;
}

static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 +
			sizeof(struct packet_fence) +
			sizeof(struct packet_msg_prot) * 2;
}

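/*
 * The sizes returned above must stay in sync with the packet sequences the
 * signal/wait CB generators emit: a signal CB is one MSG_SHORT plus two
 * MSG_PROT completion packets, and a wait CB is four MSG_SHORT packets, one
 * FENCE and two MSG_PROT packets.
 */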
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = cb->kernel_address + size;
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}

static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
					u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}

static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
		u16 sob_val, u16 mon_id)
{
	u64 monitor_base;
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u16 msg_addr_offset;
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev,
			"sob_base %u (mask %#x) is not valid\n",
			sob_base, sob_mask);
		return 0;
	}

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;

	memset(pkt, 0, pkt_size);

	/* Monitor config packet: bind the monitor to a sync object */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL */
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
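/*
 * Monitors watch a group of eight consecutive sync objects, hence
 * SYNC_GID = sob_base / 8 above, while the 8-bit mask produced by
 * hl_gen_sob_mask() selects which objects inside the group take part in
 * the "greater or equal to sob_val" comparison. For example, sob_base = 16
 * gives SYNC_GID = 2, i.e. the group containing SOBs 16-23.
 */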
static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
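/*
 * Fence mechanics: the armed monitor's payload writes 1 to the queue's
 * CP_FENCE2_RDATA register (resolved by gaudi_get_fence_addr() below),
 * which increments fence counter 2 of that CP. The FENCE packet above
 * stalls the CP until counter ID 2 reaches the target value 1, then
 * decrements it by 1 so the counter is ready for the next wait.
 */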
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}
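/*
 * Illustrative example of the NIC arithmetic above: for
 * GAUDI_QUEUE_ID_NIC_3_2, nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2)
 * >> 2 = 3, so the offset is mmNIC0_QM0_CP_FENCE2_RDATA_2 plus one
 * NIC_MACRO_QMAN_OFFSET (macro 1) plus one NIC_ENGINE_QMAN_OFFSET (the
 * macro's second QMAN). Each NIC macro hosts two engines, which is why
 * nic_index is split with >> 1 and & 0x1.
 */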
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;
	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;
	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when
	 * the sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;
	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}
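/*
 * The three MSG_SHORT packets above fully describe the monitor's action:
 * PAY_ADDRL/PAY_ADDRH hold the 64-bit target address (the fence RDATA
 * register) and PAY_DATA holds the value 1 that is written there once the
 * monitor fires. Only the subsequent ARM packet activates the monitor, so
 * the config-then-arm ordering in gaudi_gen_wait_cb() below is essential.
 */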
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
		struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}
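/*
 * Resulting wait CB layout (offsets relative to prop->size on entry):
 * three MSG_SHORT monitor-config packets, one MSG_SHORT arm packet, then
 * one FENCE packet. The returned size is the running offset, so callers
 * can chain additional packets after the fence.
 */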
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
			0);

	kref_init(&hw_sob->kref);
}
static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
{
	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
					HL_POWER9_HOST_MAGIC) {
		hdev->power9_64bit_dma_enable = 1;
		hdev->dma_mask = 64;
	} else {
		hdev->power9_64bit_dma_enable = 0;
		hdev->dma_mask = 48;
	}
}
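/*
 * The firmware leaves HL_POWER9_HOST_MAGIC in a non-reset scratch register
 * when it detects a POWER9 host, on which the device may DMA with full
 * 64-bit addressing; on any other host the driver falls back to a 48-bit
 * DMA mask.
 */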
static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}
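/*
 * The 64-bit device time is composed of two 32-bit reads, high word first
 * and then low word. A low-word wraparound between the two reads could
 * skew a sample by one high-word increment; this helper does not guard
 * against that race, so callers needing stronger guarantees would have to
 * re-read until the high word is stable.
 */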
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.cb_mmap = gaudi_cb_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sg = gaudi_dma_map_sg,
	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read32 = gaudi_debugfs_read32,
	.debugfs_write32 = gaudi_debugfs_write32,
	.debugfs_read64 = gaudi_debugfs_read64,
	.debugfs_write64 = gaudi_debugfs_write64,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.set_pll_profile = gaudi_set_pll_profile,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.send_heartbeat = gaudi_send_heartbeat,
	.set_clock_gating = gaudi_set_clock_gating,
	.disable_clock_gating = gaudi_disable_clock_gating,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.soft_reset_late_init = gaudi_soft_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.get_clk_rate = gaudi_get_clk_rate,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.read_device_fw_version = gaudi_read_device_fw_version,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
	.get_device_time = gaudi_get_device_time,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs
};
/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}
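/*
 * Hooking-up note: the common habanalabs code is expected to call this
 * during early device initialization, once the PCI device ID identifies
 * the ASIC as GAUDI; from that point on, every ASIC-specific operation is
 * dispatched through the gaudi_funcs table installed here.
 */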