1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright 2016-2020 HabanaLabs, Ltd.
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/genalloc.h>
21 #include <linux/io-64-nonatomic-lo-hi.h>
22 #include <linux/iommu.h>
23 #include <linux/seq_file.h>
26 * Gaudi security scheme:
28 * 1. Host is protected by:
32 * 2. DDR is protected by:
33 * - Range registers (protect the first 512MB)
35 * 3. Configuration is protected by:
39 * MMU is always enabled.
41 * QMAN DMA channels 0,1,5 (PCI DMAN):
42 * - DMA is not secured.
43 * - PQ and CQ are secured.
44 * - CP is secured: The driver needs to parse CB but WREG should be allowed
45 * because of TDMA (tensor DMA). Hence, WREG is always not
48 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
49 * channel 0 to be secured, execute the DMA and change it back to not secured.
50 * Currently, the driver doesn't use the DMA while there are compute jobs
53 * The current use cases for the driver to use the DMA are:
54 * - Clear SRAM on context switch (happens on context switch when device is
56 * - MMU page tables area clear (happens on init)
58 * QMAN DMA 2-4,6,7, TPC, MME, NIC:
59 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
60 * CQ, CP and the engine are not secured
64 #define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
65 #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
66 #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
68 #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
70 #define GAUDI_RESET_TIMEOUT_MSEC 1000 /* 1000ms */
71 #define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
72 #define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
73 #define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
75 #define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
76 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
77 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
78 #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
79 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
80 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
81 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
82 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
84 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
86 #define GAUDI_MAX_STRING_LEN 20
88 #define GAUDI_CB_POOL_CB_CNT 512
89 #define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
91 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
93 #define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
95 #define GAUDI_NUM_OF_QM_ERR_CAUSE 16
97 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
99 #define GAUDI_ARB_WDT_TIMEOUT 0x1000000
101 #define GAUDI_CLK_GATE_DEBUGFS_MASK (\
102 BIT(GAUDI_ENGINE_ID_MME_0) |\
103 BIT(GAUDI_ENGINE_ID_MME_2) |\
104 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
106 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
107 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
108 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
109 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
113 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
114 [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
115 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
116 [GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
117 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
118 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
119 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
120 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
121 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
124 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
125 [0] = GAUDI_QUEUE_ID_DMA_0_0,
126 [1] = GAUDI_QUEUE_ID_DMA_0_1,
127 [2] = GAUDI_QUEUE_ID_DMA_0_2,
128 [3] = GAUDI_QUEUE_ID_DMA_0_3,
129 [4] = GAUDI_QUEUE_ID_DMA_1_0,
130 [5] = GAUDI_QUEUE_ID_DMA_1_1,
131 [6] = GAUDI_QUEUE_ID_DMA_1_2,
132 [7] = GAUDI_QUEUE_ID_DMA_1_3,
133 [8] = GAUDI_QUEUE_ID_DMA_5_0,
134 [9] = GAUDI_QUEUE_ID_DMA_5_1,
135 [10] = GAUDI_QUEUE_ID_DMA_5_2,
136 [11] = GAUDI_QUEUE_ID_DMA_5_3
139 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
140 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
141 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
142 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
143 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
144 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
145 [PACKET_REPEAT] = sizeof(struct packet_repeat),
146 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
147 [PACKET_FENCE] = sizeof(struct packet_fence),
148 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
149 [PACKET_NOP] = sizeof(struct packet_nop),
150 [PACKET_STOP] = sizeof(struct packet_stop),
151 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
152 [PACKET_WAIT] = sizeof(struct packet_wait),
153 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
156 static inline bool validate_packet_id(enum packet_id id)
160 case PACKET_WREG_BULK:
161 case PACKET_MSG_LONG:
162 case PACKET_MSG_SHORT:
165 case PACKET_MSG_PROT:
170 case PACKET_ARB_POINT:
172 case PACKET_LOAD_AND_EXE:
179 static const char * const
180 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
181 "tpc_address_exceed_slm",
183 "tpc_spu_mac_overflow",
184 "tpc_spu_addsub_overflow",
185 "tpc_spu_abs_overflow",
186 "tpc_spu_fp_dst_nan_inf",
187 "tpc_spu_fp_dst_denorm",
188 "tpc_vpu_mac_overflow",
189 "tpc_vpu_addsub_overflow",
190 "tpc_vpu_abs_overflow",
191 "tpc_vpu_fp_dst_nan_inf",
192 "tpc_vpu_fp_dst_denorm",
194 "tpc_illegal_instruction",
195 "tpc_pc_wrap_around",
203 static const char * const
204 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
208 "CP error due to undefined OPCODE",
209 "CP encountered STOP OPCODE",
211 "CP WRREG32 or WRBULK returned error",
213 "FENCE 0 inc over max value and clipped",
214 "FENCE 1 inc over max value and clipped",
215 "FENCE 2 inc over max value and clipped",
216 "FENCE 3 inc over max value and clipped",
217 "FENCE 0 dec under min value and clipped",
218 "FENCE 1 dec under min value and clipped",
219 "FENCE 2 dec under min value and clipped",
220 "FENCE 3 dec under min value and clipped"
223 static const char * const
224 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
225 "Choice push while full error",
226 "Choice Q watchdog error",
227 "MSG AXI LBW returned with error"
230 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
231 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
232 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
233 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
234 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
235 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
236 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
237 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
238 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
239 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
240 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
241 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
242 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
243 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
252 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
253 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
254 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
255 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
296 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
304 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_0 */
305 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_1 */
306 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_2 */
307 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_3 */
308 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_0 */
309 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_1 */
310 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_2 */
311 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_3 */
312 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_0 */
313 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_1 */
314 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_2 */
315 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_3 */
316 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_0 */
317 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_1 */
318 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_2 */
319 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_3 */
320 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_0 */
321 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_1 */
322 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_2 */
323 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_3 */
324 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_0 */
325 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_1 */
326 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_2 */
327 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_3 */
328 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_0 */
329 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_1 */
330 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_2 */
331 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_3 */
332 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_0 */
333 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_1 */
334 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_2 */
335 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_3 */
336 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_0 */
337 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_1 */
338 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_2 */
339 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_3 */
340 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_0 */
341 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_1 */
342 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_2 */
343 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_3 */
346 struct ecc_info_extract_params {
350 bool disable_clock_gating;
353 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
355 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
356 struct hl_cs_job *job);
357 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
359 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
361 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
362 static int gaudi_cpucp_info_get(struct hl_device *hdev);
363 static void gaudi_disable_clock_gating(struct hl_device *hdev);
364 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
366 static int gaudi_get_fixed_properties(struct hl_device *hdev)
368 struct asic_fixed_properties *prop = &hdev->asic_prop;
369 u32 num_sync_stream_queues = 0;
372 prop->max_queues = GAUDI_QUEUE_ID_SIZE;
373 prop->hw_queues_props = kcalloc(prop->max_queues,
374 sizeof(struct hw_queue_properties),
377 if (!prop->hw_queues_props)
380 for (i = 0 ; i < prop->max_queues ; i++) {
381 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
382 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
383 prop->hw_queues_props[i].driver_only = 0;
384 prop->hw_queues_props[i].requires_kernel_cb = 1;
385 prop->hw_queues_props[i].supports_sync_stream = 1;
386 num_sync_stream_queues++;
387 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
388 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
389 prop->hw_queues_props[i].driver_only = 1;
390 prop->hw_queues_props[i].requires_kernel_cb = 0;
391 prop->hw_queues_props[i].supports_sync_stream = 0;
392 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
393 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
394 prop->hw_queues_props[i].driver_only = 0;
395 prop->hw_queues_props[i].requires_kernel_cb = 0;
396 } else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
397 prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
398 prop->hw_queues_props[i].driver_only = 0;
399 prop->hw_queues_props[i].requires_kernel_cb = 0;
400 prop->hw_queues_props[i].supports_sync_stream = 0;
404 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
405 prop->sync_stream_first_sob = 0;
406 prop->sync_stream_first_mon = 0;
407 prop->dram_base_address = DRAM_PHYS_BASE;
408 prop->dram_size = GAUDI_HBM_SIZE_32GB;
409 prop->dram_end_address = prop->dram_base_address +
411 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
413 prop->sram_base_address = SRAM_BASE_ADDR;
414 prop->sram_size = SRAM_SIZE;
415 prop->sram_end_address = prop->sram_base_address +
417 prop->sram_user_base_address = prop->sram_base_address +
418 SRAM_USER_BASE_OFFSET;
420 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
422 prop->mmu_pgt_size = 0x800000; /* 8MB */
424 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
425 prop->mmu_pte_size = HL_PTE_SIZE;
426 prop->mmu_hop_table_size = HOP_TABLE_SIZE;
427 prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
428 prop->dram_page_size = PAGE_SIZE_2MB;
430 prop->pmmu.hop0_shift = HOP0_SHIFT;
431 prop->pmmu.hop1_shift = HOP1_SHIFT;
432 prop->pmmu.hop2_shift = HOP2_SHIFT;
433 prop->pmmu.hop3_shift = HOP3_SHIFT;
434 prop->pmmu.hop4_shift = HOP4_SHIFT;
435 prop->pmmu.hop0_mask = HOP0_MASK;
436 prop->pmmu.hop1_mask = HOP1_MASK;
437 prop->pmmu.hop2_mask = HOP2_MASK;
438 prop->pmmu.hop3_mask = HOP3_MASK;
439 prop->pmmu.hop4_mask = HOP4_MASK;
440 prop->pmmu.start_addr = VA_HOST_SPACE_START;
441 prop->pmmu.end_addr =
442 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
443 prop->pmmu.page_size = PAGE_SIZE_4KB;
444 prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
446 /* PMMU and HPMMU are the same except of page size */
447 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
448 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
450 /* shifts and masks are the same in PMMU and DMMU */
451 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
452 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
453 prop->dmmu.end_addr = VA_HOST_SPACE_END;
454 prop->dmmu.page_size = PAGE_SIZE_2MB;
456 prop->cfg_size = CFG_SIZE;
457 prop->max_asid = MAX_ASID;
458 prop->num_of_events = GAUDI_EVENT_SIZE;
459 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
461 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
463 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
464 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
466 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
467 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
469 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
472 prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
474 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
475 num_sync_stream_queues * HL_RSVD_SOBS;
476 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
477 num_sync_stream_queues * HL_RSVD_MONS;
482 static int gaudi_pci_bars_map(struct hl_device *hdev)
484 static const char * const name[] = {"SRAM", "CFG", "HBM"};
485 bool is_wc[3] = {false, false, true};
488 rc = hl_pci_bars_map(hdev, name, is_wc);
492 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
493 (CFG_BASE - SPI_FLASH_BASE_ADDR);
498 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
500 struct gaudi_device *gaudi = hdev->asic_specific;
501 struct hl_inbound_pci_region pci_region;
505 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
508 /* Inbound Region 2 - Bar 4 - Point to HBM */
509 pci_region.mode = PCI_BAR_MATCH_MODE;
510 pci_region.bar = HBM_BAR_ID;
511 pci_region.addr = addr;
512 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
517 old_addr = gaudi->hbm_bar_cur_addr;
518 gaudi->hbm_bar_cur_addr = addr;
524 static int gaudi_init_iatu(struct hl_device *hdev)
526 struct hl_inbound_pci_region inbound_region;
527 struct hl_outbound_pci_region outbound_region;
530 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
531 inbound_region.mode = PCI_BAR_MATCH_MODE;
532 inbound_region.bar = SRAM_BAR_ID;
533 inbound_region.addr = SRAM_BASE_ADDR;
534 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
538 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
539 inbound_region.mode = PCI_BAR_MATCH_MODE;
540 inbound_region.bar = CFG_BAR_ID;
541 inbound_region.addr = SPI_FLASH_BASE_ADDR;
542 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
546 /* Inbound Region 2 - Bar 4 - Point to HBM */
547 inbound_region.mode = PCI_BAR_MATCH_MODE;
548 inbound_region.bar = HBM_BAR_ID;
549 inbound_region.addr = DRAM_PHYS_BASE;
550 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
554 hdev->asic_funcs->set_dma_mask_from_fw(hdev);
556 /* Outbound Region 0 - Point to Host */
557 outbound_region.addr = HOST_PHYS_BASE;
558 outbound_region.size = HOST_PHYS_SIZE;
559 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
565 static int gaudi_early_init(struct hl_device *hdev)
567 struct asic_fixed_properties *prop = &hdev->asic_prop;
568 struct pci_dev *pdev = hdev->pdev;
571 rc = gaudi_get_fixed_properties(hdev);
573 dev_err(hdev->dev, "Failed to get fixed properties\n");
577 /* Check BAR sizes */
578 if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
580 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
582 (unsigned long long) pci_resource_len(pdev,
586 goto free_queue_props;
589 if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
591 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
593 (unsigned long long) pci_resource_len(pdev,
597 goto free_queue_props;
600 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
602 rc = hl_pci_init(hdev);
604 goto free_queue_props;
609 kfree(hdev->asic_prop.hw_queues_props);
613 static int gaudi_early_fini(struct hl_device *hdev)
615 kfree(hdev->asic_prop.hw_queues_props);
622 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
624 * @hdev: pointer to hl_device structure
627 static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
629 struct asic_fixed_properties *prop = &hdev->asic_prop;
632 u32 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
633 u32 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
634 u32 nr = RREG32(mmPSOC_CPU_PLL_NR);
635 u32 nf = RREG32(mmPSOC_CPU_PLL_NF);
636 u32 od = RREG32(mmPSOC_CPU_PLL_OD);
638 if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
639 if (div_sel == DIV_SEL_REF_CLK)
640 trace_freq = PLL_REF_CLK;
642 trace_freq = PLL_REF_CLK / (div_fctr + 1);
643 } else if (div_sel == DIV_SEL_PLL_CLK ||
644 div_sel == DIV_SEL_DIVIDED_PLL) {
645 pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
646 if (div_sel == DIV_SEL_PLL_CLK)
647 trace_freq = pll_clk;
649 trace_freq = pll_clk / (div_fctr + 1);
652 "Received invalid div select value: %d", div_sel);
655 prop->psoc_timestamp_frequency = trace_freq;
656 prop->psoc_pci_pll_nr = nr;
657 prop->psoc_pci_pll_nf = nf;
658 prop->psoc_pci_pll_od = od;
659 prop->psoc_pci_pll_div_factor = div_fctr;
662 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
663 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
665 struct asic_fixed_properties *prop = &hdev->asic_prop;
666 struct packet_lin_dma *init_tpc_mem_pkt;
667 struct hl_cs_job *job;
674 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
678 init_tpc_mem_pkt = (struct packet_lin_dma *) (uintptr_t)
680 cb_size = sizeof(*init_tpc_mem_pkt);
681 memset(init_tpc_mem_pkt, 0, cb_size);
683 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
685 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
686 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
687 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
688 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
690 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
692 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
693 dst_addr = (prop->sram_user_base_address &
694 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
695 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
696 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
698 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
700 dev_err(hdev->dev, "Failed to allocate a new job\n");
707 job->user_cb->cs_cnt++;
708 job->user_cb_size = cb_size;
709 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
710 job->patched_cb = job->user_cb;
711 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
713 hl_debugfs_add_job(hdev, job);
715 rc = gaudi_send_job_on_qman0(hdev, job);
720 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
721 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
727 hl_userptr_delete_list(hdev, &job->userptr_list);
728 hl_debugfs_remove_job(hdev, job);
734 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
740 * gaudi_init_tpc_mem() - Initialize TPC memories.
741 * @hdev: Pointer to hl_device structure.
743 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
745 * Return: 0 for success, negative value for error.
747 static int gaudi_init_tpc_mem(struct hl_device *hdev)
749 const struct firmware *fw;
752 dma_addr_t dma_handle;
755 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
757 dev_err(hdev->dev, "Firmware file %s is not found!\n",
763 cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
764 &dma_handle, GFP_KERNEL | __GFP_ZERO);
767 "Failed to allocate %zu of dma memory for TPC kernel\n",
773 memcpy(cpu_addr, fw->data, fw_size);
775 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
777 hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
781 release_firmware(fw);
785 static int gaudi_late_init(struct hl_device *hdev)
787 struct gaudi_device *gaudi = hdev->asic_specific;
790 rc = gaudi->cpucp_info_get(hdev);
792 dev_err(hdev->dev, "Failed to get cpucp info\n");
796 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
798 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
802 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
804 gaudi_fetch_psoc_frequency(hdev);
806 rc = gaudi_mmu_clear_pgt_range(hdev);
808 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
809 goto disable_pci_access;
812 rc = gaudi_init_tpc_mem(hdev);
814 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
815 goto disable_pci_access;
821 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
826 static void gaudi_late_fini(struct hl_device *hdev)
828 const struct hwmon_channel_info **channel_info_arr;
831 if (!hdev->hl_chip_info->info)
834 channel_info_arr = hdev->hl_chip_info->info;
836 while (channel_info_arr[i]) {
837 kfree(channel_info_arr[i]->config);
838 kfree(channel_info_arr[i]);
842 kfree(channel_info_arr);
844 hdev->hl_chip_info->info = NULL;
847 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
849 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
850 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
854 * The device CPU works with 40-bits addresses, while bit 39 must be set
855 * to '1' when accessing the host.
856 * Bits 49:39 of the full host address are saved for a later
857 * configuration of the HW to perform extension to 50 bits.
858 * Because there is a single HW register that holds the extension bits,
859 * these bits must be identical in all allocated range.
862 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
864 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
865 HL_CPU_ACCESSIBLE_MEM_SIZE,
867 GFP_KERNEL | __GFP_ZERO);
868 if (!virt_addr_arr[i]) {
870 goto free_dma_mem_arr;
873 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
874 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
875 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
879 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
881 "MSB of CPU accessible DMA memory are not identical in all range\n");
883 goto free_dma_mem_arr;
886 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
887 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
888 hdev->cpu_pci_msb_addr =
889 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
891 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
894 for (j = 0 ; j < i ; j++)
895 hdev->asic_funcs->asic_dma_free_coherent(hdev,
896 HL_CPU_ACCESSIBLE_MEM_SIZE,
903 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
905 struct gaudi_device *gaudi = hdev->asic_specific;
906 struct gaudi_internal_qman_info *q;
909 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
910 q = &gaudi->internal_qmans[i];
911 if (!q->pq_kernel_addr)
913 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
919 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
921 struct gaudi_device *gaudi = hdev->asic_specific;
922 struct gaudi_internal_qman_info *q;
925 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
926 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
929 q = &gaudi->internal_qmans[i];
932 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
933 case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
934 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
936 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
937 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
939 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
940 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
943 dev_err(hdev->dev, "Bad internal queue index %d", i);
945 goto free_internal_qmans_pq_mem;
948 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
951 GFP_KERNEL | __GFP_ZERO);
952 if (!q->pq_kernel_addr) {
954 goto free_internal_qmans_pq_mem;
960 free_internal_qmans_pq_mem:
961 gaudi_free_internal_qmans_pq_mem(hdev);
965 static int gaudi_sw_init(struct hl_device *hdev)
967 struct gaudi_device *gaudi;
971 /* Allocate device structure */
972 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
976 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
977 if (gaudi_irq_map_table[i].valid) {
978 if (event_id == GAUDI_EVENT_SIZE) {
980 "Event array exceeds the limit of %u events\n",
983 goto free_gaudi_device;
986 gaudi->events[event_id++] =
987 gaudi_irq_map_table[i].fc_id;
991 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
993 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
995 hdev->asic_specific = gaudi;
997 /* Create DMA pool for small allocations */
998 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
999 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1000 if (!hdev->dma_pool) {
1001 dev_err(hdev->dev, "failed to create DMA pool\n");
1003 goto free_gaudi_device;
1006 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1010 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1011 if (!hdev->cpu_accessible_dma_pool) {
1013 "Failed to create CPU accessible DMA pool\n");
1015 goto free_cpu_dma_mem;
1018 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1019 (uintptr_t) hdev->cpu_accessible_dma_mem,
1020 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1023 "Failed to add memory to CPU accessible DMA pool\n");
1025 goto free_cpu_accessible_dma_pool;
1028 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1030 goto free_cpu_accessible_dma_pool;
1032 spin_lock_init(&gaudi->hw_queues_lock);
1033 mutex_init(&gaudi->clk_gate_mutex);
1035 hdev->supports_sync_stream = true;
1036 hdev->supports_coresight = true;
1040 free_cpu_accessible_dma_pool:
1041 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1043 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1044 hdev->cpu_pci_msb_addr);
1045 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1046 HL_CPU_ACCESSIBLE_MEM_SIZE,
1047 hdev->cpu_accessible_dma_mem,
1048 hdev->cpu_accessible_dma_address);
1050 dma_pool_destroy(hdev->dma_pool);
1056 static int gaudi_sw_fini(struct hl_device *hdev)
1058 struct gaudi_device *gaudi = hdev->asic_specific;
1060 gaudi_free_internal_qmans_pq_mem(hdev);
1062 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1064 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1065 hdev->cpu_pci_msb_addr);
1066 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1067 HL_CPU_ACCESSIBLE_MEM_SIZE,
1068 hdev->cpu_accessible_dma_mem,
1069 hdev->cpu_accessible_dma_address);
1071 dma_pool_destroy(hdev->dma_pool);
1073 mutex_destroy(&gaudi->clk_gate_mutex);
1080 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1082 struct hl_device *hdev = arg;
1088 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1089 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1091 hl_irq_handler_eq(irq, &hdev->event_queue);
1097 * For backward compatibility, new MSI interrupts should be set after the
1098 * existing CPU and NIC interrupts.
1100 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1105 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1106 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1107 GAUDI_EVENT_QUEUE_MSI_IDX);
1109 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1110 (nr + NIC_NUMBER_OF_ENGINES + 1);
1112 return pci_irq_vector(hdev->pdev, msi_vec);
1115 static int gaudi_enable_msi_single(struct hl_device *hdev)
1119 dev_info(hdev->dev, "Working in single MSI IRQ mode\n");
1121 irq = gaudi_pci_irq_vector(hdev, 0, false);
1122 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1123 "gaudi single msi", hdev);
1126 "Failed to request single MSI IRQ\n");
1131 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1133 int cq_cnt = hdev->asic_prop.completion_queues_count;
1134 int rc, i, irq_cnt_init, irq;
1136 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1137 irq = gaudi_pci_irq_vector(hdev, i, false);
1138 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1139 &hdev->completion_queue[i]);
1141 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1146 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1147 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1148 &hdev->event_queue);
1150 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1157 for (i = 0 ; i < irq_cnt_init ; i++)
1158 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1159 &hdev->completion_queue[i]);
1163 static int gaudi_enable_msi(struct hl_device *hdev)
1165 struct gaudi_device *gaudi = hdev->asic_specific;
1168 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1171 rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1174 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1178 if (rc < NUMBER_OF_INTERRUPTS) {
1179 gaudi->multi_msi_mode = false;
1180 rc = gaudi_enable_msi_single(hdev);
1182 gaudi->multi_msi_mode = true;
1183 rc = gaudi_enable_msi_multi(hdev);
1187 goto free_pci_irq_vectors;
1189 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1193 free_pci_irq_vectors:
1194 pci_free_irq_vectors(hdev->pdev);
1198 static void gaudi_sync_irqs(struct hl_device *hdev)
1200 struct gaudi_device *gaudi = hdev->asic_specific;
1201 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1203 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1206 /* Wait for all pending IRQs to be finished */
1207 if (gaudi->multi_msi_mode) {
1208 for (i = 0 ; i < cq_cnt ; i++)
1209 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1211 synchronize_irq(gaudi_pci_irq_vector(hdev,
1212 GAUDI_EVENT_QUEUE_MSI_IDX,
1215 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1219 static void gaudi_disable_msi(struct hl_device *hdev)
1221 struct gaudi_device *gaudi = hdev->asic_specific;
1222 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1224 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1227 gaudi_sync_irqs(hdev);
1229 if (gaudi->multi_msi_mode) {
1230 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1232 free_irq(irq, &hdev->event_queue);
1234 for (i = 0 ; i < cq_cnt ; i++) {
1235 irq = gaudi_pci_irq_vector(hdev, i, false);
1236 free_irq(irq, &hdev->completion_queue[i]);
1239 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1242 pci_free_irq_vectors(hdev->pdev);
1244 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1247 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1249 struct gaudi_device *gaudi = hdev->asic_specific;
1251 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1254 if (!hdev->sram_scrambler_enable)
1257 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1258 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1259 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1260 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1261 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1262 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1263 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1264 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1265 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1266 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1267 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1268 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1269 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1270 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1271 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1272 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1274 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1275 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1276 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1277 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1278 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1279 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1280 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1281 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1282 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1283 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1284 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1285 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1286 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1287 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1288 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1289 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1291 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1292 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1293 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1294 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1295 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1296 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1297 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1298 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1299 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1300 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1301 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1302 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1303 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1304 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1305 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1306 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1308 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1311 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1313 struct gaudi_device *gaudi = hdev->asic_specific;
1315 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1318 if (!hdev->dram_scrambler_enable)
1321 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1322 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1323 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1324 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1325 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1326 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1327 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1328 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1329 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1330 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1331 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1332 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1333 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1334 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1335 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1336 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1338 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1339 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1340 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1341 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1342 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1343 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1344 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1345 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1346 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1347 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1348 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1349 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1350 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1351 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1352 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1353 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1355 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1356 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1357 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1358 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1359 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1360 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1361 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1362 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1363 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1364 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1365 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1366 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1367 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1368 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1369 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1370 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1372 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1375 static void gaudi_init_e2e(struct hl_device *hdev)
1377 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1378 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1379 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1380 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1382 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1383 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1384 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1385 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1387 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1388 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1389 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1390 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1392 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1393 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1394 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1395 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1397 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1398 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1399 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1400 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1402 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1403 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1404 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1405 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1407 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1408 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1409 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1410 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1412 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
1413 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
1414 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
1415 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
1417 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
1418 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
1419 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
1420 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
1422 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1423 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1424 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1425 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1427 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1428 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1429 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1430 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1432 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1433 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1434 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1435 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1437 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1438 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1439 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1440 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1442 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1443 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1444 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1445 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1447 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1448 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1449 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1450 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1452 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
1453 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
1454 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
1455 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
1457 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1458 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1459 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1460 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1462 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1463 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1464 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1465 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1467 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1468 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1469 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1470 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1472 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1473 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1474 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1475 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1477 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1478 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1479 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1480 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1482 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1483 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1484 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1485 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1487 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1488 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1489 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1490 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1492 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1493 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1494 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1495 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1497 if (!hdev->dram_scrambler_enable) {
1498 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1499 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1500 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1501 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1503 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1504 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1505 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1506 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1508 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1509 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1510 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1511 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1513 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1514 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1515 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1516 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1518 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1519 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1520 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1521 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1523 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1524 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1525 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1526 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1528 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1529 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1530 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1531 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1533 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1534 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1535 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1536 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1538 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1539 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1540 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1541 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1543 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1544 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1545 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1546 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1548 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1549 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1550 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1551 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1553 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1554 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1555 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1556 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1558 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1559 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1560 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1561 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1563 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1564 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1565 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1566 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1568 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1569 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1570 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1571 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1573 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1574 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1575 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1576 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1578 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1579 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1580 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1581 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1583 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1584 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1585 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1586 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1588 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1589 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1590 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1591 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1593 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1594 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1595 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1596 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1598 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1599 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1600 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1601 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1603 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1604 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1605 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1606 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1608 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1609 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1610 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1611 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1613 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1614 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1615 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1616 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1619 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
1620 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1621 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
1622 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1624 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
1625 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1626 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
1627 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1629 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
1630 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1631 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
1632 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1634 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
1635 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1636 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
1637 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1639 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
1640 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1641 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
1642 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1644 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
1645 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1646 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
1647 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1649 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
1650 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1651 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
1652 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1654 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
1655 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1656 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
1657 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1659 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
1660 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1661 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1662 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1664 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1665 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1666 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1667 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1669 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1670 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1671 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1672 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1674 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1675 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1676 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1677 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1679 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1680 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1681 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1682 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1684 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1685 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1686 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1687 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1689 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1690 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1691 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1692 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1694 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1695 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1696 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1697 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1699 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1700 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1701 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1702 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1704 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1705 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1706 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1707 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1709 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1710 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1711 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1712 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1714 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1715 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1716 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1717 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1719 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1720 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1721 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1722 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1724 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1725 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1726 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1727 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1729 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1730 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1731 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1732 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1734 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1735 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1736 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1737 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1740 static void gaudi_init_hbm_cred(struct hl_device *hdev)
1742 uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
1744 hbm0_wr = 0x33333333;
1745 hbm0_rd = 0x77777777;
1746 hbm1_wr = 0x55555555;
1747 hbm1_rd = 0xDDDDDDDD;
1749 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
1750 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
1751 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
1752 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
1754 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
1755 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
1756 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
1757 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
1759 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
1760 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
1761 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
1762 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
1764 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
1765 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
1766 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
1767 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
1769 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
1770 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1771 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1772 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
1773 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1774 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1775 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
1776 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1777 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1778 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
1779 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1780 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1782 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
1783 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1784 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1785 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
1786 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1787 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1788 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
1789 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1790 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1791 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
1792 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1793 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1796 static void gaudi_init_golden_registers(struct hl_device *hdev)
1801 gaudi_init_e2e(hdev);
1803 gaudi_init_hbm_cred(hdev);
1805 hdev->asic_funcs->disable_clock_gating(hdev);
1807 for (tpc_id = 0, tpc_offset = 0;
1808 tpc_id < TPC_NUMBER_OF_ENGINES;
1809 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
1810 /* Mask all arithmetic interrupts from TPC */
1811 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
1812 /* Set 16 cache lines */
1813 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
1814 ICACHE_FETCH_LINE_NUM, 2);
1817 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
1818 for (i = 0 ; i < 128 ; i += 8)
1819 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
1821 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1822 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1823 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1824 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1827 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
1828 int qman_id, dma_addr_t qman_pq_addr)
1830 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
1831 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
1832 u32 q_off, dma_qm_offset;
1835 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1837 mtr_base_en_lo = lower_32_bits(CFG_BASE +
1838 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1839 mtr_base_en_hi = upper_32_bits(CFG_BASE +
1840 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1841 so_base_en_lo = lower_32_bits(CFG_BASE +
1842 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1843 so_base_en_hi = upper_32_bits(CFG_BASE +
1844 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1845 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
1846 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1847 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
1848 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1849 so_base_ws_lo = lower_32_bits(CFG_BASE +
1850 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1851 so_base_ws_hi = upper_32_bits(CFG_BASE +
1852 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1854 q_off = dma_qm_offset + qman_id * 4;
1856 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
1857 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
1859 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
1860 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1861 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1863 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
1864 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
1865 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
1867 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
1868 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
1869 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
1870 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
1871 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
1872 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
1873 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
1874 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
1876 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
1878 /* The following configuration is needed only once per QMAN */
1880 /* Configure RAZWI IRQ */
1881 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1882 if (hdev->stop_on_err) {
1884 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1887 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1888 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1889 lower_32_bits(CFG_BASE +
1890 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1891 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1892 upper_32_bits(CFG_BASE +
1893 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1894 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1895 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1898 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1899 QM_ARB_ERR_MSG_EN_MASK);
1901 /* Increase ARB WDT to support streams architecture */
1902 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1903 GAUDI_ARB_WDT_TIMEOUT);
1905 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1906 QMAN_EXTERNAL_MAKE_TRUSTED);
1908 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1912 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
1914 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
1915 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
1917 /* Set to maximum possible according to physical size */
1918 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
1919 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
1921 /* STOP_ON bit implies no completion to operation in case of RAZWI */
1922 if (hdev->stop_on_err)
1923 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
1925 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
1926 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
1927 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1928 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
1929 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1930 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
1931 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
1932 WREG32(mmDMA0_CORE_PROT + dma_offset,
1933 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
1934 /* If the channel is secured, it should be in MMU bypass mode */
1935 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
1936 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
1937 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
1940 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
1943 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1945 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
1948 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
1950 struct gaudi_device *gaudi = hdev->asic_specific;
1951 struct hl_hw_queue *q;
1952 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
1954 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
1957 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
1958 dma_id = gaudi_dma_assignment[i];
1960 * For queues after the CPU Q need to add 1 to get the correct
1961 * queue. In addition, need to add the CPU EQ and NIC IRQs in
1962 * order to get the correct MSI register.
1966 nic_skip = NIC_NUMBER_OF_ENGINES;
1972 for (j = 0 ; j < QMAN_STREAMS ; j++) {
1973 q_idx = 4 * dma_id + j + cpu_skip;
1974 q = &hdev->kernel_queues[q_idx];
1976 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
1977 gaudi_init_pci_dma_qman(hdev, dma_id, j,
1981 gaudi_init_dma_core(hdev, dma_id);
1983 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
1986 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
1989 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
1990 int qman_id, u64 qman_base_addr)
1992 u32 mtr_base_lo, mtr_base_hi;
1993 u32 so_base_lo, so_base_hi;
1994 u32 q_off, dma_qm_offset;
1997 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1999 mtr_base_lo = lower_32_bits(CFG_BASE +
2000 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2001 mtr_base_hi = upper_32_bits(CFG_BASE +
2002 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2003 so_base_lo = lower_32_bits(CFG_BASE +
2004 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2005 so_base_hi = upper_32_bits(CFG_BASE +
2006 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2008 q_off = dma_qm_offset + qman_id * 4;
2011 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2012 lower_32_bits(qman_base_addr));
2013 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2014 upper_32_bits(qman_base_addr));
2016 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2017 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2018 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2020 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2021 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2022 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2024 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2025 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2026 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2028 /* Configure RAZWI IRQ */
2029 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2030 if (hdev->stop_on_err) {
2032 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2034 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2036 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2037 lower_32_bits(CFG_BASE +
2038 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2039 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2040 upper_32_bits(CFG_BASE +
2041 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2042 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2043 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2046 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2047 QM_ARB_ERR_MSG_EN_MASK);
2049 /* Increase ARB WDT to support streams architecture */
2050 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2051 GAUDI_ARB_WDT_TIMEOUT);
2053 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2054 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2055 QMAN_INTERNAL_MAKE_TRUSTED);
2058 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2059 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2060 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2061 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2064 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2066 struct gaudi_device *gaudi = hdev->asic_specific;
2067 struct gaudi_internal_qman_info *q;
2069 int i, j, dma_id, internal_q_index;
2071 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2074 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2075 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2077 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2079 * Add the CPU queue in order to get the correct queue
2080 * number as all internal queue are placed after it
2082 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2084 q = &gaudi->internal_qmans[internal_q_index];
2085 qman_base_addr = (u64) q->pq_dma_addr;
2086 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2090 /* Initializing lower CP for HBM DMA QMAN */
2091 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2093 gaudi_init_dma_core(hdev, dma_id);
2095 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2098 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2101 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2102 int qman_id, u64 qman_base_addr)
2104 u32 mtr_base_lo, mtr_base_hi;
2105 u32 so_base_lo, so_base_hi;
2109 mtr_base_lo = lower_32_bits(CFG_BASE +
2110 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2111 mtr_base_hi = upper_32_bits(CFG_BASE +
2112 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2113 so_base_lo = lower_32_bits(CFG_BASE +
2114 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2115 so_base_hi = upper_32_bits(CFG_BASE +
2116 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2118 q_off = mme_offset + qman_id * 4;
2121 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2122 lower_32_bits(qman_base_addr));
2123 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2124 upper_32_bits(qman_base_addr));
2126 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2127 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2128 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2130 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2131 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2132 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2134 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2135 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2136 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2138 /* Configure RAZWI IRQ */
2139 mme_id = mme_offset /
2140 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2142 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2143 if (hdev->stop_on_err) {
2145 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2147 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2148 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2149 lower_32_bits(CFG_BASE +
2150 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2151 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2152 upper_32_bits(CFG_BASE +
2153 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2154 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2155 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2158 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2159 QM_ARB_ERR_MSG_EN_MASK);
2161 /* Increase ARB WDT to support streams architecture */
2162 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2163 GAUDI_ARB_WDT_TIMEOUT);
2165 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2166 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2167 QMAN_INTERNAL_MAKE_TRUSTED);
2170 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2171 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2172 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2173 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2176 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2178 struct gaudi_device *gaudi = hdev->asic_specific;
2179 struct gaudi_internal_qman_info *q;
2182 int i, internal_q_index;
2184 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2188 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2189 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2192 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2194 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2195 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2196 q = &gaudi->internal_qmans[internal_q_index];
2197 qman_base_addr = (u64) q->pq_dma_addr;
2198 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2204 /* Initializing lower CP for MME QMANs */
2205 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2206 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2207 gaudi_init_mme_qman(hdev, 0, 4, 0);
2209 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2210 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2212 gaudi->hw_cap_initialized |= HW_CAP_MME;
2215 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2216 int qman_id, u64 qman_base_addr)
2218 u32 mtr_base_lo, mtr_base_hi;
2219 u32 so_base_lo, so_base_hi;
2223 mtr_base_lo = lower_32_bits(CFG_BASE +
2224 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2225 mtr_base_hi = upper_32_bits(CFG_BASE +
2226 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2227 so_base_lo = lower_32_bits(CFG_BASE +
2228 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2229 so_base_hi = upper_32_bits(CFG_BASE +
2230 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2232 q_off = tpc_offset + qman_id * 4;
2235 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2236 lower_32_bits(qman_base_addr));
2237 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2238 upper_32_bits(qman_base_addr));
2240 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2241 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2242 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2244 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2245 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2246 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2248 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2249 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2250 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2252 /* Configure RAZWI IRQ */
2253 tpc_id = tpc_offset /
2254 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2256 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2257 if (hdev->stop_on_err) {
2259 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2262 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2263 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2264 lower_32_bits(CFG_BASE +
2265 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2266 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2267 upper_32_bits(CFG_BASE +
2268 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2269 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2270 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2273 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2274 QM_ARB_ERR_MSG_EN_MASK);
2276 /* Increase ARB WDT to support streams architecture */
2277 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2278 GAUDI_ARB_WDT_TIMEOUT);
2280 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2281 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2282 QMAN_INTERNAL_MAKE_TRUSTED);
2285 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2286 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2287 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2288 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2291 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2293 struct gaudi_device *gaudi = hdev->asic_specific;
2294 struct gaudi_internal_qman_info *q;
2296 u32 so_base_hi, tpc_offset = 0;
2297 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2298 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2299 int i, tpc_id, internal_q_index;
2301 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2304 so_base_hi = upper_32_bits(CFG_BASE +
2305 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2307 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2308 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2309 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2310 tpc_id * QMAN_STREAMS + i;
2311 q = &gaudi->internal_qmans[internal_q_index];
2312 qman_base_addr = (u64) q->pq_dma_addr;
2313 gaudi_init_tpc_qman(hdev, tpc_offset, i,
2317 /* Initializing lower CP for TPC QMAN */
2318 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2320 /* Enable the QMAN and TPC channel */
2321 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2326 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2329 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2331 gaudi->hw_cap_initialized |=
2332 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
2336 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2338 struct gaudi_device *gaudi = hdev->asic_specific;
2340 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2343 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2344 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2345 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2348 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2350 struct gaudi_device *gaudi = hdev->asic_specific;
2352 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2355 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2356 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2357 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2358 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2359 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2362 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2364 struct gaudi_device *gaudi = hdev->asic_specific;
2366 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2369 WREG32(mmMME2_QM_GLBL_CFG0, 0);
2370 WREG32(mmMME0_QM_GLBL_CFG0, 0);
2373 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2375 struct gaudi_device *gaudi = hdev->asic_specific;
2379 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2382 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2383 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2384 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2388 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2390 struct gaudi_device *gaudi = hdev->asic_specific;
2392 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2395 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
2396 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2397 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2398 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2401 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2403 struct gaudi_device *gaudi = hdev->asic_specific;
2405 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2408 /* Stop CPs of HBM DMA QMANs */
2410 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2411 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2412 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2413 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2414 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2417 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2419 struct gaudi_device *gaudi = hdev->asic_specific;
2421 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2424 /* Stop CPs of MME QMANs */
2425 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2426 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2429 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2431 struct gaudi_device *gaudi = hdev->asic_specific;
2433 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2436 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2437 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2438 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2439 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2440 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2441 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2442 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2443 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2446 static void gaudi_pci_dma_stall(struct hl_device *hdev)
2448 struct gaudi_device *gaudi = hdev->asic_specific;
2450 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2453 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2454 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2455 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2458 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2460 struct gaudi_device *gaudi = hdev->asic_specific;
2462 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2465 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2466 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2467 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2468 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2469 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2472 static void gaudi_mme_stall(struct hl_device *hdev)
2474 struct gaudi_device *gaudi = hdev->asic_specific;
2476 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2479 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
2480 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2481 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2482 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2483 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2484 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2485 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2486 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2487 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2488 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2489 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2490 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2491 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2492 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2493 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2494 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2495 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2498 static void gaudi_tpc_stall(struct hl_device *hdev)
2500 struct gaudi_device *gaudi = hdev->asic_specific;
2502 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2505 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2506 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2507 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2508 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2509 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2510 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2511 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2512 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2515 static void gaudi_set_clock_gating(struct hl_device *hdev)
2517 struct gaudi_device *gaudi = hdev->asic_specific;
2522 /* In case we are during debug session, don't enable the clock gate
2523 * as it may interfere
2528 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
2529 enable = !!(hdev->clock_gating_mask &
2530 (BIT_ULL(gaudi_dma_assignment[i])));
2532 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2533 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2534 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2535 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2536 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
2539 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
2540 enable = !!(hdev->clock_gating_mask &
2541 (BIT_ULL(gaudi_dma_assignment[i])));
2543 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2544 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2545 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2546 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2547 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2550 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
2551 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2552 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2554 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
2555 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2556 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2558 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2559 enable = !!(hdev->clock_gating_mask &
2560 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
2562 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
2563 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2564 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
2565 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2567 qman_offset += TPC_QMAN_OFFSET;
2570 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2573 static void gaudi_disable_clock_gating(struct hl_device *hdev)
2575 struct gaudi_device *gaudi = hdev->asic_specific;
2579 if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2582 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2583 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2584 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2586 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2589 WREG32(mmMME0_QM_CGM_CFG, 0);
2590 WREG32(mmMME0_QM_CGM_CFG1, 0);
2591 WREG32(mmMME2_QM_CGM_CFG, 0);
2592 WREG32(mmMME2_QM_CGM_CFG1, 0);
2594 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2595 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2596 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2598 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2601 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2604 static void gaudi_enable_timestamp(struct hl_device *hdev)
2606 /* Disable the timestamp counter */
2607 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2609 /* Zero the lower/upper parts of the 64-bit counter */
2610 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2611 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2613 /* Enable the counter */
2614 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2617 static void gaudi_disable_timestamp(struct hl_device *hdev)
2619 /* Disable the timestamp counter */
2620 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2623 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2625 u32 wait_timeout_ms;
2628 "Halting compute engines and disabling interrupts\n");
2631 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2633 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
2636 gaudi_stop_mme_qmans(hdev);
2637 gaudi_stop_tpc_qmans(hdev);
2638 gaudi_stop_hbm_dma_qmans(hdev);
2639 gaudi_stop_pci_dma_qmans(hdev);
2641 hdev->asic_funcs->disable_clock_gating(hdev);
2643 msleep(wait_timeout_ms);
2645 gaudi_pci_dma_stall(hdev);
2646 gaudi_hbm_dma_stall(hdev);
2647 gaudi_tpc_stall(hdev);
2648 gaudi_mme_stall(hdev);
2650 msleep(wait_timeout_ms);
2652 gaudi_disable_mme_qmans(hdev);
2653 gaudi_disable_tpc_qmans(hdev);
2654 gaudi_disable_hbm_dma_qmans(hdev);
2655 gaudi_disable_pci_dma_qmans(hdev);
2657 gaudi_disable_timestamp(hdev);
2659 gaudi_disable_msi(hdev);
2662 static int gaudi_mmu_init(struct hl_device *hdev)
2664 struct asic_fixed_properties *prop = &hdev->asic_prop;
2665 struct gaudi_device *gaudi = hdev->asic_specific;
2669 if (!hdev->mmu_enable)
2672 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2675 hdev->dram_supports_virtual_memory = false;
2677 for (i = 0 ; i < prop->max_asid ; i++) {
2678 hop0_addr = prop->mmu_pgt_addr +
2679 (i * prop->mmu_hop_table_size);
2681 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2684 "failed to set hop0 addr for asid %d\n", i);
2689 /* init MMU cache manage page */
2690 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2691 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2693 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
2695 WREG32(mmMMU_UP_MMU_ENABLE, 1);
2696 WREG32(mmMMU_UP_SPI_MASK, 0xF);
2698 WREG32(mmSTLB_HOP_CONFIGURATION,
2699 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2702 * The H/W expects the first PI after init to be 1. After wraparound
2705 gaudi->mmu_cache_inv_pi = 1;
2707 gaudi->hw_cap_initialized |= HW_CAP_MMU;
2715 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2719 /* HBM scrambler must be initialized before pushing F/W to HBM */
2720 gaudi_init_scrambler_hbm(hdev);
2722 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
2724 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
2727 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
2731 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2733 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
2736 static void gaudi_read_device_fw_version(struct hl_device *hdev,
2737 enum hl_fw_component fwc)
2745 ver_off = RREG32(mmUBOOT_VER_OFFSET);
2746 dest = hdev->asic_prop.uboot_ver;
2749 case FW_COMP_PREBOOT:
2750 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2751 dest = hdev->asic_prop.preboot_ver;
2755 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2759 ver_off &= ~((u32)SRAM_BASE_ADDR);
2761 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2762 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
2765 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2767 strcpy(dest, "unavailable");
2771 static int gaudi_init_cpu(struct hl_device *hdev)
2773 struct gaudi_device *gaudi = hdev->asic_specific;
2776 if (!hdev->cpu_enable)
2779 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
2783 * The device CPU works with 40 bits addresses.
2784 * This register sets the extension to 50 bits.
2786 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
2788 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2789 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
2790 mmCPU_CMD_STATUS_TO_HOST,
2792 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
2793 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
2798 gaudi->hw_cap_initialized |= HW_CAP_CPU;
2803 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
2805 struct gaudi_device *gaudi = hdev->asic_specific;
2808 struct hl_hw_queue *cpu_pq =
2809 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
2812 if (!hdev->cpu_queues_enable)
2815 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
2818 eq = &hdev->event_queue;
2820 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
2821 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
2823 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
2824 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
2826 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
2827 lower_32_bits(hdev->cpu_accessible_dma_address));
2828 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
2829 upper_32_bits(hdev->cpu_accessible_dma_address));
2831 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
2832 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
2833 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
2835 /* Used for EQ CI */
2836 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
2838 WREG32(mmCPU_IF_PF_PQ_PI, 0);
2840 if (gaudi->multi_msi_mode)
2841 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
2843 WREG32(mmCPU_IF_QUEUE_INIT,
2844 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
2846 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
2848 err = hl_poll_timeout(
2850 mmCPU_IF_QUEUE_INIT,
2852 (status == PQ_INIT_STATUS_READY_FOR_HOST),
2858 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
2862 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
2866 static void gaudi_pre_hw_init(struct hl_device *hdev)
2868 /* Perform read from the device to make sure device is up */
2869 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2872 * Let's mark in the H/W that we have reached this point. We check
2873 * this value in the reset_before_init function to understand whether
2874 * we need to reset the chip before doing H/W init. This register is
2875 * cleared by the H/W upon H/W reset
2877 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2879 /* Set the access through PCI bars (Linux driver only) as secured */
2880 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
2881 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
2883 /* Perform read to flush the waiting writes to ensure configuration
2884 * was set in the device
2886 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
2888 if (hdev->axi_drain) {
2889 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG,
2890 1 << PCIE_WRAP_LBW_DRAIN_CFG_EN_SHIFT);
2891 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG,
2892 1 << PCIE_WRAP_HBW_DRAIN_CFG_EN_SHIFT);
2894 /* Perform read to flush the DRAIN cfg */
2895 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2897 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG, 0);
2898 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG, 0);
2900 /* Perform read to flush the DRAIN cfg */
2901 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2904 /* Configure the reset registers. Must be done as early as possible
2905 * in case we fail during H/W initialization
2907 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
2908 (CFG_RST_H_DMA_MASK |
2909 CFG_RST_H_MME_MASK |
2911 CFG_RST_H_TPC_7_MASK));
2913 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
2915 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
2916 (CFG_RST_H_HBM_MASK |
2917 CFG_RST_H_TPC_7_MASK |
2918 CFG_RST_H_NIC_MASK |
2920 CFG_RST_H_DMA_MASK |
2921 CFG_RST_H_MME_MASK |
2922 CFG_RST_H_CPU_MASK |
2923 CFG_RST_H_MMU_MASK));
2925 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
2926 (CFG_RST_L_IF_MASK |
2927 CFG_RST_L_PSOC_MASK |
2928 CFG_RST_L_TPC_MASK));
2931 static int gaudi_hw_init(struct hl_device *hdev)
2935 dev_info(hdev->dev, "Starting initialization of H/W\n");
2937 gaudi_pre_hw_init(hdev);
2939 gaudi_init_pci_dma_qmans(hdev);
2941 gaudi_init_hbm_dma_qmans(hdev);
2943 rc = gaudi_init_cpu(hdev);
2945 dev_err(hdev->dev, "failed to initialize CPU\n");
2949 /* SRAM scrambler must be initialized after CPU is running from HBM */
2950 gaudi_init_scrambler_sram(hdev);
2952 /* This is here just in case we are working without CPU */
2953 gaudi_init_scrambler_hbm(hdev);
2955 gaudi_init_golden_registers(hdev);
2957 rc = gaudi_mmu_init(hdev);
2961 gaudi_init_security(hdev);
2963 gaudi_init_mme_qmans(hdev);
2965 gaudi_init_tpc_qmans(hdev);
2967 hdev->asic_funcs->set_clock_gating(hdev);
2969 gaudi_enable_timestamp(hdev);
2971 /* MSI must be enabled before CPU queues are initialized */
2972 rc = gaudi_enable_msi(hdev);
2974 goto disable_queues;
2976 /* must be called after MSI was enabled */
2977 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
2979 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
2984 /* Perform read from the device to flush all configuration */
2985 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2990 gaudi_disable_msi(hdev);
2992 gaudi_disable_mme_qmans(hdev);
2993 gaudi_disable_pci_dma_qmans(hdev);
2998 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3000 struct gaudi_device *gaudi = hdev->asic_specific;
3001 u32 status, reset_timeout_ms, cpu_timeout_ms, boot_strap = 0;
3004 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3009 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3010 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3012 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3013 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3016 /* Set device to handle FLR by H/W as we will put the device CPU to
3019 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3020 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3022 /* I don't know what is the state of the CPU so make sure it is
3023 * stopped in any means necessary
3025 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
3026 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3028 msleep(cpu_timeout_ms);
3030 /* Tell ASIC not to re-initialize PCIe */
3031 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3033 boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
3036 * rdata[31:0] = strap_read_val;
3037 * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
3039 boot_strap = (((boot_strap & 0x7FE00000) << 1) |
3040 (boot_strap & 0x001FFFFF));
3041 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
3043 /* Restart BTL/BLR upon hard-reset */
3044 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3046 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3047 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3049 "Issued HARD reset command, going to wait %dms\n",
3053 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3054 * itself is in reset. Need to wait until the reset is deasserted
3056 msleep(reset_timeout_ms);
3058 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3059 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3061 "Timeout while waiting for device to reset 0x%x\n",
3064 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
3066 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3067 HW_CAP_HBM | HW_CAP_PCI_DMA |
3068 HW_CAP_MME | HW_CAP_TPC_MASK |
3069 HW_CAP_HBM_DMA | HW_CAP_PLL |
3071 HW_CAP_SRAM_SCRAMBLER |
3072 HW_CAP_HBM_SCRAMBLER |
3075 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3078 static int gaudi_suspend(struct hl_device *hdev)
3082 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
3084 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3089 static int gaudi_resume(struct hl_device *hdev)
3091 return gaudi_init_iatu(hdev);
3094 static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3095 void *cpu_addr, dma_addr_t dma_addr, size_t size)
3099 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3100 VM_DONTCOPY | VM_NORESERVE;
3102 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
3104 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
3109 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3111 struct gaudi_device *gaudi = hdev->asic_specific;
3112 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3114 bool invalid_queue = false;
3116 switch (hw_queue_id) {
3117 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3118 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3119 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3120 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3121 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3124 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3125 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3126 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3127 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3128 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3131 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3132 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3133 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3134 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3135 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3138 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3139 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3140 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3141 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3142 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3145 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3146 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3147 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3148 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3149 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3152 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3153 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3154 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3155 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3156 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3159 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3160 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3161 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3162 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3163 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3166 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3167 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3168 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3169 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3170 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3173 case GAUDI_QUEUE_ID_CPU_PQ:
3174 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3175 db_reg_offset = mmCPU_IF_PF_PQ_PI;
3177 invalid_queue = true;
3180 case GAUDI_QUEUE_ID_MME_0_0:
3181 db_reg_offset = mmMME2_QM_PQ_PI_0;
3184 case GAUDI_QUEUE_ID_MME_0_1:
3185 db_reg_offset = mmMME2_QM_PQ_PI_1;
3188 case GAUDI_QUEUE_ID_MME_0_2:
3189 db_reg_offset = mmMME2_QM_PQ_PI_2;
3192 case GAUDI_QUEUE_ID_MME_0_3:
3193 db_reg_offset = mmMME2_QM_PQ_PI_3;
3196 case GAUDI_QUEUE_ID_MME_1_0:
3197 db_reg_offset = mmMME0_QM_PQ_PI_0;
3200 case GAUDI_QUEUE_ID_MME_1_1:
3201 db_reg_offset = mmMME0_QM_PQ_PI_1;
3204 case GAUDI_QUEUE_ID_MME_1_2:
3205 db_reg_offset = mmMME0_QM_PQ_PI_2;
3208 case GAUDI_QUEUE_ID_MME_1_3:
3209 db_reg_offset = mmMME0_QM_PQ_PI_3;
3212 case GAUDI_QUEUE_ID_TPC_0_0:
3213 db_reg_offset = mmTPC0_QM_PQ_PI_0;
3216 case GAUDI_QUEUE_ID_TPC_0_1:
3217 db_reg_offset = mmTPC0_QM_PQ_PI_1;
3220 case GAUDI_QUEUE_ID_TPC_0_2:
3221 db_reg_offset = mmTPC0_QM_PQ_PI_2;
3224 case GAUDI_QUEUE_ID_TPC_0_3:
3225 db_reg_offset = mmTPC0_QM_PQ_PI_3;
3228 case GAUDI_QUEUE_ID_TPC_1_0:
3229 db_reg_offset = mmTPC1_QM_PQ_PI_0;
3232 case GAUDI_QUEUE_ID_TPC_1_1:
3233 db_reg_offset = mmTPC1_QM_PQ_PI_1;
3236 case GAUDI_QUEUE_ID_TPC_1_2:
3237 db_reg_offset = mmTPC1_QM_PQ_PI_2;
3240 case GAUDI_QUEUE_ID_TPC_1_3:
3241 db_reg_offset = mmTPC1_QM_PQ_PI_3;
3244 case GAUDI_QUEUE_ID_TPC_2_0:
3245 db_reg_offset = mmTPC2_QM_PQ_PI_0;
3248 case GAUDI_QUEUE_ID_TPC_2_1:
3249 db_reg_offset = mmTPC2_QM_PQ_PI_1;
3252 case GAUDI_QUEUE_ID_TPC_2_2:
3253 db_reg_offset = mmTPC2_QM_PQ_PI_2;
3256 case GAUDI_QUEUE_ID_TPC_2_3:
3257 db_reg_offset = mmTPC2_QM_PQ_PI_3;
3260 case GAUDI_QUEUE_ID_TPC_3_0:
3261 db_reg_offset = mmTPC3_QM_PQ_PI_0;
3264 case GAUDI_QUEUE_ID_TPC_3_1:
3265 db_reg_offset = mmTPC3_QM_PQ_PI_1;
3268 case GAUDI_QUEUE_ID_TPC_3_2:
3269 db_reg_offset = mmTPC3_QM_PQ_PI_2;
3272 case GAUDI_QUEUE_ID_TPC_3_3:
3273 db_reg_offset = mmTPC3_QM_PQ_PI_3;
3276 case GAUDI_QUEUE_ID_TPC_4_0:
3277 db_reg_offset = mmTPC4_QM_PQ_PI_0;
3280 case GAUDI_QUEUE_ID_TPC_4_1:
3281 db_reg_offset = mmTPC4_QM_PQ_PI_1;
3284 case GAUDI_QUEUE_ID_TPC_4_2:
3285 db_reg_offset = mmTPC4_QM_PQ_PI_2;
3288 case GAUDI_QUEUE_ID_TPC_4_3:
3289 db_reg_offset = mmTPC4_QM_PQ_PI_3;
3292 case GAUDI_QUEUE_ID_TPC_5_0:
3293 db_reg_offset = mmTPC5_QM_PQ_PI_0;
3296 case GAUDI_QUEUE_ID_TPC_5_1:
3297 db_reg_offset = mmTPC5_QM_PQ_PI_1;
3300 case GAUDI_QUEUE_ID_TPC_5_2:
3301 db_reg_offset = mmTPC5_QM_PQ_PI_2;
3304 case GAUDI_QUEUE_ID_TPC_5_3:
3305 db_reg_offset = mmTPC5_QM_PQ_PI_3;
3308 case GAUDI_QUEUE_ID_TPC_6_0:
3309 db_reg_offset = mmTPC6_QM_PQ_PI_0;
3312 case GAUDI_QUEUE_ID_TPC_6_1:
3313 db_reg_offset = mmTPC6_QM_PQ_PI_1;
3316 case GAUDI_QUEUE_ID_TPC_6_2:
3317 db_reg_offset = mmTPC6_QM_PQ_PI_2;
3320 case GAUDI_QUEUE_ID_TPC_6_3:
3321 db_reg_offset = mmTPC6_QM_PQ_PI_3;
3324 case GAUDI_QUEUE_ID_TPC_7_0:
3325 db_reg_offset = mmTPC7_QM_PQ_PI_0;
3328 case GAUDI_QUEUE_ID_TPC_7_1:
3329 db_reg_offset = mmTPC7_QM_PQ_PI_1;
3332 case GAUDI_QUEUE_ID_TPC_7_2:
3333 db_reg_offset = mmTPC7_QM_PQ_PI_2;
3336 case GAUDI_QUEUE_ID_TPC_7_3:
3337 db_reg_offset = mmTPC7_QM_PQ_PI_3;
3341 invalid_queue = true;
3344 if (invalid_queue) {
3345 /* Should never get here */
3346 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3353 /* ring the doorbell */
3354 WREG32(db_reg_offset, db_value);
3356 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3357 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3358 GAUDI_EVENT_PI_UPDATE);
3361 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
3364 __le64 *pbd = (__le64 *) bd;
3366 /* The QMANs are on the host memory so a simple copy suffice */
3371 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3372 dma_addr_t *dma_handle, gfp_t flags)
3374 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3377 /* Shift to the device's base physical address of host memory */
3379 *dma_handle += HOST_PHYS_BASE;
3384 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
3385 void *cpu_addr, dma_addr_t dma_handle)
3387 /* Cancel the device's base physical address of host memory */
3388 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3390 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3393 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
3394 u32 queue_id, dma_addr_t *dma_handle,
3397 struct gaudi_device *gaudi = hdev->asic_specific;
3398 struct gaudi_internal_qman_info *q;
3400 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3401 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3402 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3406 q = &gaudi->internal_qmans[queue_id];
3407 *dma_handle = q->pq_dma_addr;
3408 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3410 return q->pq_kernel_addr;
3413 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
3414 u16 len, u32 timeout, long *result)
3416 struct gaudi_device *gaudi = hdev->asic_specific;
3418 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3425 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
3427 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3431 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3433 struct packet_msg_prot *fence_pkt;
3434 dma_addr_t pkt_dma_addr;
3435 u32 fence_val, tmp, timeout_usec;
3436 dma_addr_t fence_dma_addr;
3441 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3443 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3445 fence_val = GAUDI_QMAN0_FENCE_VAL;
3447 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3451 "Failed to allocate memory for H/W queue %d testing\n",
3458 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3459 sizeof(struct packet_msg_prot),
3460 GFP_KERNEL, &pkt_dma_addr);
3463 "Failed to allocate packet for H/W queue %d testing\n",
3466 goto free_fence_ptr;
3469 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
3470 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
3471 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
3473 fence_pkt->ctl = cpu_to_le32(tmp);
3474 fence_pkt->value = cpu_to_le32(fence_val);
3475 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3477 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3478 sizeof(struct packet_msg_prot),
3482 "Failed to send fence packet to H/W queue %d\n",
3487 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3488 1000, timeout_usec, true);
3490 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3492 if (rc == -ETIMEDOUT) {
3494 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3495 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3500 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3503 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3508 static int gaudi_test_cpu_queue(struct hl_device *hdev)
3510 struct gaudi_device *gaudi = hdev->asic_specific;
3513 * check capability here as send_cpu_message() won't update the result
3514 * value if no capability
3516 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3519 return hl_fw_test_cpu_queue(hdev);
3522 static int gaudi_test_queues(struct hl_device *hdev)
3524 int i, rc, ret_val = 0;
3526 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
3527 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3528 rc = gaudi_test_queue(hdev, i);
3534 rc = gaudi_test_cpu_queue(hdev);
3541 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3542 gfp_t mem_flags, dma_addr_t *dma_handle)
3546 if (size > GAUDI_DMA_POOL_BLK_SIZE)
3549 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3551 /* Shift to the device's base physical address of host memory */
3553 *dma_handle += HOST_PHYS_BASE;
3558 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
3559 dma_addr_t dma_addr)
3561 /* Cancel the device's base physical address of host memory */
3562 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3564 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3567 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3568 size_t size, dma_addr_t *dma_handle)
3570 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3573 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3574 size_t size, void *vaddr)
3576 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3579 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3580 int nents, enum dma_data_direction dir)
3582 struct scatterlist *sg;
3585 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3588 /* Shift to the device's base physical address of host memory */
3589 for_each_sg(sgl, sg, nents, i)
3590 sg->dma_address += HOST_PHYS_BASE;
3595 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3596 int nents, enum dma_data_direction dir)
3598 struct scatterlist *sg;
3601 /* Cancel the device's base physical address of host memory */
3602 for_each_sg(sgl, sg, nents, i)
3603 sg->dma_address -= HOST_PHYS_BASE;
3605 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3608 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
3609 struct sg_table *sgt)
3611 struct scatterlist *sg, *sg_next_iter;
3612 u32 count, dma_desc_cnt;
3614 dma_addr_t addr, addr_next;
3618 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3620 len = sg_dma_len(sg);
3621 addr = sg_dma_address(sg);
3626 while ((count + 1) < sgt->nents) {
3627 sg_next_iter = sg_next(sg);
3628 len_next = sg_dma_len(sg_next_iter);
3629 addr_next = sg_dma_address(sg_next_iter);
3634 if ((addr + len == addr_next) &&
3635 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3647 return dma_desc_cnt * sizeof(struct packet_lin_dma);
3650 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
3651 struct hl_cs_parser *parser,
3652 struct packet_lin_dma *user_dma_pkt,
3653 u64 addr, enum dma_data_direction dir)
3655 struct hl_userptr *userptr;
3658 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3659 parser->job_userptr_list, &userptr))
3660 goto already_pinned;
3662 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3666 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3671 list_add_tail(&userptr->job_node, parser->job_userptr_list);
3673 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3674 userptr->sgt->nents, dir);
3676 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3680 userptr->dma_mapped = true;
3684 parser->patched_cb_size +=
3685 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3690 hl_unpin_host_memory(hdev, userptr);
3696 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
3697 struct hl_cs_parser *parser,
3698 struct packet_lin_dma *user_dma_pkt,
3701 enum dma_data_direction dir;
3702 bool skip_host_mem_pin = false, user_memset;
3706 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3707 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3708 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3712 skip_host_mem_pin = true;
3714 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3715 dir = DMA_TO_DEVICE;
3716 addr = le64_to_cpu(user_dma_pkt->src_addr);
3718 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
3719 dir = DMA_FROM_DEVICE;
3720 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3721 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3722 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3725 if (skip_host_mem_pin)
3726 parser->patched_cb_size += sizeof(*user_dma_pkt);
3728 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3734 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3735 struct hl_cs_parser *parser,
3736 struct packet_lin_dma *user_dma_pkt)
3738 bool src_in_host = false;
3739 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3740 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3741 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3743 dev_dbg(hdev->dev, "DMA packet details:\n");
3744 dev_dbg(hdev->dev, "source == 0x%llx\n",
3745 le64_to_cpu(user_dma_pkt->src_addr));
3746 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
3747 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3750 * Special handling for DMA with size 0. Bypass all validations
3751 * because no transactions will be done except for WR_COMP, which
3752 * is not a security issue
3754 if (!le32_to_cpu(user_dma_pkt->tsize)) {
3755 parser->patched_cb_size += sizeof(*user_dma_pkt);
3759 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3762 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
3766 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
3767 struct hl_cs_parser *parser,
3768 struct packet_load_and_exe *user_pkt)
3772 cfg = le32_to_cpu(user_pkt->cfg);
3774 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
3776 "User not allowed to use Load and Execute\n");
3780 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
3785 static int gaudi_validate_cb(struct hl_device *hdev,
3786 struct hl_cs_parser *parser, bool is_mmu)
3788 u32 cb_parsed_length = 0;
3791 parser->patched_cb_size = 0;
3793 /* cb_user_size is more than 0 so loop will always be executed */
3794 while (cb_parsed_length < parser->user_cb_size) {
3795 enum packet_id pkt_id;
3797 struct gaudi_packet *user_pkt;
3799 user_pkt = (struct gaudi_packet *) (uintptr_t)
3800 (parser->user_cb->kernel_address + cb_parsed_length);
3802 pkt_id = (enum packet_id) (
3803 (le64_to_cpu(user_pkt->header) &
3804 PACKET_HEADER_PACKET_ID_MASK) >>
3805 PACKET_HEADER_PACKET_ID_SHIFT);
3807 if (!validate_packet_id(pkt_id)) {
3808 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3813 pkt_size = gaudi_packet_sizes[pkt_id];
3814 cb_parsed_length += pkt_size;
3815 if (cb_parsed_length > parser->user_cb_size) {
3817 "packet 0x%x is out of CB boundary\n", pkt_id);
3823 case PACKET_MSG_PROT:
3825 "User not allowed to use MSG_PROT\n");
3830 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3835 dev_err(hdev->dev, "User not allowed to use STOP\n");
3839 case PACKET_WREG_BULK:
3841 "User not allowed to use WREG_BULK\n");
3845 case PACKET_LOAD_AND_EXE:
3846 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
3847 (struct packet_load_and_exe *) user_pkt);
3850 case PACKET_LIN_DMA:
3851 parser->contains_dma_pkt = true;
3853 parser->patched_cb_size += pkt_size;
3855 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
3856 (struct packet_lin_dma *) user_pkt);
3859 case PACKET_WREG_32:
3860 case PACKET_MSG_LONG:
3861 case PACKET_MSG_SHORT:
3865 case PACKET_ARB_POINT:
3866 parser->patched_cb_size += pkt_size;
3870 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3881 * The new CB should have space at the end for two MSG_PROT packets:
3882 * 1. A packet that will act as a completion packet
3883 * 2. A packet that will generate MSI-X interrupt
3885 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3890 static int gaudi_patch_dma_packet(struct hl_device *hdev,
3891 struct hl_cs_parser *parser,
3892 struct packet_lin_dma *user_dma_pkt,
3893 struct packet_lin_dma *new_dma_pkt,
3894 u32 *new_dma_pkt_size)
3896 struct hl_userptr *userptr;
3897 struct scatterlist *sg, *sg_next_iter;
3898 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
3900 dma_addr_t dma_addr, dma_addr_next;
3901 u64 device_memory_addr, addr;
3902 enum dma_data_direction dir;
3903 struct sg_table *sgt;
3904 bool src_in_host = false;
3905 bool skip_host_mem_pin = false;
3908 ctl = le32_to_cpu(user_dma_pkt->ctl);
3910 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3913 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3914 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3917 addr = le64_to_cpu(user_dma_pkt->src_addr);
3918 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3919 dir = DMA_TO_DEVICE;
3921 skip_host_mem_pin = true;
3923 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3924 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3925 dir = DMA_FROM_DEVICE;
3928 if ((!skip_host_mem_pin) &&
3929 (!hl_userptr_is_pinned(hdev, addr,
3930 le32_to_cpu(user_dma_pkt->tsize),
3931 parser->job_userptr_list, &userptr))) {
3932 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3933 addr, user_dma_pkt->tsize);
3937 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3938 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3939 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3943 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3948 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3949 len = sg_dma_len(sg);
3950 dma_addr = sg_dma_address(sg);
3955 while ((count + 1) < sgt->nents) {
3956 sg_next_iter = sg_next(sg);
3957 len_next = sg_dma_len(sg_next_iter);
3958 dma_addr_next = sg_dma_address(sg_next_iter);
3963 if ((dma_addr + len == dma_addr_next) &&
3964 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3973 ctl = le32_to_cpu(user_dma_pkt->ctl);
3974 if (likely(dma_desc_cnt))
3975 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
3976 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3977 new_dma_pkt->ctl = cpu_to_le32(ctl);
3978 new_dma_pkt->tsize = cpu_to_le32(len);
3980 if (dir == DMA_TO_DEVICE) {
3981 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3982 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3984 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3985 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3989 device_memory_addr += len;
3994 if (!dma_desc_cnt) {
3996 "Error of 0 SG entries when patching DMA packet\n");
4000 /* Fix the last dma packet - wrcomp must be as user set it */
4002 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
4004 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4009 static int gaudi_patch_cb(struct hl_device *hdev,
4010 struct hl_cs_parser *parser)
4012 u32 cb_parsed_length = 0;
4013 u32 cb_patched_cur_length = 0;
4016 /* cb_user_size is more than 0 so loop will always be executed */
4017 while (cb_parsed_length < parser->user_cb_size) {
4018 enum packet_id pkt_id;
4020 u32 new_pkt_size = 0;
4021 struct gaudi_packet *user_pkt, *kernel_pkt;
4023 user_pkt = (struct gaudi_packet *) (uintptr_t)
4024 (parser->user_cb->kernel_address + cb_parsed_length);
4025 kernel_pkt = (struct gaudi_packet *) (uintptr_t)
4026 (parser->patched_cb->kernel_address +
4027 cb_patched_cur_length);
4029 pkt_id = (enum packet_id) (
4030 (le64_to_cpu(user_pkt->header) &
4031 PACKET_HEADER_PACKET_ID_MASK) >>
4032 PACKET_HEADER_PACKET_ID_SHIFT);
4034 if (!validate_packet_id(pkt_id)) {
4035 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
4040 pkt_size = gaudi_packet_sizes[pkt_id];
4041 cb_parsed_length += pkt_size;
4042 if (cb_parsed_length > parser->user_cb_size) {
4044 "packet 0x%x is out of CB boundary\n", pkt_id);
4050 case PACKET_LIN_DMA:
4051 rc = gaudi_patch_dma_packet(hdev, parser,
4052 (struct packet_lin_dma *) user_pkt,
4053 (struct packet_lin_dma *) kernel_pkt,
4055 cb_patched_cur_length += new_pkt_size;
4058 case PACKET_MSG_PROT:
4060 "User not allowed to use MSG_PROT\n");
4065 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4070 dev_err(hdev->dev, "User not allowed to use STOP\n");
4074 case PACKET_WREG_32:
4075 case PACKET_WREG_BULK:
4076 case PACKET_MSG_LONG:
4077 case PACKET_MSG_SHORT:
4081 case PACKET_ARB_POINT:
4082 case PACKET_LOAD_AND_EXE:
4083 memcpy(kernel_pkt, user_pkt, pkt_size);
4084 cb_patched_cur_length += pkt_size;
4088 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4101 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
4102 struct hl_cs_parser *parser)
4104 u64 patched_cb_handle;
4105 u32 patched_cb_size;
4106 struct hl_cb *user_cb;
4110 * The new CB should have space at the end for two MSG_PROT pkt:
4111 * 1. A packet that will act as a completion packet
4112 * 2. A packet that will generate MSI interrupt
4114 parser->patched_cb_size = parser->user_cb_size +
4115 sizeof(struct packet_msg_prot) * 2;
4117 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4118 parser->patched_cb_size, false,
4119 &patched_cb_handle);
4123 "Failed to allocate patched CB for DMA CS %d\n",
4128 patched_cb_handle >>= PAGE_SHIFT;
4129 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4130 (u32) patched_cb_handle);
4131 /* hl_cb_get should never fail here so use kernel WARN */
4132 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4133 (u32) patched_cb_handle);
4134 if (!parser->patched_cb) {
4140 * The check that parser->user_cb_size <= parser->user_cb->size was done
4141 * in validate_queue_index().
4143 memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
4144 (void *) (uintptr_t) parser->user_cb->kernel_address,
4145 parser->user_cb_size);
4147 patched_cb_size = parser->patched_cb_size;
4149 /* Validate patched CB instead of user CB */
4150 user_cb = parser->user_cb;
4151 parser->user_cb = parser->patched_cb;
4152 rc = gaudi_validate_cb(hdev, parser, true);
4153 parser->user_cb = user_cb;
4156 hl_cb_put(parser->patched_cb);
4160 if (patched_cb_size != parser->patched_cb_size) {
4161 dev_err(hdev->dev, "user CB size mismatch\n");
4162 hl_cb_put(parser->patched_cb);
4169 * Always call cb destroy here because we still have 1 reference
4170 * to it by calling cb_get earlier. After the job will be completed,
4171 * cb_put will release it, but here we want to remove it from the
4174 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4175 patched_cb_handle << PAGE_SHIFT);
4180 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
4181 struct hl_cs_parser *parser)
4183 u64 patched_cb_handle;
4186 rc = gaudi_validate_cb(hdev, parser, false);
4191 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4192 parser->patched_cb_size, false,
4193 &patched_cb_handle);
4196 "Failed to allocate patched CB for DMA CS %d\n", rc);
4200 patched_cb_handle >>= PAGE_SHIFT;
4201 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4202 (u32) patched_cb_handle);
4203 /* hl_cb_get should never fail here so use kernel WARN */
4204 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4205 (u32) patched_cb_handle);
4206 if (!parser->patched_cb) {
4211 rc = gaudi_patch_cb(hdev, parser);
4214 hl_cb_put(parser->patched_cb);
4218 * Always call cb destroy here because we still have 1 reference
4219 * to it by calling cb_get earlier. After the job will be completed,
4220 * cb_put will release it, but here we want to remove it from the
4223 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4224 patched_cb_handle << PAGE_SHIFT);
4228 hl_userptr_delete_list(hdev, parser->job_userptr_list);
4232 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
4233 struct hl_cs_parser *parser)
4235 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4237 /* For internal queue jobs just check if CB address is valid */
4238 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4239 parser->user_cb_size,
4240 asic_prop->sram_user_base_address,
4241 asic_prop->sram_end_address))
4244 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4245 parser->user_cb_size,
4246 asic_prop->dram_user_base_address,
4247 asic_prop->dram_end_address))
4250 /* PMMU and HPMMU addresses are equal, check only one of them */
4251 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4252 parser->user_cb_size,
4253 asic_prop->pmmu.start_addr,
4254 asic_prop->pmmu.end_addr))
4258 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4259 parser->user_cb, parser->user_cb_size);
4264 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4266 struct gaudi_device *gaudi = hdev->asic_specific;
4268 if (parser->queue_type == QUEUE_TYPE_INT)
4269 return gaudi_parse_cb_no_ext_queue(hdev, parser);
4271 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4272 return gaudi_parse_cb_mmu(hdev, parser);
4274 return gaudi_parse_cb_no_mmu(hdev, parser);
4277 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
4278 u64 kernel_address, u32 len,
4279 u64 cq_addr, u32 cq_val, u32 msi_vec,
4282 struct gaudi_device *gaudi = hdev->asic_specific;
4283 struct packet_msg_prot *cq_pkt;
4286 cq_pkt = (struct packet_msg_prot *) (uintptr_t)
4287 (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
4289 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4290 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4293 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4295 cq_pkt->ctl = cpu_to_le32(tmp);
4296 cq_pkt->value = cpu_to_le32(cq_val);
4297 cq_pkt->addr = cpu_to_le64(cq_addr);
4301 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4302 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4303 cq_pkt->ctl = cpu_to_le32(tmp);
4304 cq_pkt->value = cpu_to_le32(1);
4306 if (!gaudi->multi_msi_mode)
4309 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4312 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4314 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4317 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
4320 struct packet_lin_dma *lin_dma_pkt;
4321 struct hl_cs_job *job;
4322 u32 cb_size, ctl, err_cause;
4326 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
4330 lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
4331 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4332 cb_size = sizeof(*lin_dma_pkt);
4334 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
4335 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
4336 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
4337 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4338 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
4340 lin_dma_pkt->ctl = cpu_to_le32(ctl);
4341 lin_dma_pkt->src_addr = cpu_to_le64(val);
4342 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
4343 lin_dma_pkt->tsize = cpu_to_le32(size);
4345 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4347 dev_err(hdev->dev, "Failed to allocate a new job\n");
4352 /* Verify DMA is OK */
4353 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4354 if (err_cause && !hdev->init_done) {
4356 "Clearing DMA0 engine from errors (cause 0x%x)\n",
4358 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4363 job->user_cb->cs_cnt++;
4364 job->user_cb_size = cb_size;
4365 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4366 job->patched_cb = job->user_cb;
4367 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4369 hl_debugfs_add_job(hdev, job);
4371 rc = gaudi_send_job_on_qman0(hdev, job);
4372 hl_debugfs_remove_job(hdev, job);
4376 /* Verify DMA is OK */
4377 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4379 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
4381 if (!hdev->init_done) {
4383 "Clearing DMA0 engine from errors (cause 0x%x)\n",
4385 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4391 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4396 static void gaudi_restore_sm_registers(struct hl_device *hdev)
4400 for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4401 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4402 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4403 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4406 for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4407 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4408 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4409 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4412 i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4414 for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4415 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4417 i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4419 for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4420 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4423 static void gaudi_restore_dma_registers(struct hl_device *hdev)
4425 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4426 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4429 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4430 u64 sob_addr = CFG_BASE +
4431 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4433 u32 dma_offset = i * DMA_CORE_OFFSET;
4435 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4436 lower_32_bits(sob_addr));
4437 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4438 upper_32_bits(sob_addr));
4439 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4441 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
4442 * modified by the user for SRAM reduction
4445 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4450 static void gaudi_restore_qm_registers(struct hl_device *hdev)
4455 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4456 qman_offset = i * DMA_QMAN_OFFSET;
4457 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4460 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4461 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4462 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4465 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4466 qman_offset = i * TPC_QMAN_OFFSET;
4467 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4471 static void gaudi_restore_user_registers(struct hl_device *hdev)
4473 gaudi_restore_sm_registers(hdev);
4474 gaudi_restore_dma_registers(hdev);
4475 gaudi_restore_qm_registers(hdev);
4478 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4480 struct asic_fixed_properties *prop = &hdev->asic_prop;
4481 u64 addr = prop->sram_user_base_address;
4482 u32 size = hdev->pldm ? 0x10000 :
4483 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4484 u64 val = 0x7777777777777777ull;
4487 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4489 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4493 gaudi_mmu_prepare(hdev, asid);
4495 gaudi_restore_user_registers(hdev);
4500 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4502 struct asic_fixed_properties *prop = &hdev->asic_prop;
4503 struct gaudi_device *gaudi = hdev->asic_specific;
4504 u64 addr = prop->mmu_pgt_addr;
4505 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4507 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4510 return gaudi_memset_device_memory(hdev, addr, size, 0);
4513 static void gaudi_restore_phase_topology(struct hl_device *hdev)
4518 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4520 struct asic_fixed_properties *prop = &hdev->asic_prop;
4521 struct gaudi_device *gaudi = hdev->asic_specific;
4525 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4527 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4528 (hdev->clock_gating_mask &
4529 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4531 dev_err_ratelimited(hdev->dev,
4532 "Can't read register - clock gating is enabled!\n");
4535 *val = RREG32(addr - CFG_BASE);
4538 } else if ((addr >= SRAM_BASE_ADDR) &&
4539 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4540 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4541 (addr - SRAM_BASE_ADDR));
4542 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4543 u64 bar_base_addr = DRAM_PHYS_BASE +
4544 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4546 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4547 if (hbm_bar_addr != U64_MAX) {
4548 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4549 (addr - bar_base_addr));
4551 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4554 if (hbm_bar_addr == U64_MAX)
4556 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4557 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4565 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4567 struct asic_fixed_properties *prop = &hdev->asic_prop;
4568 struct gaudi_device *gaudi = hdev->asic_specific;
4572 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4574 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4575 (hdev->clock_gating_mask &
4576 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4578 dev_err_ratelimited(hdev->dev,
4579 "Can't write register - clock gating is enabled!\n");
4582 WREG32(addr - CFG_BASE, val);
4585 } else if ((addr >= SRAM_BASE_ADDR) &&
4586 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4587 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4588 (addr - SRAM_BASE_ADDR));
4589 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4590 u64 bar_base_addr = DRAM_PHYS_BASE +
4591 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4593 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4594 if (hbm_bar_addr != U64_MAX) {
4595 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4596 (addr - bar_base_addr));
4598 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4601 if (hbm_bar_addr == U64_MAX)
4603 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4604 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4612 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4614 struct asic_fixed_properties *prop = &hdev->asic_prop;
4615 struct gaudi_device *gaudi = hdev->asic_specific;
4619 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4621 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4622 (hdev->clock_gating_mask &
4623 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4625 dev_err_ratelimited(hdev->dev,
4626 "Can't read register - clock gating is enabled!\n");
4629 u32 val_l = RREG32(addr - CFG_BASE);
4630 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4632 *val = (((u64) val_h) << 32) | val_l;
4635 } else if ((addr >= SRAM_BASE_ADDR) &&
4636 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4637 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4638 (addr - SRAM_BASE_ADDR));
4640 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4641 u64 bar_base_addr = DRAM_PHYS_BASE +
4642 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4644 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4645 if (hbm_bar_addr != U64_MAX) {
4646 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4647 (addr - bar_base_addr));
4649 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4652 if (hbm_bar_addr == U64_MAX)
4654 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4655 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4663 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4665 struct asic_fixed_properties *prop = &hdev->asic_prop;
4666 struct gaudi_device *gaudi = hdev->asic_specific;
4670 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4672 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4673 (hdev->clock_gating_mask &
4674 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4676 dev_err_ratelimited(hdev->dev,
4677 "Can't write register - clock gating is enabled!\n");
4680 WREG32(addr - CFG_BASE, lower_32_bits(val));
4681 WREG32(addr + sizeof(u32) - CFG_BASE,
4682 upper_32_bits(val));
4685 } else if ((addr >= SRAM_BASE_ADDR) &&
4686 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4687 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4688 (addr - SRAM_BASE_ADDR));
4690 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4691 u64 bar_base_addr = DRAM_PHYS_BASE +
4692 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4694 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4695 if (hbm_bar_addr != U64_MAX) {
4696 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4697 (addr - bar_base_addr));
4699 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4702 if (hbm_bar_addr == U64_MAX)
4704 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4705 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4713 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4715 struct gaudi_device *gaudi = hdev->asic_specific;
4717 if (hdev->hard_reset_pending)
4720 return readq(hdev->pcie_bar[HBM_BAR_ID] +
4721 (addr - gaudi->hbm_bar_cur_addr));
4724 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4726 struct gaudi_device *gaudi = hdev->asic_specific;
4728 if (hdev->hard_reset_pending)
4731 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4732 (addr - gaudi->hbm_bar_cur_addr));
4735 static void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4737 /* mask to zero the MMBP and ASID bits */
4738 WREG32_AND(reg, ~0x7FF);
4739 WREG32_OR(reg, asid);
4742 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4744 struct gaudi_device *gaudi = hdev->asic_specific;
4746 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4749 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4750 WARN(1, "asid %u is too big\n", asid);
4754 mutex_lock(&gaudi->clk_gate_mutex);
4756 hdev->asic_funcs->disable_clock_gating(hdev);
4758 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4759 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4760 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4761 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4762 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4764 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4765 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4766 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4767 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4768 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4770 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4771 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4772 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4773 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4774 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4776 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4777 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4778 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4779 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4780 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4782 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4783 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4784 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4785 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4786 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4788 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4789 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4790 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4791 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4792 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4794 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4795 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4796 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4797 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4798 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4800 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4801 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4802 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4803 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4804 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4806 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
4807 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
4808 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
4809 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
4810 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
4811 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
4812 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
4813 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
4815 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4816 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4817 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4818 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4819 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4820 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
4821 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
4823 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4824 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4825 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4826 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4827 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4828 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
4829 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
4831 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4832 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4833 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4834 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4835 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4836 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
4837 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
4839 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4840 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4841 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4842 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4843 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4844 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
4845 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
4847 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4848 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4849 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4850 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4851 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4852 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
4853 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
4855 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4856 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4857 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4858 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4859 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4860 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
4861 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
4863 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4864 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4865 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4866 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4867 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4868 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
4869 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
4871 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4872 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4873 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4874 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4875 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4876 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
4877 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
4879 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4880 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4881 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4882 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4883 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4884 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4885 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4886 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4887 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4888 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4890 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
4891 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
4892 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
4893 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
4894 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
4895 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
4896 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
4897 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
4898 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
4899 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
4900 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
4901 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
4903 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
4904 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
4906 hdev->asic_funcs->set_clock_gating(hdev);
4908 mutex_unlock(&gaudi->clk_gate_mutex);
4911 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
4912 struct hl_cs_job *job)
4914 struct packet_msg_prot *fence_pkt;
4916 dma_addr_t fence_dma_addr;
4918 u32 tmp, timeout, dma_offset;
4922 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
4924 timeout = HL_DEVICE_TIMEOUT_USEC;
4926 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
4927 dev_err_ratelimited(hdev->dev,
4928 "Can't send driver job on QMAN0 because the device is not idle\n");
4932 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4936 "Failed to allocate fence memory for QMAN0\n");
4940 cb = job->patched_cb;
4942 fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
4943 job->job_cb_size - sizeof(struct packet_msg_prot));
4945 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4946 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4947 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4949 fence_pkt->ctl = cpu_to_le32(tmp);
4950 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
4951 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4953 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
4955 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
4957 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
4958 job->job_cb_size, cb->bus_address);
4960 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
4961 goto free_fence_ptr;
4964 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
4965 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
4968 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
4970 if (rc == -ETIMEDOUT) {
4971 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
4972 goto free_fence_ptr;
4976 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
4977 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
4979 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4984 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
4986 if (event_type >= GAUDI_EVENT_SIZE)
4987 goto event_not_supported;
4989 if (!gaudi_irq_map_table[event_type].valid)
4990 goto event_not_supported;
4992 snprintf(desc, size, gaudi_irq_map_table[event_type].name);
4996 event_not_supported:
4997 snprintf(desc, size, "N/A");
5000 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
5001 u32 x_y, bool is_write)
5003 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
5005 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
5006 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
5009 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5010 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5014 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5015 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5019 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5020 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5024 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5025 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5030 goto unknown_initiator;
5033 for (i = 0 ; i < 2 ; i++) {
5034 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
5035 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5039 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5040 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5041 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5043 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5046 return "DMA0 or DMA2";
5047 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5048 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5049 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5051 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5054 return "DMA1 or DMA3";
5055 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5056 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5057 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5059 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5062 return "DMA4 or DMA6";
5063 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5064 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5065 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5067 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5070 return "DMA5 or DMA7";
5074 return "unknown initiator";
5077 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
5080 u32 val, x_y, axi_id;
5082 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
5083 RREG32(mmMMU_UP_RAZWI_READ_ID);
5084 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
5085 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
5086 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
5087 RAZWI_INITIATOR_AXI_ID_SHIFT);
5090 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
5091 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5093 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5096 case RAZWI_INITIATOR_ID_X_Y_TPC1:
5098 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
5099 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
5101 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
5102 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
5104 case RAZWI_INITIATOR_ID_X_Y_TPC2:
5106 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
5107 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5109 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
5111 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
5113 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
5116 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5117 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5118 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5119 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5120 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5121 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5122 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5123 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5124 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
5125 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
5126 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5128 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5130 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5133 case RAZWI_INITIATOR_ID_X_Y_TPC5:
5135 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
5136 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
5138 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
5139 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
5141 case RAZWI_INITIATOR_ID_X_Y_TPC6:
5143 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
5144 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5146 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5148 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5156 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5158 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5159 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5160 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5161 RAZWI_INITIATOR_AXI_ID_MASK);
5163 return "unknown initiator";
5166 static void gaudi_print_razwi_info(struct hl_device *hdev)
5168 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5169 dev_err_ratelimited(hdev->dev,
5170 "RAZWI event caused by illegal write of %s\n",
5171 gaudi_get_razwi_initiator_name(hdev, true));
5172 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5175 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5176 dev_err_ratelimited(hdev->dev,
5177 "RAZWI event caused by illegal read of %s\n",
5178 gaudi_get_razwi_initiator_name(hdev, false));
5179 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5183 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5185 struct gaudi_device *gaudi = hdev->asic_specific;
5189 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5192 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5193 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5194 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5196 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5198 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5201 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5204 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5205 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5206 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5208 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5210 dev_err_ratelimited(hdev->dev,
5211 "MMU access error on va 0x%llx\n", addr);
5213 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5218 * +-------------------+------------------------------------------------------+
5219 * | Configuration Reg | Description |
5221 * +-------------------+------------------------------------------------------+
5222 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
5223 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
5224 * | |0xF34 memory wrappers 63:32 |
5225 * | |0xF38 memory wrappers 95:64 |
5226 * | |0xF3C memory wrappers 127:96 |
5227 * +-------------------+------------------------------------------------------+
5228 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
5229 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
5230 * | |0xF44 memory wrappers 63:32 |
5231 * | |0xF48 memory wrappers 95:64 |
5232 * | |0xF4C memory wrappers 127:96 |
5233 * +-------------------+------------------------------------------------------+
5235 static int gaudi_extract_ecc_info(struct hl_device *hdev,
5236 struct ecc_info_extract_params *params, u64 *ecc_address,
5237 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
5239 struct gaudi_device *gaudi = hdev->asic_specific;
5240 u32 i, num_mem_regs, reg, err_bit;
5241 u64 err_addr, err_word = 0;
5244 num_mem_regs = params->num_memories / 32 +
5245 ((params->num_memories % 32) ? 1 : 0);
5247 if (params->block_address >= CFG_BASE)
5248 params->block_address -= CFG_BASE;
5251 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
5253 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
5255 if (params->disable_clock_gating) {
5256 mutex_lock(&gaudi->clk_gate_mutex);
5257 hdev->asic_funcs->disable_clock_gating(hdev);
5260 /* Set invalid wrapper index */
5261 *memory_wrapper_idx = 0xFF;
5263 /* Iterate through memory wrappers, a single bit must be set */
5264 for (i = 0 ; i < num_mem_regs ; i++) {
5266 err_word = RREG32(err_addr);
5268 err_bit = __ffs(err_word);
5269 *memory_wrapper_idx = err_bit + (32 * i);
5274 if (*memory_wrapper_idx == 0xFF) {
5275 dev_err(hdev->dev, "ECC error information cannot be found\n");
5277 goto enable_clk_gate;
5280 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
5281 *memory_wrapper_idx);
5284 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
5286 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
5288 /* Clear error indication */
5289 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
5291 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
5293 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
5295 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
5298 if (params->disable_clock_gating) {
5299 hdev->asic_funcs->set_clock_gating(hdev);
5301 mutex_unlock(&gaudi->clk_gate_mutex);
5307 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
5308 const char *qm_name,
5312 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
5315 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
5316 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
5317 glbl_sts_clr_val = 0;
5318 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
5323 if (i == QMAN_STREAMS)
5324 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
5326 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
5328 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
5329 if (glbl_sts_val & BIT(j)) {
5330 dev_err_ratelimited(hdev->dev,
5331 "%s %s. err cause: %s\n",
5333 gaudi_qman_error_cause[j]);
5334 glbl_sts_clr_val |= BIT(j);
5338 /* Write 1 clear errors */
5339 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
5342 arb_err_val = RREG32(arb_err_addr);
5347 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
5348 if (arb_err_val & BIT(j)) {
5349 dev_err_ratelimited(hdev->dev,
5350 "%s ARB_ERR. err cause: %s\n",
5352 gaudi_qman_arb_error_cause[j]);
5357 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
5358 struct hl_eq_ecc_data *ecc_data)
5360 struct ecc_info_extract_params params;
5361 u64 ecc_address = 0, ecc_syndrom = 0;
5362 u8 index, memory_wrapper_idx = 0;
5363 bool extract_info_from_fw;
5366 switch (event_type) {
5367 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
5368 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
5369 extract_info_from_fw = true;
5371 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5372 index = event_type - GAUDI_EVENT_TPC0_SERR;
5373 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5374 params.num_memories = 90;
5375 params.derr = false;
5376 params.disable_clock_gating = true;
5377 extract_info_from_fw = false;
5379 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5380 index = event_type - GAUDI_EVENT_TPC0_DERR;
5381 params.block_address =
5382 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5383 params.num_memories = 90;
5385 params.disable_clock_gating = true;
5386 extract_info_from_fw = false;
5388 case GAUDI_EVENT_MME0_ACC_SERR:
5389 case GAUDI_EVENT_MME1_ACC_SERR:
5390 case GAUDI_EVENT_MME2_ACC_SERR:
5391 case GAUDI_EVENT_MME3_ACC_SERR:
5392 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
5393 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5394 params.num_memories = 128;
5395 params.derr = false;
5396 params.disable_clock_gating = true;
5397 extract_info_from_fw = false;
5399 case GAUDI_EVENT_MME0_ACC_DERR:
5400 case GAUDI_EVENT_MME1_ACC_DERR:
5401 case GAUDI_EVENT_MME2_ACC_DERR:
5402 case GAUDI_EVENT_MME3_ACC_DERR:
5403 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
5404 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5405 params.num_memories = 128;
5407 params.disable_clock_gating = true;
5408 extract_info_from_fw = false;
5410 case GAUDI_EVENT_MME0_SBAB_SERR:
5411 case GAUDI_EVENT_MME1_SBAB_SERR:
5412 case GAUDI_EVENT_MME2_SBAB_SERR:
5413 case GAUDI_EVENT_MME3_SBAB_SERR:
5414 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
5415 params.block_address =
5416 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5417 params.num_memories = 33;
5418 params.derr = false;
5419 params.disable_clock_gating = true;
5420 extract_info_from_fw = false;
5422 case GAUDI_EVENT_MME0_SBAB_DERR:
5423 case GAUDI_EVENT_MME1_SBAB_DERR:
5424 case GAUDI_EVENT_MME2_SBAB_DERR:
5425 case GAUDI_EVENT_MME3_SBAB_DERR:
5426 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
5427 params.block_address =
5428 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5429 params.num_memories = 33;
5431 params.disable_clock_gating = true;
5436 if (extract_info_from_fw) {
5437 ecc_address = le64_to_cpu(ecc_data->ecc_address);
5438 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
5439 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
5441 rc = gaudi_extract_ecc_info(hdev, ¶ms, &ecc_address,
5442 &ecc_syndrom, &memory_wrapper_idx);
5448 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
5449 ecc_address, ecc_syndrom, memory_wrapper_idx);
5452 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
5454 u64 glbl_sts_addr, arb_err_addr;
5458 switch (event_type) {
5459 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5460 index = event_type - GAUDI_EVENT_TPC0_QM;
5462 mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
5464 mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
5465 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
5467 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5468 index = event_type - GAUDI_EVENT_MME0_QM;
5470 mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
5472 mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
5473 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
5475 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5476 index = event_type - GAUDI_EVENT_DMA0_QM;
5478 mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
5480 mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
5481 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
5487 gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
5490 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
5495 gaudi_get_event_desc(event_type, desc, sizeof(desc));
5496 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
5500 gaudi_print_razwi_info(hdev);
5501 gaudi_print_mmu_error_info(hdev);
5505 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
5507 struct gaudi_device *gaudi = hdev->asic_specific;
5509 /* Unmask all IRQs since some could have been received
5510 * during the soft reset
5512 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
5515 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
5518 u32 base, val, val2;
5520 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
5521 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
5522 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
5523 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5527 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5528 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
5529 (val >> 2) & 0x1, (val >> 3) & 0x1,
5532 val2 = RREG32(base + ch * 0x1000 + 0x060);
5534 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5536 RREG32(base + ch * 0x1000 + 0x064),
5537 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5538 (val2 & 0xFF0000) >> 16,
5539 (val2 & 0xFF000000) >> 24);
5542 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
5543 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5547 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5548 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
5549 (val >> 2) & 0x1, (val >> 3) & 0x1,
5552 val2 = RREG32(base + ch * 0x1000 + 0x070);
5554 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5556 RREG32(base + ch * 0x1000 + 0x074),
5557 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5558 (val2 & 0xFF0000) >> 16,
5559 (val2 & 0xFF000000) >> 24);
5562 /* Clear interrupts */
5563 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
5564 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
5565 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
5566 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
5567 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
5568 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
5571 val = RREG32(base + 0x8F30);
5572 val2 = RREG32(base + 0x8F34);
5576 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
5579 val = RREG32(base + 0x8F40);
5580 val2 = RREG32(base + 0x8F44);
5584 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
5591 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
5593 switch (hbm_event_type) {
5594 case GAUDI_EVENT_HBM0_SPI_0:
5595 case GAUDI_EVENT_HBM0_SPI_1:
5597 case GAUDI_EVENT_HBM1_SPI_0:
5598 case GAUDI_EVENT_HBM1_SPI_1:
5600 case GAUDI_EVENT_HBM2_SPI_0:
5601 case GAUDI_EVENT_HBM2_SPI_1:
5603 case GAUDI_EVENT_HBM3_SPI_0:
5604 case GAUDI_EVENT_HBM3_SPI_1:
5610 /* Should never happen */
5614 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
5615 char *interrupt_name)
5617 struct gaudi_device *gaudi = hdev->asic_specific;
5618 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
5619 bool soft_reset_required = false;
5621 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
5622 * gating, and thus cannot be done in CPU-CP and should be done instead
5626 mutex_lock(&gaudi->clk_gate_mutex);
5628 hdev->asic_funcs->disable_clock_gating(hdev);
5630 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
5631 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
5633 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
5634 if (tpc_interrupts_cause & BIT(i)) {
5635 dev_err_ratelimited(hdev->dev,
5636 "TPC%d_%s interrupt cause: %s\n",
5637 tpc_id, interrupt_name,
5638 gaudi_tpc_interrupts_cause[i]);
5639 /* If this is QM error, we need to soft-reset */
5641 soft_reset_required = true;
5644 /* Clear interrupts */
5645 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
5647 hdev->asic_funcs->set_clock_gating(hdev);
5649 mutex_unlock(&gaudi->clk_gate_mutex);
5651 return soft_reset_required;
5654 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
5656 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
5659 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
5661 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
5664 static void gaudi_print_clk_change_info(struct hl_device *hdev,
5667 switch (event_type) {
5668 case GAUDI_EVENT_FIX_POWER_ENV_S:
5669 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
5670 dev_info_ratelimited(hdev->dev,
5671 "Clock throttling due to power consumption\n");
5674 case GAUDI_EVENT_FIX_POWER_ENV_E:
5675 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
5676 dev_info_ratelimited(hdev->dev,
5677 "Power envelop is safe, back to optimal clock\n");
5680 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
5681 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
5682 dev_info_ratelimited(hdev->dev,
5683 "Clock throttling due to overheating\n");
5686 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
5687 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
5688 dev_info_ratelimited(hdev->dev,
5689 "Thermal envelop is safe, back to optimal clock\n");
5693 dev_err(hdev->dev, "Received invalid clock change event %d\n",
5699 static void gaudi_handle_eqe(struct hl_device *hdev,
5700 struct hl_eq_entry *eq_entry)
5702 struct gaudi_device *gaudi = hdev->asic_specific;
5703 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
5704 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
5705 >> EQ_CTL_EVENT_TYPE_SHIFT);
5707 bool reset_required;
5709 gaudi->events_stat[event_type]++;
5710 gaudi->events_stat_aggregate[event_type]++;
5712 switch (event_type) {
5713 case GAUDI_EVENT_PCIE_CORE_DERR:
5714 case GAUDI_EVENT_PCIE_IF_DERR:
5715 case GAUDI_EVENT_PCIE_PHY_DERR:
5716 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5717 case GAUDI_EVENT_MME0_ACC_DERR:
5718 case GAUDI_EVENT_MME0_SBAB_DERR:
5719 case GAUDI_EVENT_MME1_ACC_DERR:
5720 case GAUDI_EVENT_MME1_SBAB_DERR:
5721 case GAUDI_EVENT_MME2_ACC_DERR:
5722 case GAUDI_EVENT_MME2_SBAB_DERR:
5723 case GAUDI_EVENT_MME3_ACC_DERR:
5724 case GAUDI_EVENT_MME3_SBAB_DERR:
5725 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5727 case GAUDI_EVENT_CPU_IF_ECC_DERR:
5728 case GAUDI_EVENT_PSOC_MEM_DERR:
5729 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5730 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5731 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5732 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5733 case GAUDI_EVENT_MMU_DERR:
5734 gaudi_print_irq_info(hdev, event_type, true);
5735 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5736 if (hdev->hard_reset_on_fw_events)
5737 hl_device_reset(hdev, true, false);
5740 case GAUDI_EVENT_GIC500:
5741 case GAUDI_EVENT_AXI_ECC:
5742 case GAUDI_EVENT_L2_RAM_ECC:
5743 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5744 gaudi_print_irq_info(hdev, event_type, false);
5745 if (hdev->hard_reset_on_fw_events)
5746 hl_device_reset(hdev, true, false);
5749 case GAUDI_EVENT_HBM0_SPI_0:
5750 case GAUDI_EVENT_HBM1_SPI_0:
5751 case GAUDI_EVENT_HBM2_SPI_0:
5752 case GAUDI_EVENT_HBM3_SPI_0:
5753 gaudi_print_irq_info(hdev, event_type, false);
5754 gaudi_hbm_read_interrupts(hdev,
5755 gaudi_hbm_event_to_dev(event_type));
5756 if (hdev->hard_reset_on_fw_events)
5757 hl_device_reset(hdev, true, false);
5760 case GAUDI_EVENT_HBM0_SPI_1:
5761 case GAUDI_EVENT_HBM1_SPI_1:
5762 case GAUDI_EVENT_HBM2_SPI_1:
5763 case GAUDI_EVENT_HBM3_SPI_1:
5764 gaudi_print_irq_info(hdev, event_type, false);
5765 gaudi_hbm_read_interrupts(hdev,
5766 gaudi_hbm_event_to_dev(event_type));
5769 case GAUDI_EVENT_TPC0_DEC:
5770 case GAUDI_EVENT_TPC1_DEC:
5771 case GAUDI_EVENT_TPC2_DEC:
5772 case GAUDI_EVENT_TPC3_DEC:
5773 case GAUDI_EVENT_TPC4_DEC:
5774 case GAUDI_EVENT_TPC5_DEC:
5775 case GAUDI_EVENT_TPC6_DEC:
5776 case GAUDI_EVENT_TPC7_DEC:
5777 gaudi_print_irq_info(hdev, event_type, true);
5778 reset_required = gaudi_tpc_read_interrupts(hdev,
5779 tpc_dec_event_to_tpc_id(event_type),
5780 "AXI_SLV_DEC_Error");
5781 if (reset_required) {
5782 dev_err(hdev->dev, "hard reset required due to %s\n",
5783 gaudi_irq_map_table[event_type].name);
5785 if (hdev->hard_reset_on_fw_events)
5786 hl_device_reset(hdev, true, false);
5788 hl_fw_unmask_irq(hdev, event_type);
5792 case GAUDI_EVENT_TPC0_KRN_ERR:
5793 case GAUDI_EVENT_TPC1_KRN_ERR:
5794 case GAUDI_EVENT_TPC2_KRN_ERR:
5795 case GAUDI_EVENT_TPC3_KRN_ERR:
5796 case GAUDI_EVENT_TPC4_KRN_ERR:
5797 case GAUDI_EVENT_TPC5_KRN_ERR:
5798 case GAUDI_EVENT_TPC6_KRN_ERR:
5799 case GAUDI_EVENT_TPC7_KRN_ERR:
5800 gaudi_print_irq_info(hdev, event_type, true);
5801 reset_required = gaudi_tpc_read_interrupts(hdev,
5802 tpc_krn_event_to_tpc_id(event_type),
5804 if (reset_required) {
5805 dev_err(hdev->dev, "hard reset required due to %s\n",
5806 gaudi_irq_map_table[event_type].name);
5808 if (hdev->hard_reset_on_fw_events)
5809 hl_device_reset(hdev, true, false);
5811 hl_fw_unmask_irq(hdev, event_type);
5815 case GAUDI_EVENT_PCIE_CORE_SERR:
5816 case GAUDI_EVENT_PCIE_IF_SERR:
5817 case GAUDI_EVENT_PCIE_PHY_SERR:
5818 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5819 case GAUDI_EVENT_MME0_ACC_SERR:
5820 case GAUDI_EVENT_MME0_SBAB_SERR:
5821 case GAUDI_EVENT_MME1_ACC_SERR:
5822 case GAUDI_EVENT_MME1_SBAB_SERR:
5823 case GAUDI_EVENT_MME2_ACC_SERR:
5824 case GAUDI_EVENT_MME2_SBAB_SERR:
5825 case GAUDI_EVENT_MME3_ACC_SERR:
5826 case GAUDI_EVENT_MME3_SBAB_SERR:
5827 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5828 case GAUDI_EVENT_CPU_IF_ECC_SERR:
5829 case GAUDI_EVENT_PSOC_MEM_SERR:
5830 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5831 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5832 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5833 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5835 case GAUDI_EVENT_MMU_SERR:
5836 gaudi_print_irq_info(hdev, event_type, true);
5837 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5838 hl_fw_unmask_irq(hdev, event_type);
5841 case GAUDI_EVENT_PCIE_DEC:
5842 case GAUDI_EVENT_MME0_WBC_RSP:
5843 case GAUDI_EVENT_MME0_SBAB0_RSP:
5844 case GAUDI_EVENT_MME1_WBC_RSP:
5845 case GAUDI_EVENT_MME1_SBAB0_RSP:
5846 case GAUDI_EVENT_MME2_WBC_RSP:
5847 case GAUDI_EVENT_MME2_SBAB0_RSP:
5848 case GAUDI_EVENT_MME3_WBC_RSP:
5849 case GAUDI_EVENT_MME3_SBAB0_RSP:
5850 case GAUDI_EVENT_CPU_AXI_SPLITTER:
5851 case GAUDI_EVENT_PSOC_AXI_DEC:
5852 case GAUDI_EVENT_PSOC_PRSTN_FALL:
5853 case GAUDI_EVENT_MMU_PAGE_FAULT:
5854 case GAUDI_EVENT_MMU_WR_PERM:
5855 case GAUDI_EVENT_RAZWI_OR_ADC:
5856 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5857 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5858 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5860 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5861 gaudi_print_irq_info(hdev, event_type, true);
5862 gaudi_handle_qman_err(hdev, event_type);
5863 hl_fw_unmask_irq(hdev, event_type);
5866 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
5867 gaudi_print_irq_info(hdev, event_type, true);
5868 if (hdev->hard_reset_on_fw_events)
5869 hl_device_reset(hdev, true, false);
5872 case GAUDI_EVENT_TPC0_BMON_SPMU:
5873 case GAUDI_EVENT_TPC1_BMON_SPMU:
5874 case GAUDI_EVENT_TPC2_BMON_SPMU:
5875 case GAUDI_EVENT_TPC3_BMON_SPMU:
5876 case GAUDI_EVENT_TPC4_BMON_SPMU:
5877 case GAUDI_EVENT_TPC5_BMON_SPMU:
5878 case GAUDI_EVENT_TPC6_BMON_SPMU:
5879 case GAUDI_EVENT_TPC7_BMON_SPMU:
5880 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
5881 gaudi_print_irq_info(hdev, event_type, false);
5882 hl_fw_unmask_irq(hdev, event_type);
5885 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
5886 gaudi_print_clk_change_info(hdev, event_type);
5887 hl_fw_unmask_irq(hdev, event_type);
5890 case GAUDI_EVENT_PSOC_GPIO_U16_0:
5891 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
5893 "Received high temp H/W interrupt %d (cause %d)\n",
5898 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
5904 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
5907 struct gaudi_device *gaudi = hdev->asic_specific;
5910 *size = (u32) sizeof(gaudi->events_stat_aggregate);
5911 return gaudi->events_stat_aggregate;
5914 *size = (u32) sizeof(gaudi->events_stat);
5915 return gaudi->events_stat;
5918 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
5921 struct gaudi_device *gaudi = hdev->asic_specific;
5922 u32 status, timeout_usec;
5925 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5926 hdev->hard_reset_pending)
5930 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5932 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5934 mutex_lock(&hdev->mmu_cache_lock);
5936 /* L0 & L1 invalidation */
5937 WREG32(mmSTLB_INV_PS, 3);
5938 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
5939 WREG32(mmSTLB_INV_PS, 2);
5941 rc = hl_poll_timeout(
5949 WREG32(mmSTLB_INV_SET, 0);
5951 mutex_unlock(&hdev->mmu_cache_lock);
5954 dev_err_ratelimited(hdev->dev,
5955 "MMU cache invalidation timeout\n");
5956 hl_device_reset(hdev, true, false);
5962 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
5963 bool is_hard, u32 asid, u64 va, u64 size)
5965 struct gaudi_device *gaudi = hdev->asic_specific;
5966 u32 status, timeout_usec;
5971 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5972 hdev->hard_reset_pending)
5975 mutex_lock(&hdev->mmu_cache_lock);
5978 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5980 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5983 * TODO: currently invalidate entire L0 & L1 as in regular hard
5984 * invalidation. Need to apply invalidation of specific cache
5985 * lines with mask of ASID & VA & size.
5986 * Note that L1 with be flushed entirely in any case.
5989 /* L0 & L1 invalidation */
5990 inv_data = RREG32(mmSTLB_CACHE_INV);
5992 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
5993 WREG32(mmSTLB_CACHE_INV,
5994 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
5996 rc = hl_poll_timeout(
5998 mmSTLB_INV_CONSUMER_INDEX,
6004 mutex_unlock(&hdev->mmu_cache_lock);
6007 dev_err_ratelimited(hdev->dev,
6008 "MMU cache invalidation timeout\n");
6009 hl_device_reset(hdev, true, false);
6015 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
6016 u32 asid, u64 phys_addr)
6018 u32 status, timeout_usec;
6022 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6024 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6026 WREG32(MMU_ASID, asid);
6027 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
6028 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
6029 WREG32(MMU_BUSY, 0x80000000);
6031 rc = hl_poll_timeout(
6035 !(status & 0x80000000),
6041 "Timeout during MMU hop0 config of asid %d\n", asid);
6048 static int gaudi_send_heartbeat(struct hl_device *hdev)
6050 struct gaudi_device *gaudi = hdev->asic_specific;
6052 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6055 return hl_fw_send_heartbeat(hdev);
6058 static int gaudi_cpucp_info_get(struct hl_device *hdev)
6060 struct gaudi_device *gaudi = hdev->asic_specific;
6061 struct asic_fixed_properties *prop = &hdev->asic_prop;
6064 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6067 rc = hl_fw_cpucp_info_get(hdev);
6071 if (!strlen(prop->cpucp_info.card_name))
6072 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
6075 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
6077 if (hdev->card_type == cpucp_card_type_pci)
6078 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
6079 else if (hdev->card_type == cpucp_card_type_pmc)
6080 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
6082 hdev->max_power = prop->max_power_default;
6087 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
6090 struct gaudi_device *gaudi = hdev->asic_specific;
6091 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
6092 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
6093 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
6094 bool is_idle = true, is_eng_idle, is_slave;
6098 mutex_lock(&gaudi->clk_gate_mutex);
6100 hdev->asic_funcs->disable_clock_gating(hdev);
6104 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
6105 "--- ------- ------------ ---------- -------------\n");
6107 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
6108 dma_id = gaudi_dma_assignment[i];
6109 offset = dma_id * DMA_QMAN_OFFSET;
6111 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
6112 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
6113 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
6114 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6115 IS_DMA_IDLE(dma_core_sts0);
6116 is_idle &= is_eng_idle;
6119 *mask |= ((u64) !is_eng_idle) <<
6120 (GAUDI_ENGINE_ID_DMA_0 + dma_id);
6122 seq_printf(s, fmt, dma_id,
6123 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
6124 qm_cgm_sts, dma_core_sts0);
6129 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
6130 "--- ------- ------------ ---------- ----------\n");
6132 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6133 offset = i * TPC_QMAN_OFFSET;
6134 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
6135 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
6136 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
6137 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6138 IS_TPC_IDLE(tpc_cfg_sts);
6139 is_idle &= is_eng_idle;
6142 *mask |= ((u64) !is_eng_idle) <<
6143 (GAUDI_ENGINE_ID_TPC_0 + i);
6145 seq_printf(s, fmt, i,
6146 is_eng_idle ? "Y" : "N",
6147 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6152 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
6153 "--- ------- ------------ ---------- -----------\n");
6155 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
6156 offset = i * MME_QMAN_OFFSET;
6157 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
6158 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
6160 /* MME 1 & 3 are slaves, no need to check their QMANs */
6163 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
6164 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
6165 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
6168 is_idle &= is_eng_idle;
6171 *mask |= ((u64) !is_eng_idle) <<
6172 (GAUDI_ENGINE_ID_MME_0 + i);
6175 seq_printf(s, fmt, i,
6176 is_eng_idle ? "Y" : "N",
6177 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
6179 seq_printf(s, mme_slave_fmt, i,
6180 is_eng_idle ? "Y" : "N", "-",
6188 hdev->asic_funcs->set_clock_gating(hdev);
6190 mutex_unlock(&gaudi->clk_gate_mutex);
6195 static void gaudi_hw_queues_lock(struct hl_device *hdev)
6196 __acquires(&gaudi->hw_queues_lock)
6198 struct gaudi_device *gaudi = hdev->asic_specific;
6200 spin_lock(&gaudi->hw_queues_lock);
6203 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
6204 __releases(&gaudi->hw_queues_lock)
6206 struct gaudi_device *gaudi = hdev->asic_specific;
6208 spin_unlock(&gaudi->hw_queues_lock);
6211 static u32 gaudi_get_pci_id(struct hl_device *hdev)
6213 return hdev->pdev->device;
6216 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
6219 struct gaudi_device *gaudi = hdev->asic_specific;
6221 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6224 return hl_fw_get_eeprom_data(hdev, data, max_size);
6228 * this function should be used only during initialization and/or after reset,
6229 * when there are no active users.
6231 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6234 struct gaudi_device *gaudi = hdev->asic_specific;
6239 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
6242 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
6244 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
6246 mutex_lock(&gaudi->clk_gate_mutex);
6248 hdev->asic_funcs->disable_clock_gating(hdev);
6250 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
6251 lower_32_bits(tpc_kernel));
6252 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
6253 upper_32_bits(tpc_kernel));
6255 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
6256 lower_32_bits(tpc_kernel));
6257 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
6258 upper_32_bits(tpc_kernel));
6259 /* set a valid LUT pointer, content is of no significance */
6260 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
6261 lower_32_bits(tpc_kernel));
6262 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
6263 upper_32_bits(tpc_kernel));
6265 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
6266 lower_32_bits(CFG_BASE +
6267 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
6269 WREG32(mmTPC0_CFG_TPC_CMD + offset,
6270 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
6271 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
6272 /* wait a bit for the engine to start executing */
6273 usleep_range(1000, 1500);
6275 /* wait until engine has finished executing */
6276 rc = hl_poll_timeout(
6278 mmTPC0_CFG_STATUS + offset,
6280 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6281 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6287 "Timeout while waiting for TPC%d icache prefetch\n",
6289 hdev->asic_funcs->set_clock_gating(hdev);
6290 mutex_unlock(&gaudi->clk_gate_mutex);
6294 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
6295 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
6297 /* wait a bit for the engine to start executing */
6298 usleep_range(1000, 1500);
6300 /* wait until engine has finished executing */
6301 rc = hl_poll_timeout(
6303 mmTPC0_CFG_STATUS + offset,
6305 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6306 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6312 "Timeout while waiting for TPC%d vector pipe\n",
6314 hdev->asic_funcs->set_clock_gating(hdev);
6315 mutex_unlock(&gaudi->clk_gate_mutex);
6319 rc = hl_poll_timeout(
6321 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
6327 hdev->asic_funcs->set_clock_gating(hdev);
6328 mutex_unlock(&gaudi->clk_gate_mutex);
6332 "Timeout while waiting for TPC%d kernel to execute\n",
6340 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6342 return RREG32(mmHW_STATE);
6345 static int gaudi_ctx_init(struct hl_ctx *ctx)
6350 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6352 return gaudi_cq_assignment[cq_idx];
6355 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
6357 return sizeof(struct packet_msg_short) +
6358 sizeof(struct packet_msg_prot) * 2;
6361 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
6363 return sizeof(struct packet_msg_short) * 4 +
6364 sizeof(struct packet_fence) +
6365 sizeof(struct packet_msg_prot) * 2;
6368 static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
6370 struct hl_cb *cb = (struct hl_cb *) data;
6371 struct packet_msg_short *pkt;
6374 pkt = (struct packet_msg_short *) (uintptr_t) cb->kernel_address;
6375 memset(pkt, 0, sizeof(*pkt));
6377 /* Inc by 1, Mode ADD */
6378 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
6379 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
6381 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
6382 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6383 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
6384 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6385 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
6386 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6387 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6389 pkt->value = cpu_to_le32(value);
6390 pkt->ctl = cpu_to_le32(ctl);
6393 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
6396 u32 ctl, pkt_size = sizeof(*pkt);
6398 memset(pkt, 0, pkt_size);
6400 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6401 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
6402 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6403 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6404 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6405 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */
6407 pkt->value = cpu_to_le32(value);
6408 pkt->ctl = cpu_to_le32(ctl);
6413 static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
6414 u16 sob_val, u16 addr)
6416 u32 ctl, value, pkt_size = sizeof(*pkt);
6417 u8 mask = ~(1 << (sob_id & 0x7));
6419 memset(pkt, 0, pkt_size);
6421 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8);
6422 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
6423 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
6424 0); /* GREATER OR EQUAL*/
6425 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
6427 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6428 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6429 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
6430 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6431 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6432 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6433 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6435 pkt->value = cpu_to_le32(value);
6436 pkt->ctl = cpu_to_le32(ctl);
6441 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
6443 u32 ctl, cfg, pkt_size = sizeof(*pkt);
6445 memset(pkt, 0, pkt_size);
6447 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
6448 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
6449 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
6451 ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
6452 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6453 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6454 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6456 pkt->cfg = cpu_to_le32(cfg);
6457 pkt->ctl = cpu_to_le32(ctl);
6462 static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
6463 u16 sob_val, u16 mon_id, u32 q_idx)
6465 struct hl_cb *cb = (struct hl_cb *) data;
6466 void *buf = (void *) (uintptr_t) cb->kernel_address;
6467 u64 monitor_base, fence_addr = 0;
6469 u16 msg_addr_offset;
6472 case GAUDI_QUEUE_ID_DMA_0_0:
6473 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
6475 case GAUDI_QUEUE_ID_DMA_0_1:
6476 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
6478 case GAUDI_QUEUE_ID_DMA_0_2:
6479 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
6481 case GAUDI_QUEUE_ID_DMA_0_3:
6482 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
6484 case GAUDI_QUEUE_ID_DMA_1_0:
6485 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
6487 case GAUDI_QUEUE_ID_DMA_1_1:
6488 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
6490 case GAUDI_QUEUE_ID_DMA_1_2:
6491 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
6493 case GAUDI_QUEUE_ID_DMA_1_3:
6494 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
6496 case GAUDI_QUEUE_ID_DMA_5_0:
6497 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
6499 case GAUDI_QUEUE_ID_DMA_5_1:
6500 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
6502 case GAUDI_QUEUE_ID_DMA_5_2:
6503 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
6505 case GAUDI_QUEUE_ID_DMA_5_3:
6506 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
6509 /* queue index should be valid here */
6510 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
6515 fence_addr += CFG_BASE;
6518 * monitor_base should be the content of the base0 address registers,
6519 * so it will be added to the msg short offsets
6521 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
6523 /* First monitor config packet: low address of the sync */
6525 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
6528 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
6531 /* Second monitor config packet: high address of the sync */
6533 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
6536 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
6540 * Third monitor config packet: the payload, i.e. what to write when the
6544 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
6547 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
6549 /* Fourth monitor config packet: bind the monitor to a sync object */
6551 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
6553 size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
6557 size += gaudi_add_fence_pkt(buf + size);
6560 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
6562 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
6564 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
6567 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
6570 kref_init(&hw_sob->kref);
6573 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
6575 if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
6576 HL_POWER9_HOST_MAGIC) {
6577 hdev->power9_64bit_dma_enable = 1;
6578 hdev->dma_mask = 64;
6580 hdev->power9_64bit_dma_enable = 0;
6581 hdev->dma_mask = 48;
6585 static u64 gaudi_get_device_time(struct hl_device *hdev)
6587 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
6589 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
6592 static const struct hl_asic_funcs gaudi_funcs = {
6593 .early_init = gaudi_early_init,
6594 .early_fini = gaudi_early_fini,
6595 .late_init = gaudi_late_init,
6596 .late_fini = gaudi_late_fini,
6597 .sw_init = gaudi_sw_init,
6598 .sw_fini = gaudi_sw_fini,
6599 .hw_init = gaudi_hw_init,
6600 .hw_fini = gaudi_hw_fini,
6601 .halt_engines = gaudi_halt_engines,
6602 .suspend = gaudi_suspend,
6603 .resume = gaudi_resume,
6604 .cb_mmap = gaudi_cb_mmap,
6605 .ring_doorbell = gaudi_ring_doorbell,
6606 .pqe_write = gaudi_pqe_write,
6607 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
6608 .asic_dma_free_coherent = gaudi_dma_free_coherent,
6609 .get_int_queue_base = gaudi_get_int_queue_base,
6610 .test_queues = gaudi_test_queues,
6611 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
6612 .asic_dma_pool_free = gaudi_dma_pool_free,
6613 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
6614 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
6615 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
6616 .cs_parser = gaudi_cs_parser,
6617 .asic_dma_map_sg = gaudi_dma_map_sg,
6618 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
6619 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
6620 .update_eq_ci = gaudi_update_eq_ci,
6621 .context_switch = gaudi_context_switch,
6622 .restore_phase_topology = gaudi_restore_phase_topology,
6623 .debugfs_read32 = gaudi_debugfs_read32,
6624 .debugfs_write32 = gaudi_debugfs_write32,
6625 .debugfs_read64 = gaudi_debugfs_read64,
6626 .debugfs_write64 = gaudi_debugfs_write64,
6627 .add_device_attr = gaudi_add_device_attr,
6628 .handle_eqe = gaudi_handle_eqe,
6629 .set_pll_profile = gaudi_set_pll_profile,
6630 .get_events_stat = gaudi_get_events_stat,
6631 .read_pte = gaudi_read_pte,
6632 .write_pte = gaudi_write_pte,
6633 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
6634 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
6635 .send_heartbeat = gaudi_send_heartbeat,
6636 .set_clock_gating = gaudi_set_clock_gating,
6637 .disable_clock_gating = gaudi_disable_clock_gating,
6638 .debug_coresight = gaudi_debug_coresight,
6639 .is_device_idle = gaudi_is_device_idle,
6640 .soft_reset_late_init = gaudi_soft_reset_late_init,
6641 .hw_queues_lock = gaudi_hw_queues_lock,
6642 .hw_queues_unlock = gaudi_hw_queues_unlock,
6643 .get_pci_id = gaudi_get_pci_id,
6644 .get_eeprom_data = gaudi_get_eeprom_data,
6645 .send_cpu_message = gaudi_send_cpu_message,
6646 .get_hw_state = gaudi_get_hw_state,
6647 .pci_bars_map = gaudi_pci_bars_map,
6648 .init_iatu = gaudi_init_iatu,
6651 .halt_coresight = gaudi_halt_coresight,
6652 .ctx_init = gaudi_ctx_init,
6653 .get_clk_rate = gaudi_get_clk_rate,
6654 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
6655 .read_device_fw_version = gaudi_read_device_fw_version,
6656 .load_firmware_to_device = gaudi_load_firmware_to_device,
6657 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
6658 .get_signal_cb_size = gaudi_get_signal_cb_size,
6659 .get_wait_cb_size = gaudi_get_wait_cb_size,
6660 .gen_signal_cb = gaudi_gen_signal_cb,
6661 .gen_wait_cb = gaudi_gen_wait_cb,
6662 .reset_sob = gaudi_reset_sob,
6663 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
6664 .get_device_time = gaudi_get_device_time
6668 * gaudi_set_asic_funcs - set GAUDI function pointers
6670 * @hdev: pointer to hl_device structure
6673 void gaudi_set_asic_funcs(struct hl_device *hdev)
6675 hdev->asic_funcs = &gaudi_funcs;