// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>
/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is always not
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 *
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT		0x1000000

#define GAUDI_CLK_GATE_DEBUGFS_MASK	(\
		BIT(GAUDI_ENGINE_ID_MME_0) |\
		BIT(GAUDI_ENGINE_ID_MME_2) |\
		GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
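
/*
 * Note on the mask above (illustrative, not taken verbatim from the
 * hardware spec): it covers MME 0/2 plus TPC 0-7. MME 1 and MME 3 are
 * assumed here to be slave engines of MME 0 and MME 2 respectively, which
 * is why they get no dedicated bit in the debugfs clock-gating mask.
 */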

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define GAUDI_PLL_MAX 10

static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
		"gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
		"gaudi cpu eq"
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};

static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};

enum gaudi_sm_sei_cause {
	GAUDI_SM_SEI_SO_OVERFLOW,
	GAUDI_SM_SEI_LBW_4B_UNALIGNED,
	GAUDI_SM_SEI_AXI_RESPONSE_ERR
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

static s64 gaudi_state_dump_specs_props[SP_MAX] = {0};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
	bool disable_clock_gating;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
								u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val);
static int gaudi_schedule_register_memset(struct hl_device *hdev,
		u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);

static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

static inline void set_default_power_values(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	if (hdev->card_type == cpucp_card_type_pmc) {
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
		prop->dc_power_default = DC_POWER_DEFAULT_PMC;
	} else {
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
	}
}

static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;
		}
		prop->hw_queues_props[i].collective_mode =
						get_collective_mode(hdev, i);
	}

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;
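
	/*
	 * Worked example (illustrative; assumes NUMBER_OF_SOBS_IN_GRP = 11,
	 * HL_MAX_SOBS_PER_MONITOR = 8, QMAN_STREAMS = 4, HL_RSVD_SOBS = 2):
	 * ALIGN(11, 8) = 16 SOBs per group, so 16 * 4 * 2 = 128 SOBs are
	 * reserved for collective operations and the first user sync-stream
	 * SOB is index 128.
	 */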

	/* 1 monitor per internal queue stream is reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address +
					prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address +
					prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
					SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop0_shift = HOP0_SHIFT;
	prop->pmmu.hop1_shift = HOP1_SHIFT;
	prop->pmmu.hop2_shift = HOP2_SHIFT;
	prop->pmmu.hop3_shift = HOP3_SHIFT;
	prop->pmmu.hop4_shift = HOP4_SHIFT;
	prop->pmmu.hop0_mask = HOP0_MASK;
	prop->pmmu.hop1_mask = HOP1_MASK;
	prop->pmmu.hop2_mask = HOP2_MASK;
	prop->pmmu.hop3_mask = HOP3_MASK;
	prop->pmmu.hop4_mask = HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

	/* PMMU and HPMMU are the same except for the page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_msix_interrupt = USHRT_MAX;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	return 0;
}

static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_BAR_ID),
			SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							CFG_BAR_ID),
			CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
					mmCPU_BOOT_DEV_STS0,
					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1,
					GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			hdev->asic_funcs->hw_fini(hdev, true);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_info(hdev->dev,
			"H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true);
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);

	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if (hdev->asic_prop.fw_security_enabled) {
		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);

		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	} else {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev,
				"Received invalid div select value: %d",
				div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}
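
/*
 * Illustrative PLL arithmetic for the function above (made-up register
 * values, not read from real hardware; assumes PLL_REF_CLK = 50 [MHz]):
 * with nf = 35, nr = 0, od = 1 and div_sel = DIV_SEL_DIVIDED_PLL with
 * div_fctr = 1, pll_clk = 50 * (35 + 1) / ((0 + 1) * (1 + 1)) = 900 [MHz]
 * and freq = pll_clk / (div_fctr + 1) = 450 [MHz].
 */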

static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
	dst_addr = (prop->sram_user_base_address &
			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
			&dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
			dma_handle);

out:
	release_firmware(fw);
	return rc;
}

static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}
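
/*
 * Layout note (illustrative; assumes NIC_NUMBER_OF_ENGINES = 10): each SOB
 * group spans NUMBER_OF_SOBS_IN_GRP consecutive SOBs - one per NIC engine
 * at sob_id + 0..9, plus a single shared SOB at sob_id + 10 that serves
 * whichever reduction engine (DMA5 or TPC7) is used.
 */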

static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	u64 base_addr;
	int rc;

	base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob_group->base_sob_id * 4;
	rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
			base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
	if (rc)
		dev_err(hdev->dev,
			"failed resetting sob group - sob base %u, count %u",
			hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	u32 i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);
	/* Set collective engine bit */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
				BIT(i % HL_MAX_SOBS_PER_MONITOR);
}

static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	gaudi_collective_mstr_sob_mask_set(gaudi);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}

static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}

static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	/* copy the SOB id and value of the signal CS */
	cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
	cs_cmpl->sob_val = signal_cs_cmpl->sob_val;

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);
	/*
	 * Must put the signal fence after the SOB refcnt increment so
	 * the SOB refcnt won't turn 0 and reset the SOB before the
	 * wait CS was submitted.
	 */
	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;
}

static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI-X
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size,
			hdev->mmu_enable && !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() out of two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
		u32 collective_engine_id)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * Rest of the jobs goes to the collective slave queues which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
						BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
		}

		if (rc)
			return rc;
	}

	return rc;
}
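
/*
 * Job fan-out note for the function above (illustrative; assumes
 * NIC_NUMBER_OF_ENGINES = 10, so num_jobs = NUMBER_OF_SOBS_IN_GRP + 1 = 12):
 * one master job on the wait queue, up to ten slave jobs on the NIC queues
 * (disabled NICs are skipped), and one slave job on the reduction engine
 * queue (DMA5 or TPC7).
 */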

static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);

	return rc;
}

static void gaudi_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	int i = 0;

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}

static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bits addresses, while bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical in all allocated range.
	 */
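
	/*
	 * Example (illustrative addresses): a range starting at
	 * 0x7FFFFFF000 that is larger than 0x1000 bytes crosses the 2^39
	 * boundary (0x8000000000), so bits 49:39 of its first and last bytes
	 * differ and the allocation below must be retried.
	 */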

	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_CPU_ACCESSIBLE_MEM_SIZE,
						&dma_addr_arr[i],
						GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	for (j = 0 ; j < i ; j++)
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
						HL_CPU_ACCESSIBLE_MEM_SIZE,
						virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}

static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u32 i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		q = &gaudi->internal_qmans[i];
		if (!q->pq_kernel_addr)
			continue;
		hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
							q->pq_kernel_addr,
							q->pq_dma_addr);
	}
}

static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	int rc, i;

	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
			continue;

		q = &gaudi->internal_qmans[i];

		switch (i) {
		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
			break;
		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
			break;
		default:
			dev_err(hdev->dev, "Bad internal queue index %d", i);
			rc = -EINVAL;
			goto free_internal_qmans_pq_mem;
		}

		q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
						hdev, q->pq_size,
						&q->pq_dma_addr,
						GFP_KERNEL | __GFP_ZERO);
		if (!q->pq_kernel_addr) {
			rc = -ENOMEM;
			goto free_internal_qmans_pq_mem;
		}
	}

	return 0;

free_internal_qmans_pq_mem:
	gaudi_free_internal_qmans_pq_mem(hdev);
	return rc;
}

static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_mem_region *region;

	/* CFG */
	region = &hdev->pci_mem_region[PCI_REGION_CFG];
	region->region_base = CFG_BASE;
	region->region_size = CFG_SIZE;
	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = CFG_BAR_ID;
	region->used = 1;

	/* SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
	region->region_base = SRAM_BASE_ADDR;
	region->region_size = SRAM_SIZE;
	region->offset_in_bar = 0;
	region->bar_size = SRAM_BAR_SIZE;
	region->bar_id = SRAM_BAR_ID;
	region->used = 1;

	/* DRAM */
	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
	region->region_base = DRAM_PHYS_BASE;
	region->region_size = hdev->asic_prop.dram_size;
	region->offset_in_bar = 0;
	region->bar_size = prop->dram_pci_bar_size;
	region->bar_id = HBM_BAR_ID;
	region->used = 1;

	/* SP SRAM */
	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
	region->region_base = PSOC_SCRATCHPAD_ADDR;
	region->region_size = PSOC_SCRATCHPAD_SIZE;
	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
	region->bar_size = CFG_BAR_SIZE;
	region->bar_id = CFG_BAR_ID;
	region->used = 1;
}

static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;

	/* Allocate device structure */
	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
	if (!gaudi)
		return -ENOMEM;

	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
		if (gaudi_irq_map_table[i].valid) {
			if (event_id == GAUDI_EVENT_SIZE) {
				dev_err(hdev->dev,
					"Event array exceeds the limit of %u events\n",
					GAUDI_EVENT_SIZE);
				rc = -EINVAL;
				goto free_gaudi_device;
			}

			gaudi->events[event_id++] =
					gaudi_irq_map_table[i].fc_id;
		}
	}

	gaudi->cpucp_info_get = gaudi_cpucp_info_get;

	gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;

	hdev->asic_specific = gaudi;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);
	mutex_init(&gaudi->clk_gate_mutex);

	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;
	hdev->supports_staged_submission = true;

	gaudi_set_pci_memory_regions(hdev);

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}

static int gaudi_sw_fini(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	gaudi_free_internal_qmans_pq_mem(hdev);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	mutex_destroy(&gaudi->clk_gate_mutex);

	kfree(gaudi);

	return 0;
}

static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
{
	struct hl_device *hdev = arg;
	int i;

	if (hdev->disabled)
		return IRQ_NONE;

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);

	hl_irq_handler_eq(irq, &hdev->event_queue);

	return IRQ_HANDLED;
}

/*
 * For backward compatibility, new MSI interrupts should be set after the
 * existing CPU and NIC interrupts.
 */
static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
				bool cpu_eq)
{
	int msi_vec;

	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
				GAUDI_EVENT_QUEUE_MSI_IDX);

	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
			(nr + NIC_NUMBER_OF_ENGINES + 1);

	return pci_irq_vector(hdev->pdev, msi_vec);
}
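
/*
 * Illustrative mapping for gaudi_pci_irq_vector() (assumes
 * GAUDI_EVENT_QUEUE_MSI_IDX = 8 and NIC_NUMBER_OF_ENGINES = 10):
 * completion queues 0-7 map to MSI vectors 0-7, the CPU event queue maps
 * to vector 8, and any other interrupt nr maps to vector nr + 11, i.e.
 * after the CPU and NIC vectors.
 */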

static int gaudi_enable_msi_single(struct hl_device *hdev)
{
	int rc, irq;

	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");

	irq = gaudi_pci_irq_vector(hdev, 0, false);
	rc = request_irq(irq, gaudi_irq_handler_single, 0,
			"gaudi single msi", hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to request single MSI IRQ\n");

	return rc;
}

static int gaudi_enable_msi_multi(struct hl_device *hdev)
{
	int cq_cnt = hdev->asic_prop.completion_queues_count;
	int rc, i, irq_cnt_init, irq;

	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
		irq = gaudi_pci_irq_vector(hdev, i, false);
		rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
				&hdev->completion_queue[i]);
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_irqs;
		}
	}

	irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
	rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
				&hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
		goto free_irqs;
	}

	return 0;

free_irqs:
	for (i = 0 ; i < irq_cnt_init ; i++)
		free_irq(gaudi_pci_irq_vector(hdev, i, false),
				&hdev->completion_queue[i]);
	return rc;
}

static int gaudi_enable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
		return 0;

	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
	if (rc < 0) {
		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
		return rc;
	}

	if (rc < NUMBER_OF_INTERRUPTS) {
		gaudi->multi_msi_mode = false;
		rc = gaudi_enable_msi_single(hdev);
	} else {
		gaudi->multi_msi_mode = true;
		rc = gaudi_enable_msi_multi(hdev);
	}

	if (rc)
		goto free_pci_irq_vectors;

	gaudi->hw_cap_initialized |= HW_CAP_MSI;

	return 0;

free_pci_irq_vectors:
	pci_free_irq_vectors(hdev->pdev);
	return rc;
}

static void gaudi_sync_irqs(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, cq_cnt = hdev->asic_prop.completion_queues_count;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	/* Wait for all pending IRQs to be finished */
	if (gaudi->multi_msi_mode) {
		for (i = 0 ; i < cq_cnt ; i++)
			synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));

		synchronize_irq(gaudi_pci_irq_vector(hdev,
						GAUDI_EVENT_QUEUE_MSI_IDX,
						true));
	} else {
		synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
	}
}

static void gaudi_disable_msi(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
		return;

	gaudi_sync_irqs(hdev);

	if (gaudi->multi_msi_mode) {
		irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
						true);
		free_irq(irq, &hdev->event_queue);

		for (i = 0 ; i < cq_cnt ; i++) {
			irq = gaudi_pci_irq_vector(hdev, i, false);
			free_irq(irq, &hdev->completion_queue[i]);
		}
	} else {
		free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
	}

	pci_free_irq_vectors(hdev->pdev);

	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
}

static void gaudi_init_scrambler_sram(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
			CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
		return;

	if (!hdev->sram_scrambler_enable)
		return;

	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
}
2010 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2012 struct gaudi_device *gaudi = hdev->asic_specific;
2014 if (hdev->asic_prop.fw_security_enabled)
2017 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2018 CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2021 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2024 if (!hdev->dram_scrambler_enable)
2027 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2028 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2029 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2030 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2031 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2032 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2033 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2034 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2035 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2036 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2037 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2038 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2039 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2040 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2041 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2042 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2044 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2045 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2046 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2047 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2048 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2049 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2050 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2051 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2052 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2053 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2054 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2055 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2056 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2057 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2058 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2059 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2061 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2062 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2063 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2064 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2065 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2066 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2067 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2068 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2069 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2070 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2071 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2072 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2073 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2074 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2075 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2076 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2078 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
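/*
 * End-to-end (E2E) credits: each router gets separate write/read credit
 * budgets towards HBM and towards PCI. The HBM values are written in
 * 8-byte units, hence the ">> 3" on the byte counts. The per-router
 * asymmetry presumably matches the traffic profile of each router
 * position on the die.
 */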
2081 static void gaudi_init_e2e(struct hl_device *hdev)
2083 if (hdev->asic_prop.fw_security_enabled)
2084 return;
2086 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2087 CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2088 return;
2090 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2091 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2092 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2093 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2095 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2096 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2097 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2098 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2100 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2101 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2102 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2103 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2105 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2106 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2107 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2108 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2110 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2111 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2112 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2113 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2115 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2116 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2117 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2118 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2120 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2121 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2122 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2123 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2125 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2126 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2127 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2128 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2130 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2131 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2132 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2133 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2135 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2136 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2137 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2138 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2140 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2141 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2142 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2143 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2145 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2146 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2147 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2148 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2150 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2151 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2152 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2153 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2155 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2156 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2157 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2158 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2160 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2161 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2162 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2163 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2165 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2166 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2167 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2168 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2170 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2171 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2172 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2173 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2175 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2176 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2177 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2178 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2180 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2181 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2182 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2183 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2185 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2186 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2187 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2188 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2190 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2191 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2192 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2193 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2195 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2196 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2197 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2198 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2200 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2201 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2202 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2203 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2205 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2206 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2207 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2208 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2210 if (!hdev->dram_scrambler_enable) {
2211 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2212 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2213 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2214 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2216 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2217 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2218 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2219 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2221 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2222 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2223 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2224 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2226 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2227 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2228 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2229 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2231 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2232 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2233 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2234 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2236 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2237 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2238 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2239 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2241 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2242 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2243 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2244 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2246 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2247 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2248 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2249 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2251 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2252 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2253 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2254 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2256 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2257 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2258 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2259 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2261 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2262 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2263 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2264 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2266 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2267 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2268 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2269 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2271 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2272 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2273 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2274 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2276 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2277 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2278 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2279 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2281 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2282 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2283 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2284 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2286 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2287 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2288 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2289 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2291 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2292 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2293 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2294 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2296 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2297 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2298 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2299 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2301 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2302 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2303 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2304 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2306 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2307 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2308 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2309 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2311 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2312 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2313 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2314 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2316 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2317 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2318 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2319 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2321 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2322 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2323 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2324 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2326 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2327 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2328 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2329 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2332 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2333 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2334 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2335 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2337 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2338 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2339 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2340 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2342 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2343 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2344 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2345 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2347 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2348 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2349 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2350 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2352 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2353 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2354 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2355 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2357 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2358 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2359 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2360 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2362 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2363 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2364 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2365 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2367 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2368 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2369 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2370 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2372 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2373 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2374 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2375 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2377 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2378 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2379 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2380 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2382 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2383 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2384 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2385 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2387 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2388 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2389 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2390 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2392 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2393 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2394 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2395 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2397 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2398 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2399 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2400 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2402 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2403 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2404 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2405 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2407 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2408 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2409 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2410 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2412 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2413 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2414 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2415 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2417 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2418 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2419 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2420 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2422 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2423 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2424 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2425 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2427 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2428 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2429 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2430 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2432 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2433 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2434 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2435 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2437 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2438 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2439 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2440 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2442 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2443 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2444 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2445 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2447 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2448 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2449 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2450 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
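/*
 * HBM credit counters. Each *_CRED_CNT value appears to pack a small
 * per-requestor credit count into every nibble, which would explain the
 * repeated-digit constants (e.g. 0x33333333). The EN_0/EN_1 writes then
 * enable read and write crediting on both channels of each DMA_IF.
 */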
2453 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2455 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2457 if (hdev->asic_prop.fw_security_enabled)
2458 return;
2460 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2461 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2462 return;
2464 hbm0_wr = 0x33333333;
2465 hbm0_rd = 0x77777777;
2466 hbm1_wr = 0x55555555;
2467 hbm1_rd = 0xDDDDDDDD;
2469 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2470 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2471 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2472 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2474 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2475 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2476 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2477 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2479 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2480 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2481 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2482 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2484 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2485 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2486 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2487 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2489 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2490 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2491 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2492 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2493 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2494 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2495 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2496 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2497 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2498 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2499 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2500 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2502 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2503 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2504 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2505 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2506 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2507 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2508 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2509 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2510 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2511 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2512 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2513 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
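/*
 * "Golden" register values that must be in place before the engines run:
 * E2E credits, HBM credits, TPC interrupt masking and i-cache fetch
 * depth, a zeroed first 128 bytes of SRAM for tensor DMA, and the MME
 * EUS roll-up counters.
 */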
2516 static void gaudi_init_golden_registers(struct hl_device *hdev)
2518 u32 tpc_offset;
2519 int tpc_id, i;
2521 gaudi_init_e2e(hdev);
2522 gaudi_init_hbm_cred(hdev);
2524 for (tpc_id = 0, tpc_offset = 0;
2525 tpc_id < TPC_NUMBER_OF_ENGINES;
2526 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2527 /* Mask all arithmetic interrupts from TPC */
2528 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2529 /* Set 16 cache lines */
2530 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2531 ICACHE_FETCH_LINE_NUM, 2);
2534 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2535 for (i = 0 ; i < 128 ; i += 8)
2536 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2538 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2539 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2540 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2541 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
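/*
 * Per-stream setup of a PCI DMA QMAN. The PQ lives in host memory
 * (qman_pq_addr is a bus address), the CP_MSG_BASE pairs point at the
 * sync manager monitor-payload and sync-object blocks, and everything
 * from the GLBL_ERR configuration onwards is programmed once per QMAN
 * (on stream 0) rather than once per stream.
 */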
2544 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2545 int qman_id, dma_addr_t qman_pq_addr)
2547 struct cpu_dyn_regs *dyn_regs =
2548 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2549 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2550 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2551 u32 q_off, dma_qm_offset;
2552 u32 dma_qm_err_cfg, irq_handler_offset;
2554 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2556 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2557 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2558 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2559 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2560 so_base_en_lo = lower_32_bits(CFG_BASE +
2561 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2562 so_base_en_hi = upper_32_bits(CFG_BASE +
2563 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2564 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2565 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2566 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2567 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2568 so_base_ws_lo = lower_32_bits(CFG_BASE +
2569 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2570 so_base_ws_hi = upper_32_bits(CFG_BASE +
2571 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2573 q_off = dma_qm_offset + qman_id * 4;
2575 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2576 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2578 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2579 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2580 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2582 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2583 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2584 QMAN_LDMA_SRC_OFFSET);
2585 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2586 QMAN_LDMA_DST_OFFSET);
2588 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2589 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2590 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2591 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2592 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2593 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2594 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2595 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2597 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2599 /* The following configuration is needed only once per QMAN */
2600 if (qman_id == 0) {
2601 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2602 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2603 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2605 /* Configure RAZWI IRQ */
2606 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2607 if (hdev->stop_on_err)
2608 dma_qm_err_cfg |=
2609 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2611 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2613 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2614 lower_32_bits(CFG_BASE + irq_handler_offset));
2615 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2616 upper_32_bits(CFG_BASE + irq_handler_offset));
2618 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2619 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2620 dma_id);
2622 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2623 QM_ARB_ERR_MSG_EN_MASK);
2625 /* Increase ARB WDT to support streams architecture */
2626 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2627 GAUDI_ARB_WDT_TIMEOUT);
2629 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2630 QMAN_EXTERNAL_MAKE_TRUSTED);
2632 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
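/*
 * DMA core (transfer engine) setup, shared by PCI and HBM DMA channels:
 * outstanding/size read limits opened up to the hardware maximum (per
 * the comment below, a value of 0 selects the physical maximum), the
 * H3-2116 workaround capping LBW outstanding transactions at 15, error
 * reporting routed to the CPU-CP handler, and MMU bypass, since a
 * secured channel must not go through address translation.
 */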
2636 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2638 struct cpu_dyn_regs *dyn_regs =
2639 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2640 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2641 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2642 u32 irq_handler_offset;
2644 /* Set to maximum possible according to physical size */
2645 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2646 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2648 /* WA for H/W bug H3-2116 */
2649 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2651 /* STOP_ON bit implies the operation gets no completion in case of RAZWI */
2652 if (hdev->stop_on_err)
2653 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2655 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2657 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2658 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2659 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2661 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2662 lower_32_bits(CFG_BASE + irq_handler_offset));
2663 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2664 upper_32_bits(CFG_BASE + irq_handler_offset));
2666 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2667 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2668 WREG32(mmDMA0_CORE_PROT + dma_offset,
2669 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2670 /* If the channel is secured, it should be in MMU bypass mode */
2671 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2672 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2673 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2676 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2677 u32 enable_mask)
2679 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2681 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2684 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2686 struct gaudi_device *gaudi = hdev->asic_specific;
2687 struct hl_hw_queue *q;
2688 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2690 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2691 return;
2693 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2694 dma_id = gaudi_dma_assignment[i];
2695 /*
2696 * For queues after the CPU Q, add 1 to get the correct queue.
2697 * In addition, the CPU EQ and NIC IRQs must be added in order
2698 * to get the correct MSI register.
2699 */
2700 if (dma_id > 1) {
2701 cpu_skip = 1;
2702 nic_skip = NIC_NUMBER_OF_ENGINES;
2703 } else {
2704 cpu_skip = 0;
2705 nic_skip = 0;
2706 }
2708 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2709 q_idx = 4 * dma_id + j + cpu_skip;
2710 q = &hdev->kernel_queues[q_idx];
2711 q->cq_id = cq_id++;
2712 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2713 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2714 q->bus_address);
2717 gaudi_init_dma_core(hdev, dma_id);
2719 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2722 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
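/*
 * HBM DMA (compute) QMANs. Streams 0-3 each get a PQ (q->pq_dma_addr),
 * while qman_id 4 denotes the lower CP, which is fed by the upper CPs
 * rather than by a PQ; this is why the setup below splits on
 * qman_id < 4, and the lower-CP path also programs the error reporting
 * that applies to the whole QMAN.
 */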
2725 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2726 int qman_id, u64 qman_base_addr)
2728 struct cpu_dyn_regs *dyn_regs =
2729 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2730 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2731 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2732 u32 dma_qm_err_cfg, irq_handler_offset;
2733 u32 q_off, dma_qm_offset;
2735 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2737 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2738 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2739 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2740 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2741 so_base_en_lo = lower_32_bits(CFG_BASE +
2742 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2743 so_base_en_hi = upper_32_bits(CFG_BASE +
2744 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2745 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2746 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2747 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2748 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2749 so_base_ws_lo = lower_32_bits(CFG_BASE +
2750 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2751 so_base_ws_hi = upper_32_bits(CFG_BASE +
2752 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2754 q_off = dma_qm_offset + qman_id * 4;
2756 if (qman_id < 4) {
2757 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2758 lower_32_bits(qman_base_addr));
2759 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2760 upper_32_bits(qman_base_addr));
2762 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2763 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2764 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2766 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2767 QMAN_CPDMA_SIZE_OFFSET);
2768 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2769 QMAN_CPDMA_SRC_OFFSET);
2770 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2771 QMAN_CPDMA_DST_OFFSET);
2772 } else {
2773 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2774 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2775 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2777 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2778 QMAN_LDMA_SIZE_OFFSET);
2779 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2780 QMAN_LDMA_SRC_OFFSET);
2781 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2782 QMAN_LDMA_DST_OFFSET);
2784 /* Configure RAZWI IRQ */
2785 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2786 if (hdev->stop_on_err)
2787 dma_qm_err_cfg |=
2788 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2790 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2792 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2793 lower_32_bits(CFG_BASE + irq_handler_offset));
2794 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2795 upper_32_bits(CFG_BASE + irq_handler_offset));
2797 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2798 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2799 dma_id);
2801 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2802 QM_ARB_ERR_MSG_EN_MASK);
2804 /* Increase ARB WDT to support streams architecture */
2805 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2806 GAUDI_ARB_WDT_TIMEOUT);
2808 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2809 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2810 QMAN_INTERNAL_MAKE_TRUSTED);
2811 }
2813 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2814 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2815 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2816 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2818 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2819 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2820 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2821 mtr_base_ws_lo);
2822 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2823 mtr_base_ws_hi);
2824 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2825 so_base_ws_lo);
2826 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2827 so_base_ws_hi);
2828 }
2831 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2833 struct gaudi_device *gaudi = hdev->asic_specific;
2834 struct gaudi_internal_qman_info *q;
2835 u64 qman_base_addr;
2836 int i, j, dma_id, internal_q_index;
2838 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2839 return;
2841 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2842 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2844 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2845 /*
2846 * Add the CPU queue in order to get the correct queue
2847 * number, as all internal queues are placed after it
2848 */
2849 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2851 q = &gaudi->internal_qmans[internal_q_index];
2852 qman_base_addr = (u64) q->pq_dma_addr;
2853 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2854 qman_base_addr);
2857 /* Initializing lower CP for HBM DMA QMAN */
2858 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2860 gaudi_init_dma_core(hdev, dma_id);
2862 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2865 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
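/*
 * MME QMANs. Only MME0 and MME2 expose a QMAN (the odd MMEs appear to
 * be driven through the even ones), so init runs twice, and the
 * mme_offset arithmetic below maps a register offset back to an MME
 * index for the error WDATA payload.
 */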
2868 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2869 int qman_id, u64 qman_base_addr)
2871 struct cpu_dyn_regs *dyn_regs =
2872 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2873 u32 mtr_base_lo, mtr_base_hi;
2874 u32 so_base_lo, so_base_hi;
2875 u32 irq_handler_offset;
2876 u32 q_off, mme_id;
2877 u32 mme_qm_err_cfg;
2879 mtr_base_lo = lower_32_bits(CFG_BASE +
2880 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2881 mtr_base_hi = upper_32_bits(CFG_BASE +
2882 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2883 so_base_lo = lower_32_bits(CFG_BASE +
2884 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2885 so_base_hi = upper_32_bits(CFG_BASE +
2886 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2888 q_off = mme_offset + qman_id * 4;
2890 if (qman_id < 4) {
2891 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2892 lower_32_bits(qman_base_addr));
2893 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2894 upper_32_bits(qman_base_addr));
2896 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2897 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2898 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2900 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2901 QMAN_CPDMA_SIZE_OFFSET);
2902 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2903 QMAN_CPDMA_SRC_OFFSET);
2904 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2905 QMAN_CPDMA_DST_OFFSET);
2906 } else {
2907 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2908 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2909 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2911 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2912 QMAN_LDMA_SIZE_OFFSET);
2913 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2914 QMAN_LDMA_SRC_OFFSET);
2915 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2916 QMAN_LDMA_DST_OFFSET);
2918 /* Configure RAZWI IRQ */
2919 mme_id = mme_offset /
2920 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2922 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2923 if (hdev->stop_on_err)
2924 mme_qm_err_cfg |=
2925 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2927 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2929 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2930 lower_32_bits(CFG_BASE + irq_handler_offset));
2931 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2932 upper_32_bits(CFG_BASE + irq_handler_offset));
2934 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2935 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2936 mme_id);
2938 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2939 QM_ARB_ERR_MSG_EN_MASK);
2941 /* Increase ARB WDT to support streams architecture */
2942 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2943 GAUDI_ARB_WDT_TIMEOUT);
2945 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2946 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2947 QMAN_INTERNAL_MAKE_TRUSTED);
2948 }
2950 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2951 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2952 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2953 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2956 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2958 struct gaudi_device *gaudi = hdev->asic_specific;
2959 struct gaudi_internal_qman_info *q;
2960 u64 qman_base_addr;
2961 u32 mme_offset;
2962 int i, internal_q_index;
2964 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2965 return;
2967 /*
2968 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2969 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2970 */
2972 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2974 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2975 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2976 q = &gaudi->internal_qmans[internal_q_index];
2977 qman_base_addr = (u64) q->pq_dma_addr;
2978 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2979 qman_base_addr);
2980 if (i == 3)
2981 mme_offset = 0;
2984 /* Initializing lower CP for MME QMANs */
2985 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2986 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2987 gaudi_init_mme_qman(hdev, 0, 4, 0);
2989 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2990 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2992 gaudi->hw_cap_initialized |= HW_CAP_MME;
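/*
 * TPC QMANs: same upper/lower CP split as the DMA QMANs, plus one
 * TPC-specific detail: the TPC reserved for sync stream collective
 * operations also gets CP_MSG_BASE 2/3 pointed at the west-south sync
 * manager (see the tpc_id check at the end of the function).
 */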
2995 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2996 int qman_id, u64 qman_base_addr)
2998 struct cpu_dyn_regs *dyn_regs =
2999 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3000 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3001 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3002 u32 tpc_qm_err_cfg, irq_handler_offset;
3003 u32 q_off, tpc_id;
3005 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3006 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3007 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3008 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3009 so_base_en_lo = lower_32_bits(CFG_BASE +
3010 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3011 so_base_en_hi = upper_32_bits(CFG_BASE +
3012 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3013 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3014 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3015 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3016 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3017 so_base_ws_lo = lower_32_bits(CFG_BASE +
3018 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3019 so_base_ws_hi = upper_32_bits(CFG_BASE +
3020 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3022 q_off = tpc_offset + qman_id * 4;
3024 tpc_id = tpc_offset /
3025 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3027 if (qman_id < 4) {
3028 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3029 lower_32_bits(qman_base_addr));
3030 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3031 upper_32_bits(qman_base_addr));
3033 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3034 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3035 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3037 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3038 QMAN_CPDMA_SIZE_OFFSET);
3039 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3040 QMAN_CPDMA_SRC_OFFSET);
3041 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3042 QMAN_CPDMA_DST_OFFSET);
3043 } else {
3044 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3045 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3046 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3048 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3049 QMAN_LDMA_SIZE_OFFSET);
3050 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3051 QMAN_LDMA_SRC_OFFSET);
3052 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3053 QMAN_LDMA_DST_OFFSET);
3055 /* Configure RAZWI IRQ */
3056 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3057 if (hdev->stop_on_err)
3058 tpc_qm_err_cfg |=
3059 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3061 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3063 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3064 lower_32_bits(CFG_BASE + irq_handler_offset));
3065 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3066 upper_32_bits(CFG_BASE + irq_handler_offset));
3068 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3069 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3070 tpc_id);
3072 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3073 QM_ARB_ERR_MSG_EN_MASK);
3075 /* Increase ARB WDT to support streams architecture */
3076 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3077 GAUDI_ARB_WDT_TIMEOUT);
3079 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3080 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3081 QMAN_INTERNAL_MAKE_TRUSTED);
3082 }
3084 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3085 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3086 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3087 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3089 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3090 if (tpc_id == 6) {
3091 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3092 mtr_base_ws_lo);
3093 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3094 mtr_base_ws_hi);
3095 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3096 so_base_ws_lo);
3097 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3098 so_base_ws_hi);
3099 }
3102 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3104 struct gaudi_device *gaudi = hdev->asic_specific;
3105 struct gaudi_internal_qman_info *q;
3106 u64 qman_base_addr;
3107 u32 so_base_hi, tpc_offset = 0;
3108 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3109 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3110 int i, tpc_id, internal_q_index;
3112 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3113 return;
3115 so_base_hi = upper_32_bits(CFG_BASE +
3116 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3118 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3119 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3120 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3121 tpc_id * QMAN_STREAMS + i;
3122 q = &gaudi->internal_qmans[internal_q_index];
3123 qman_base_addr = (u64) q->pq_dma_addr;
3124 gaudi_init_tpc_qman(hdev, tpc_offset, i,
3125 qman_base_addr);
3128 /* Initializing lower CP for TPC QMAN */
3129 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3131 /* Enable the QMAN and TPC channel */
3132 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3133 QMAN_TPC_ENABLE);
3137 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3138 so_base_hi);
3140 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3142 gaudi->hw_cap_initialized |=
3143 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
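/*
 * NIC QMANs: ten engines, organized as five NIC macros with two QMANs
 * each, which is why the register offset is stepped once per QMAN and
 * corrected once per macro. Ports masked out in hdev->nic_ports_mask
 * are skipped entirely. Every stream gets the full CP_MSG_BASE 0-3
 * programming because the NICs take part in sync stream collectives.
 */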
3147 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3148 int qman_id, u64 qman_base_addr, int nic_id)
3150 struct cpu_dyn_regs *dyn_regs =
3151 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3152 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3153 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3154 u32 nic_qm_err_cfg, irq_handler_offset;
3155 u32 q_off;
3157 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3158 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3159 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3160 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3161 so_base_en_lo = lower_32_bits(CFG_BASE +
3162 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3163 so_base_en_hi = upper_32_bits(CFG_BASE +
3164 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3165 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3166 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3167 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3168 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3169 so_base_ws_lo = lower_32_bits(CFG_BASE +
3170 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3171 so_base_ws_hi = upper_32_bits(CFG_BASE +
3172 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3174 q_off = nic_offset + qman_id * 4;
3176 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3177 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3179 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3180 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3181 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3183 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3184 QMAN_LDMA_SIZE_OFFSET);
3185 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3186 QMAN_LDMA_SRC_OFFSET);
3187 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3188 QMAN_LDMA_DST_OFFSET);
3190 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3191 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3192 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3193 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3195 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3196 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3197 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3198 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3199 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3201 if (qman_id == 0) {
3202 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3203 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3204 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3206 /* Configure RAZWI IRQ */
3207 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3208 if (hdev->stop_on_err)
3209 nic_qm_err_cfg |=
3210 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3212 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3214 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3215 lower_32_bits(CFG_BASE + irq_handler_offset));
3216 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3217 upper_32_bits(CFG_BASE + irq_handler_offset));
3219 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3220 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3221 nic_id);
3223 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3224 QM_ARB_ERR_MSG_EN_MASK);
3226 /* Increase ARB WDT to support streams architecture */
3227 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3228 GAUDI_ARB_WDT_TIMEOUT);
3230 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3231 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3232 QMAN_INTERNAL_MAKE_TRUSTED);
3236 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3238 struct gaudi_device *gaudi = hdev->asic_specific;
3239 struct gaudi_internal_qman_info *q;
3240 u64 qman_base_addr;
3241 u32 nic_offset = 0;
3242 u32 nic_delta_between_qmans =
3243 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3244 u32 nic_delta_between_nics =
3245 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3246 int i, nic_id, internal_q_index;
3248 if (!hdev->nic_ports_mask)
3249 return;
3251 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3252 return;
3254 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3256 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3257 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3258 nic_offset += nic_delta_between_qmans;
3260 nic_offset -= (nic_delta_between_qmans * 2);
3261 nic_offset += nic_delta_between_nics;
3266 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3267 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3268 nic_id * QMAN_STREAMS + i;
3269 q = &gaudi->internal_qmans[internal_q_index];
3270 qman_base_addr = (u64) q->pq_dma_addr;
3271 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3272 qman_base_addr, nic_id);
3275 /* Enable the QMAN */
3276 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3278 nic_offset += nic_delta_between_qmans;
3279 if (nic_id & 1) {
3280 nic_offset -= (nic_delta_between_qmans * 2);
3281 nic_offset += nic_delta_between_nics;
3282 }
3284 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3288 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3290 struct gaudi_device *gaudi = hdev->asic_specific;
3292 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3293 return;
3295 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3296 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3297 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3300 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3302 struct gaudi_device *gaudi = hdev->asic_specific;
3304 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3305 return;
3307 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3308 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3309 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3310 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3311 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3314 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3316 struct gaudi_device *gaudi = hdev->asic_specific;
3318 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3319 return;
3321 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3322 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3325 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3327 struct gaudi_device *gaudi = hdev->asic_specific;
3328 u32 tpc_offset = 0;
3329 int tpc_id;
3331 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3332 return;
3334 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3335 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3336 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3340 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3342 struct gaudi_device *gaudi = hdev->asic_specific;
3343 u32 nic_mask, nic_offset = 0;
3344 u32 nic_delta_between_qmans =
3345 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3346 u32 nic_delta_between_nics =
3347 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3348 int nic_id;
3350 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3351 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3353 if (gaudi->hw_cap_initialized & nic_mask)
3354 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3356 nic_offset += nic_delta_between_qmans;
3357 if (nic_id & 1) {
3358 nic_offset -= (nic_delta_between_qmans * 2);
3359 nic_offset += nic_delta_between_nics;
3360 }
3364 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3366 struct gaudi_device *gaudi = hdev->asic_specific;
3368 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3369 return;
3371 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3372 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3373 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3374 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3377 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3379 struct gaudi_device *gaudi = hdev->asic_specific;
3381 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3382 return;
3384 /* Stop CPs of HBM DMA QMANs */
3386 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3387 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3388 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3389 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3390 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3393 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3395 struct gaudi_device *gaudi = hdev->asic_specific;
3397 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3398 return;
3400 /* Stop CPs of MME QMANs */
3401 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3402 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3405 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3407 struct gaudi_device *gaudi = hdev->asic_specific;
3409 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3410 return;
3412 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3416 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3417 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3418 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3419 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3422 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3424 struct gaudi_device *gaudi = hdev->asic_specific;
3426 /* Stop upper CPs of QMANs */
3428 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3429 WREG32(mmNIC0_QM0_GLBL_CFG1,
3430 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3431 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3432 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3434 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3435 WREG32(mmNIC0_QM1_GLBL_CFG1,
3436 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3437 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3438 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3440 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3441 WREG32(mmNIC1_QM0_GLBL_CFG1,
3442 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3443 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3444 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3446 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3447 WREG32(mmNIC1_QM1_GLBL_CFG1,
3448 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3449 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3450 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3452 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3453 WREG32(mmNIC2_QM0_GLBL_CFG1,
3454 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3455 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3456 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3458 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3459 WREG32(mmNIC2_QM1_GLBL_CFG1,
3460 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3461 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3462 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3464 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3465 WREG32(mmNIC3_QM0_GLBL_CFG1,
3466 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3467 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3468 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3470 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3471 WREG32(mmNIC3_QM1_GLBL_CFG1,
3472 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3473 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3474 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3476 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3477 WREG32(mmNIC4_QM0_GLBL_CFG1,
3478 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3479 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3480 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3482 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3483 WREG32(mmNIC4_QM1_GLBL_CFG1,
3484 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3485 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3486 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
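/*
 * Note the difference between the *_stop routines above and the *_stall
 * routines below: stopping freezes the QMAN command processors so no new
 * work is fetched, while stalling halts the engine datapath itself (DMA
 * core halt, MME ACC/SBAB stall, TPC stall). The halt sequence uses
 * both, in that order.
 */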
3489 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3491 struct gaudi_device *gaudi = hdev->asic_specific;
3493 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3494 return;
3496 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3497 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3498 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3501 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3503 struct gaudi_device *gaudi = hdev->asic_specific;
3505 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3506 return;
3508 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3509 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3510 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3511 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3512 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3515 static void gaudi_mme_stall(struct hl_device *hdev)
3517 struct gaudi_device *gaudi = hdev->asic_specific;
3519 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3520 return;
3522 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3523 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3524 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3525 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3526 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3527 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3528 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3529 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3530 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3531 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3532 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3533 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3534 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3535 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3536 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3537 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3538 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3541 static void gaudi_tpc_stall(struct hl_device *hdev)
3543 struct gaudi_device *gaudi = hdev->asic_specific;
3545 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3546 return;
3548 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3553 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3554 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3555 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
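/*
 * Clock gating is opted in per engine via hdev->clock_gating_mask, with
 * two forced exceptions below: DMA5 stays ungated because GC submits
 * work through its upper CP, and nothing is gated while a debug session
 * is active or while the registers are firmware-owned.
 */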
3558 static void gaudi_set_clock_gating(struct hl_device *hdev)
3560 struct gaudi_device *gaudi = hdev->asic_specific;
3561 u32 qman_offset;
3562 bool enable;
3563 int i;
3565 /* In case we are in a debug session, don't enable the clock gate
3566 * as it may interfere
3567 */
3569 if (hdev->in_debug)
3570 return;
3571 if (hdev->asic_prop.fw_security_enabled)
3572 return;
3574 for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3575 enable = !!(hdev->clock_gating_mask &
3576 (BIT_ULL(gaudi_dma_assignment[i])));
3578 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3579 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3580 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3581 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3582 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3585 for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3586 enable = !!(hdev->clock_gating_mask &
3587 (BIT_ULL(gaudi_dma_assignment[i])));
3589 /* GC sends work to the DMA engine through the upper CP in DMA5,
3590 * so clock gating must not be enabled on that DMA
3591 */
3592 if (i == GAUDI_HBM_DMA_4)
3593 enable = 0;
3595 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3596 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3597 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3598 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3599 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3602 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3603 WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3604 WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3606 enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3607 WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3608 WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3610 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3611 enable = !!(hdev->clock_gating_mask &
3612 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3614 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3615 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3616 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3617 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3619 qman_offset += TPC_QMAN_OFFSET;
3622 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3625 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3627 struct gaudi_device *gaudi = hdev->asic_specific;
3628 u32 qman_offset;
3629 int i;
3631 if (hdev->asic_prop.fw_security_enabled)
3632 return;
3634 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3635 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3636 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3638 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3641 WREG32(mmMME0_QM_CGM_CFG, 0);
3642 WREG32(mmMME0_QM_CGM_CFG1, 0);
3643 WREG32(mmMME2_QM_CGM_CFG, 0);
3644 WREG32(mmMME2_QM_CGM_CFG1, 0);
3646 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3647 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3648 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3650 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3653 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
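/*
 * gaudi_enable_timestamp - the timestamp counter must be disabled while
 * its two 32-bit halves are zeroed; only then is it re-enabled, so the
 * 64-bit timebase starts cleanly from zero.
 */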
3656 static void gaudi_enable_timestamp(struct hl_device *hdev)
3658 /* Disable the timestamp counter */
3659 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3661 /* Zero the lower/upper parts of the 64-bit counter */
3662 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3663 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3665 /* Enable the counter */
3666 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3669 static void gaudi_disable_timestamp(struct hl_device *hdev)
3671 /* Disable the timestamp counter */
3672 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3675 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3677 u32 wait_timeout_ms;
3680 "Halting compute engines and disabling interrupts\n");
3683 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3685 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3687 gaudi_stop_nic_qmans(hdev);
3688 gaudi_stop_mme_qmans(hdev);
3689 gaudi_stop_tpc_qmans(hdev);
3690 gaudi_stop_hbm_dma_qmans(hdev);
3691 gaudi_stop_pci_dma_qmans(hdev);
3693 hdev->asic_funcs->disable_clock_gating(hdev);
3695 msleep(wait_timeout_ms);
3697 gaudi_pci_dma_stall(hdev);
3698 gaudi_hbm_dma_stall(hdev);
3699 gaudi_tpc_stall(hdev);
3700 gaudi_mme_stall(hdev);
3702 msleep(wait_timeout_ms);
3704 gaudi_disable_nic_qmans(hdev);
3705 gaudi_disable_mme_qmans(hdev);
3706 gaudi_disable_tpc_qmans(hdev);
3707 gaudi_disable_hbm_dma_qmans(hdev);
3708 gaudi_disable_pci_dma_qmans(hdev);
3710 gaudi_disable_timestamp(hdev);
3712 gaudi_disable_msi(hdev);
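/*
 * gaudi_mmu_init - each ASID gets its own hop0 table at a fixed offset
 * inside the MMU page-tables area:
 *   hop0_addr = prop->mmu_pgt_addr + asid * prop->mmu_hop_table_size
 */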
3715 static int gaudi_mmu_init(struct hl_device *hdev)
3717 struct asic_fixed_properties *prop = &hdev->asic_prop;
3718 struct gaudi_device *gaudi = hdev->asic_specific;
3722 if (!hdev->mmu_enable)
    return 0;

3725 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
    return 0;
3728 for (i = 0 ; i < prop->max_asid ; i++) {
3729 hop0_addr = prop->mmu_pgt_addr +
3730 (i * prop->mmu_hop_table_size);
3732 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
if (rc) {
    dev_err(hdev->dev,
3735         "failed to set hop0 addr for asid %d\n", i);
    return rc;
}
3740 /* init MMU cache manage page */
3741 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3742 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3744 hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3746 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3747 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3749 WREG32(mmSTLB_HOP_CONFIGURATION,
3750 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3753 * The H/W expects the first PI after init to be 1. After wraparound
 * we'll write 0.
 */
3756 gaudi->mmu_cache_inv_pi = 1;
3758 gaudi->hw_cap_initialized |= HW_CAP_MMU;

return 0;
3766 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3770 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3772 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3775 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3779 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3781 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3784 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3786 struct dynamic_fw_load_mgr *dynamic_loader;
3787 struct cpu_dyn_regs *dyn_regs;
3789 dynamic_loader = &hdev->fw_loader.dynamic_loader;
3792 * Here we set initial values for a few specific dynamic regs, as
3793 * they must be hard-coded before the first descriptor is read from
3794 * the FW. In later stages of the protocol these values are updated
3795 * automatically by reading the FW descriptor, so the data there is
3796 * always up-to-date
3798 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3799 dyn_regs->kmd_msg_to_cpu =
3800 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3801 dyn_regs->cpu_cmd_status_to_host =
3802 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3804 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3807 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3809 struct static_fw_load_mgr *static_loader;
3811 static_loader = &hdev->fw_loader.static_loader;
3813 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3814 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3815 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3816 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3817 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3818 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3819 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3820 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3821 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3822 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3823 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3824 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3825 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3826 GAUDI_PLDM_RESET_WAIT_MSEC :
3827 GAUDI_CPU_RESET_WAIT_MSEC;
3830 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3832 struct asic_fixed_properties *prop = &hdev->asic_prop;
3833 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3835 /* fill common fields */
3836 fw_loader->linux_loaded = false;
3837 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3838 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3839 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3840 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3841 fw_loader->skip_bmc = !hdev->bmc_enable;
3842 fw_loader->sram_bar_id = SRAM_BAR_ID;
3843 fw_loader->dram_bar_id = HBM_BAR_ID;
3845 if (prop->dynamic_fw_load)
3846 gaudi_init_dynamic_firmware_loader(hdev);
3848 gaudi_init_static_firmware_loader(hdev);
3851 static int gaudi_init_cpu(struct hl_device *hdev)
3853 struct gaudi_device *gaudi = hdev->asic_specific;
3856 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
    return 0;

3859 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
    return 0;
3863 * The device CPU works with 40-bit addresses.
3864 * This register sets the extension to 50 bits.
3866 if (!hdev->asic_prop.fw_security_enabled)
3867 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3869 rc = hl_fw_init_cpu(hdev);
if (rc)
    return rc;
3874 gaudi->hw_cap_initialized |= HW_CAP_CPU;

return 0;
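/*
 * gaudi_init_cpu_queues - hand the PQ/EQ/CQ base addresses and sizes to
 * the device CPU, kick it via the PI_UPDATE interrupt, then poll
 * CPU_IF_QUEUE_INIT until the CPU answers PQ_INIT_STATUS_READY_FOR_HOST
 * or the timeout expires.
 */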
3879 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3881 struct cpu_dyn_regs *dyn_regs =
3882 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3883 struct asic_fixed_properties *prop = &hdev->asic_prop;
3884 struct gaudi_device *gaudi = hdev->asic_specific;
3885 u32 status, irq_handler_offset;
3887 struct hl_hw_queue *cpu_pq =
3888 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3891 if (!hdev->cpu_queues_enable)
    return 0;

3894 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
    return 0;
3897 eq = &hdev->event_queue;
3899 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3900 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3902 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3903 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3905 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3906 lower_32_bits(hdev->cpu_accessible_dma_address));
3907 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3908 upper_32_bits(hdev->cpu_accessible_dma_address));
3910 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3911 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3912 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3914 /* Used for EQ CI */
3915 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3917 WREG32(mmCPU_IF_PF_PQ_PI, 0);
3919 if (gaudi->multi_msi_mode)
3920 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
else
3922     WREG32(mmCPU_IF_QUEUE_INIT,
3923 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3925 irq_handler_offset = prop->gic_interrupts_enable ?
3926 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3927 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3929 WREG32(irq_handler_offset,
3930 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3932 err = hl_poll_timeout(
3934 mmCPU_IF_QUEUE_INIT,
3936 (status == PQ_INIT_STATUS_READY_FOR_HOST),
if (err) {
    dev_err(hdev->dev,
3942         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
    return -EIO;
}
3946 /* update FW application security bits */
3947 if (prop->fw_cpu_boot_dev_sts0_valid)
3948 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3949 if (prop->fw_cpu_boot_dev_sts1_valid)
3950 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3952 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;

return 0;
3956 static void gaudi_pre_hw_init(struct hl_device *hdev)
3958 /* Perform read from the device to make sure device is up */
3961 if (!hdev->asic_prop.fw_security_enabled) {
3962 /* Set the access through PCI bars (Linux driver only) as
 * secured
 */
3965 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3966 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3967 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3969 /* Perform read to flush the waiting writes to ensure
3970 * configuration was set in the device
3972 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3976 * Let's mark in the H/W that we have reached this point. We check
3977 * this value in the reset_before_init function to understand whether
3978 * we need to reset the chip before doing H/W init. This register is
3979 * cleared by the H/W upon H/W reset
3981 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
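/*
 * gaudi_hw_init - bring-up order matters here: the CPU/FW comes up
 * first, the SRAM scrambler only after the CPU runs from HBM, and MSI
 * must be enabled before the CPU queues and NIC are initialized.
 */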
3984 static int gaudi_hw_init(struct hl_device *hdev)
3986 struct gaudi_device *gaudi = hdev->asic_specific;
3989 gaudi_pre_hw_init(hdev);
3991 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3992 * So we set it here and if anyone tries to move it later to
3993 * a different address, there will be an error
3995 if (hdev->asic_prop.iatu_done_by_fw)
3996 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3999 * Before pushing u-boot/Linux to the device, we need to set the HBM
4000 * bar to the base address of the DRAM
4002 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
    dev_err(hdev->dev,
4004         "failed to map HBM bar to DRAM base address\n");
    return -EIO;
}
4008 rc = gaudi_init_cpu(hdev);
if (rc) {
4010     dev_err(hdev->dev, "failed to initialize CPU\n");
    return rc;
}
4014 /* In case the clock gating was enabled in preboot we need to disable
4015 * it here before touching the MME/TPC registers.
4016 * There is no need to take clk gating mutex because when this function
4017 * runs, no other relevant code can run
4019 hdev->asic_funcs->disable_clock_gating(hdev);
4021 /* SRAM scrambler must be initialized after CPU is running from HBM */
4022 gaudi_init_scrambler_sram(hdev);
4024 /* This is here just in case we are working without CPU */
4025 gaudi_init_scrambler_hbm(hdev);
4027 gaudi_init_golden_registers(hdev);
4029 rc = gaudi_mmu_init(hdev);
if (rc)
    return rc;
4033 gaudi_init_security(hdev);
4035 gaudi_init_pci_dma_qmans(hdev);
4037 gaudi_init_hbm_dma_qmans(hdev);
4039 gaudi_init_mme_qmans(hdev);
4041 gaudi_init_tpc_qmans(hdev);
4043 gaudi_init_nic_qmans(hdev);
4045 hdev->asic_funcs->set_clock_gating(hdev);
4047 gaudi_enable_timestamp(hdev);
4049 /* MSI must be enabled before CPU queues and NIC are initialized */
4050 rc = gaudi_enable_msi(hdev);
4052 goto disable_queues;
4054 /* must be called after MSI was enabled */
4055 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
if (rc) {
4057     dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
        rc);
    goto disable_msi;
}
4062 /* Perform read from the device to flush all configuration */
RREG32(mmHW_STATE);

return 0;

disable_msi:
4068 gaudi_disable_msi(hdev);
disable_queues:
4070 gaudi_disable_mme_qmans(hdev);
4071 gaudi_disable_pci_dma_qmans(hdev);

return rc;
4076 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
4078 struct cpu_dyn_regs *dyn_regs =
4079 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4080 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4081 struct gaudi_device *gaudi = hdev->asic_specific;
4082 bool driver_performs_reset;
if (!hard_reset) {
4085     dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
    return;
}
if (hdev->pldm) {
4090     reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4091     cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
} else {
4093     reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4094     cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
}
4097 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4098 !hdev->asic_prop.hard_reset_done_by_fw);
4100 /* Set device to handle FLR by H/W as we will put the device CPU to
 * halt mode
 */
4103 if (driver_performs_reset)
4104 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4105 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4107 /* If Linux is loaded in the device CPU, we need to communicate with
4108  * it via the GIC. Otherwise, we use COMMS or, with old F/Ws, the
4109  * MSG_TO_CPU registers
 */
4111 if (hdev->fw_loader.linux_loaded) {
4112 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4113 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4114 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4116 WREG32(irq_handler_offset,
4117 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4119 if (hdev->asic_prop.hard_reset_done_by_fw)
4120 hl_fw_ask_hard_reset_without_linux(hdev);
else
4122     hl_fw_ask_halt_machine_without_linux(hdev);
4125 if (driver_performs_reset) {
4127 /* Configure the reset registers. Must be done as early as
4128 * possible in case we fail during H/W initialization
4130 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4131 (CFG_RST_H_DMA_MASK |
4132 CFG_RST_H_MME_MASK |
4134 CFG_RST_H_TPC_7_MASK));
4136 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4138 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4139 (CFG_RST_H_HBM_MASK |
4140 CFG_RST_H_TPC_7_MASK |
4141 CFG_RST_H_NIC_MASK |
4143 CFG_RST_H_DMA_MASK |
4144 CFG_RST_H_MME_MASK |
4145 CFG_RST_H_CPU_MASK |
4146 CFG_RST_H_MMU_MASK));
4148 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4149 (CFG_RST_L_IF_MASK |
4150 CFG_RST_L_PSOC_MASK |
4151 CFG_RST_L_TPC_MASK));
4153 msleep(cpu_timeout_ms);
4155 /* Tell ASIC not to re-initialize PCIe */
4156 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4158 /* Restart BTL/BLR upon hard-reset */
4159 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4161 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4162 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
    dev_info(hdev->dev,
4165         "Issued HARD reset command, going to wait %dms\n",
        reset_timeout_ms);
} else {
    dev_info(hdev->dev,
4169         "Firmware performs HARD reset, going to wait %dms\n",
        reset_timeout_ms);
}
4174 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4175 * itself is in reset. Need to wait until the reset is deasserted
4177 msleep(reset_timeout_ms);
4179 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4180 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
    dev_err(hdev->dev,
4182         "Timeout while waiting for device to reset 0x%x\n",
        status);
4186 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4187 HW_CAP_HBM | HW_CAP_PCI_DMA |
4188 HW_CAP_MME | HW_CAP_TPC_MASK |
4189 HW_CAP_HBM_DMA | HW_CAP_PLL |
4190 HW_CAP_NIC_MASK | HW_CAP_MMU |
4191 HW_CAP_SRAM_SCRAMBLER |
4192 HW_CAP_HBM_SCRAMBLER |
HW_CAP_CLK_GATE);
4195 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4197 hdev->device_cpu_is_halted = false;
4201 static int gaudi_suspend(struct hl_device *hdev)
4205 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
if (rc)
4207     dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");

return rc;
4212 static int gaudi_resume(struct hl_device *hdev)
4214 return gaudi_init_iatu(hdev);
4217 static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4218 void *cpu_addr, dma_addr_t dma_addr, size_t size)
4222 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4223 VM_DONTCOPY | VM_NORESERVE;
4225 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4226 (dma_addr - HOST_PHYS_BASE), size);
if (rc)
4228     dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);

return rc;
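/*
 * gaudi_ring_doorbell - publish a new PI by writing the queue's PQ_PI
 * doorbell register. DMA queues derive the register from the QMAN base
 * plus a per-queue offset; MME/TPC/NIC queues map to fixed registers.
 * For the CPU PQ we additionally raise the PI_UPDATE interrupt so the
 * device CPU notices the new PI.
 */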
4233 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4235 struct cpu_dyn_regs *dyn_regs =
4236 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4237 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4238 struct gaudi_device *gaudi = hdev->asic_specific;
4239 bool invalid_queue = false;
4242 switch (hw_queue_id) {
4243 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4244 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4245 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4246 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4247 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4250 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4251 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4252 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4253 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4254 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4257 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4258 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4259 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4260 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4261 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4264 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4265 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4266 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4267 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4268 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4271 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4272 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4273 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4274 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4275 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4278 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4279 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4280 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4281 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4282 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4285 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4286 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4287 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4288 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4289 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4292 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4293 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4294 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4295 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4296 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4299 case GAUDI_QUEUE_ID_CPU_PQ:
4300 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4301 db_reg_offset = mmCPU_IF_PF_PQ_PI;
else
4303     invalid_queue = true;

break;
4306 case GAUDI_QUEUE_ID_MME_0_0:
4307 db_reg_offset = mmMME2_QM_PQ_PI_0;
4310 case GAUDI_QUEUE_ID_MME_0_1:
4311 db_reg_offset = mmMME2_QM_PQ_PI_1;
4314 case GAUDI_QUEUE_ID_MME_0_2:
4315 db_reg_offset = mmMME2_QM_PQ_PI_2;
4318 case GAUDI_QUEUE_ID_MME_0_3:
4319 db_reg_offset = mmMME2_QM_PQ_PI_3;
4322 case GAUDI_QUEUE_ID_MME_1_0:
4323 db_reg_offset = mmMME0_QM_PQ_PI_0;
4326 case GAUDI_QUEUE_ID_MME_1_1:
4327 db_reg_offset = mmMME0_QM_PQ_PI_1;
4330 case GAUDI_QUEUE_ID_MME_1_2:
4331 db_reg_offset = mmMME0_QM_PQ_PI_2;
4334 case GAUDI_QUEUE_ID_MME_1_3:
4335 db_reg_offset = mmMME0_QM_PQ_PI_3;
4338 case GAUDI_QUEUE_ID_TPC_0_0:
4339 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4342 case GAUDI_QUEUE_ID_TPC_0_1:
4343 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4346 case GAUDI_QUEUE_ID_TPC_0_2:
4347 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4350 case GAUDI_QUEUE_ID_TPC_0_3:
4351 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4354 case GAUDI_QUEUE_ID_TPC_1_0:
4355 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4358 case GAUDI_QUEUE_ID_TPC_1_1:
4359 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4362 case GAUDI_QUEUE_ID_TPC_1_2:
4363 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4366 case GAUDI_QUEUE_ID_TPC_1_3:
4367 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4370 case GAUDI_QUEUE_ID_TPC_2_0:
4371 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4374 case GAUDI_QUEUE_ID_TPC_2_1:
4375 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4378 case GAUDI_QUEUE_ID_TPC_2_2:
4379 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4382 case GAUDI_QUEUE_ID_TPC_2_3:
4383 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4386 case GAUDI_QUEUE_ID_TPC_3_0:
4387 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4390 case GAUDI_QUEUE_ID_TPC_3_1:
4391 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4394 case GAUDI_QUEUE_ID_TPC_3_2:
4395 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4398 case GAUDI_QUEUE_ID_TPC_3_3:
4399 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4402 case GAUDI_QUEUE_ID_TPC_4_0:
4403 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4406 case GAUDI_QUEUE_ID_TPC_4_1:
4407 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4410 case GAUDI_QUEUE_ID_TPC_4_2:
4411 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4414 case GAUDI_QUEUE_ID_TPC_4_3:
4415 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4418 case GAUDI_QUEUE_ID_TPC_5_0:
4419 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4422 case GAUDI_QUEUE_ID_TPC_5_1:
4423 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4426 case GAUDI_QUEUE_ID_TPC_5_2:
4427 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4430 case GAUDI_QUEUE_ID_TPC_5_3:
4431 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4434 case GAUDI_QUEUE_ID_TPC_6_0:
4435 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4438 case GAUDI_QUEUE_ID_TPC_6_1:
4439 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4442 case GAUDI_QUEUE_ID_TPC_6_2:
4443 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4446 case GAUDI_QUEUE_ID_TPC_6_3:
4447 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4450 case GAUDI_QUEUE_ID_TPC_7_0:
4451 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4454 case GAUDI_QUEUE_ID_TPC_7_1:
4455 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4458 case GAUDI_QUEUE_ID_TPC_7_2:
4459 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4462 case GAUDI_QUEUE_ID_TPC_7_3:
4463 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4466 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4467 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4468 invalid_queue = true;
4470 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4471 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4474 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4475 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4476 invalid_queue = true;
4478 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4479 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4482 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4483 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4484 invalid_queue = true;
4486 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4487 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4490 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4491 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4492 invalid_queue = true;
4494 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4495 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4498 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4499 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4500 invalid_queue = true;
4502 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4503 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4506 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4507 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4508 invalid_queue = true;
4510 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4511 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4514 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4515 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4516 invalid_queue = true;
4518 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4519 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4522 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4523 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4524 invalid_queue = true;
4526 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4527 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4530 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4531 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4532 invalid_queue = true;
4534 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4535 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4538 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4539 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4540 invalid_queue = true;
4542 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4543 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
break;

default:
4547 invalid_queue = true;
4550 if (invalid_queue) {
4551 /* Should never get here */
4552 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
    hw_queue_id);
return;
}

db_value = pi;
4559 /* ring the doorbell */
4560 WREG32(db_reg_offset, db_value);
4562 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4563 /* make sure device CPU will read latest data from host */
mb();
4566 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4567 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4568 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4570 WREG32(irq_handler_offset,
4571 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4575 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4578 __le64 *pbd = (__le64 *) bd;
4580 /* The QMANs are on the host memory so a simple copy suffices */
pqe[0] = pbd[0];
pqe[1] = pbd[1];
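/*
 * Every host-memory DMA address handed to the device is offset by
 * HOST_PHYS_BASE: the alloc/map helpers below add it after mapping and
 * the matching free/unmap helpers subtract it before calling back into
 * the DMA API.
 */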
4585 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4586 dma_addr_t *dma_handle, gfp_t flags)
4588 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4591 /* Shift to the device's base physical address of host memory */
if (kernel_addr)
4593     *dma_handle += HOST_PHYS_BASE;

return kernel_addr;
4598 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4599 void *cpu_addr, dma_addr_t dma_handle)
4601 /* Cancel the device's base physical address of host memory */
4602 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4604 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
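/*
 * gaudi_hbm_scrubbing - memset all user HBM to zero using every DMA
 * core in parallel: the range is carved into chunks of up to 2GB, each
 * core is committed one chunk (LIN + MEM_SET), and the loop then polls
 * every core's STS0 busy bit before handing out the next round.
 */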
4607 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4609 struct asic_fixed_properties *prop = &hdev->asic_prop;
4610 u64 cur_addr = DRAM_BASE_ADDR_USER;
4615 while (cur_addr < prop->dram_end_address) {
4616 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4617 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
chunk_size =
4620     min((u64)SZ_2G, prop->dram_end_address - cur_addr);
dev_dbg(hdev->dev,
4623     "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4624     cur_addr, cur_addr + chunk_size);
4626 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4627 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4628 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4629 lower_32_bits(cur_addr));
4630 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4631 upper_32_bits(cur_addr));
4632 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
    chunk_size);
4634 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4635 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4636 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4638 cur_addr += chunk_size;
4640 if (cur_addr == prop->dram_end_address)
    break;
4644 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4645 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4647 rc = hl_poll_timeout(
4649 mmDMA0_CORE_STS0 + dma_offset,
4651 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4653 HBM_SCRUBBING_TIMEOUT_US);
if (rc) {
    dev_err(hdev->dev,
4657         "DMA Timeout during HBM scrubbing of DMA #%d\n",
        dma_id);
    return -EIO;
}
4667 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4669 struct asic_fixed_properties *prop = &hdev->asic_prop;
4670 struct gaudi_device *gaudi = hdev->asic_specific;
4674 if (!hdev->memory_scrub)
    return 0;
4677 if (!addr && !size) {
4678 /* Wait till device is idle */
4679 rc = hl_poll_timeout(
4681 mmDMA0_CORE_STS0/* dummy */,
4683 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4686 HBM_SCRUBBING_TIMEOUT_US);
if (rc) {
4688     dev_err(hdev->dev, "waiting for idle timeout\n");
    return -EIO;
}
4693 addr = prop->sram_user_base_address;
4694 size = hdev->pldm ? 0x10000 :
4695 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4696 val = 0x7777777777777777ull;
4698 rc = gaudi_memset_device_memory(hdev, addr, size, val);
if (rc) {
    dev_err(hdev->dev,
4701         "Failed to clear SRAM in mem scrub all\n");
    return rc;
}
4705 mutex_lock(&gaudi->clk_gate_mutex);
4706 hdev->asic_funcs->disable_clock_gating(hdev);
4708 /* Scrub HBM using all DMA channels in parallel */
4709 rc = gaudi_hbm_scrubbing(hdev);
if (rc)
    dev_err(hdev->dev,
4712         "Failed to clear HBM in mem scrub all\n");
4714 hdev->asic_funcs->set_clock_gating(hdev);
4715 mutex_unlock(&gaudi->clk_gate_mutex);

return rc;
4721 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4722 u32 queue_id, dma_addr_t *dma_handle,
4725 struct gaudi_device *gaudi = hdev->asic_specific;
4726 struct gaudi_internal_qman_info *q;
4728 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4729 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4730 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
return NULL;
}
4734 q = &gaudi->internal_qmans[queue_id];
4735 *dma_handle = q->pq_dma_addr;
4736 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4738 return q->pq_kernel_addr;
4741 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4742 u16 len, u32 timeout, u64 *result)
4744 struct gaudi_device *gaudi = hdev->asic_specific;
4746 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
    if (result)
        *result = 0;
    return 0;
}

if (!timeout)
4753     timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4755 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4759 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4761 struct packet_msg_prot *fence_pkt;
4762 dma_addr_t pkt_dma_addr;
4763 u32 fence_val, tmp, timeout_usec;
4764 dma_addr_t fence_dma_addr;
4769 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4771 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4773 fence_val = GAUDI_QMAN0_FENCE_VAL;
4775 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
    &fence_dma_addr);
if (!fence_ptr) {
    dev_err(hdev->dev,
4779         "Failed to allocate memory for H/W queue %d testing\n",
        hw_queue_id);
    return -ENOMEM;
}
4786 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4787 sizeof(struct packet_msg_prot),
4788 GFP_KERNEL, &pkt_dma_addr);
if (!fence_pkt) {
    dev_err(hdev->dev,
4791         "Failed to allocate packet for H/W queue %d testing\n",
        hw_queue_id);
    rc = -ENOMEM;
4794     goto free_fence_ptr;
}
4797 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4798 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4799 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4801 fence_pkt->ctl = cpu_to_le32(tmp);
4802 fence_pkt->value = cpu_to_le32(fence_val);
4803 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4805 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4806 sizeof(struct packet_msg_prot),
    pkt_dma_addr);
if (rc) {
    dev_err(hdev->dev,
4810         "Failed to send fence packet to H/W queue %d\n",
        hw_queue_id);
    goto free_pkt;
}
4815 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4816 1000, timeout_usec, true);
4818 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4820 if (rc == -ETIMEDOUT) {
    dev_err(hdev->dev,
4822         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4823         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
    rc = -EIO;
}
free_pkt:
4828 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
    pkt_dma_addr);
free_fence_ptr:
4831 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
    fence_dma_addr);

return rc;
4836 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4838 struct gaudi_device *gaudi = hdev->asic_specific;
4841 * check capability here, as send_cpu_message() won't update the result
4842 * value if the capability is not set
4844 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
    return 0;
4847 return hl_fw_test_cpu_queue(hdev);
4850 static int gaudi_test_queues(struct hl_device *hdev)
4852 int i, rc, ret_val = 0;
4854 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4855 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4856 rc = gaudi_test_queue(hdev, i);
if (rc)
    ret_val = -EINVAL;
}
}

4862 rc = gaudi_test_cpu_queue(hdev);
if (rc)
    ret_val = -EINVAL;

return ret_val;
4869 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4870 gfp_t mem_flags, dma_addr_t *dma_handle)
4874 if (size > GAUDI_DMA_POOL_BLK_SIZE)
    return NULL;
4877 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4879 /* Shift to the device's base physical address of host memory */
if (kernel_addr)
4881     *dma_handle += HOST_PHYS_BASE;

return kernel_addr;
4886 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4887 dma_addr_t dma_addr)
4889 /* Cancel the device's base physical address of host memory */
4890 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4892 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4895 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4896 size_t size, dma_addr_t *dma_handle)
4898 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4901 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4902 size_t size, void *vaddr)
4904 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4907 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
4908 int nents, enum dma_data_direction dir)
4910 struct scatterlist *sg;
4913 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
    return -ENOMEM;
4916 /* Shift to the device's base physical address of host memory */
4917 for_each_sg(sgl, sg, nents, i)
4918     sg->dma_address += HOST_PHYS_BASE;

return 0;
4923 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
4924 int nents, enum dma_data_direction dir)
4926 struct scatterlist *sg;
4929 /* Cancel the device's base physical address of host memory */
4930 for_each_sg(sgl, sg, nents, i)
4931 sg->dma_address -= HOST_PHYS_BASE;
4933 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
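/*
 * gaudi_get_dma_desc_list_size - worst-case size of the patched LIN_DMA
 * packets for an SG table: physically contiguous SG entries are merged
 * while the merged length stays within DMA_MAX_TRANSFER_SIZE, and each
 * resulting descriptor becomes one packet_lin_dma in the patched CB.
 */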
4936 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
4937 struct sg_table *sgt)
4939 struct scatterlist *sg, *sg_next_iter;
4940 u32 count, dma_desc_cnt;
4942 dma_addr_t addr, addr_next;
dma_desc_cnt = 0;

4946 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
4948 len = sg_dma_len(sg);
4949 addr = sg_dma_address(sg);
4954 while ((count + 1) < sgt->nents) {
4955 sg_next_iter = sg_next(sg);
4956 len_next = sg_dma_len(sg_next_iter);
4957 addr_next = sg_dma_address(sg_next_iter);
4962 if ((addr + len == addr_next) &&
4963     (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
    len += len_next;
    sg = sg_next_iter;
} else {
    break;
}
}

dma_desc_cnt++;
}
4975 return dma_desc_cnt * sizeof(struct packet_lin_dma);
4978 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4979 struct hl_cs_parser *parser,
4980 struct packet_lin_dma *user_dma_pkt,
4981 u64 addr, enum dma_data_direction dir)
4983 struct hl_userptr *userptr;
4986 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4987 parser->job_userptr_list, &userptr))
4988 goto already_pinned;
4990 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
if (!userptr)
    return -ENOMEM;
4994 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
    userptr);
if (rc)
    goto free_userptr;
4999 list_add_tail(&userptr->job_node, parser->job_userptr_list);
5001 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5002 userptr->sgt->nents, dir);
if (rc) {
5004     dev_err(hdev->dev, "failed to map sgt with DMA region\n");
    goto unpin_memory;
}
5008 userptr->dma_mapped = true;
already_pinned:
5012 parser->patched_cb_size +=
5013     gaudi_get_dma_desc_list_size(hdev, userptr->sgt);

return 0;

unpin_memory:
5018 list_del(&userptr->job_node);
5019 hl_unpin_host_memory(hdev, userptr);
free_userptr:
kfree(userptr);
return rc;
5025 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5026 struct hl_cs_parser *parser,
5027 struct packet_lin_dma *user_dma_pkt,
5030 enum dma_data_direction dir;
5031 bool skip_host_mem_pin = false, user_memset;
5035 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5036 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5037 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
if (src_in_host) {
    if (user_memset)
5041         skip_host_mem_pin = true;
5043 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5044 dir = DMA_TO_DEVICE;
5045 addr = le64_to_cpu(user_dma_pkt->src_addr);
} else {
5047     dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5048 dir = DMA_FROM_DEVICE;
5049 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5050 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5051 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
}

5054 if (skip_host_mem_pin)
5055     parser->patched_cb_size += sizeof(*user_dma_pkt);
else
5057     rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
        addr, dir);

return rc;
5063 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5064 struct hl_cs_parser *parser,
5065 struct packet_lin_dma *user_dma_pkt)
5067 bool src_in_host = false;
5068 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5069 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5070 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5072 dev_dbg(hdev->dev, "DMA packet details:\n");
5073 dev_dbg(hdev->dev, "source == 0x%llx\n",
5074 le64_to_cpu(user_dma_pkt->src_addr));
5075 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5076 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5079 * Special handling for DMA with size 0. Bypass all validations
5080 * because no transactions will be done except for WR_COMP, which
5081 * is not a security issue
5083 if (!le32_to_cpu(user_dma_pkt->tsize)) {
5084     parser->patched_cb_size += sizeof(*user_dma_pkt);
    return 0;
}

5088 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
    src_in_host = true;
5091 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
    src_in_host);
5095 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5096 struct hl_cs_parser *parser,
5097 struct packet_load_and_exe *user_pkt)
5101 cfg = le32_to_cpu(user_pkt->cfg);
5103 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
    dev_err(hdev->dev,
5105         "User not allowed to use Load and Execute\n");
    return -EPERM;
}
5109 parser->patched_cb_size += sizeof(struct packet_load_and_exe);

return 0;
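/*
 * gaudi_validate_cb - first pass over the user CB: walk it packet by
 * packet, validate every packet id and CB boundary, reject privileged
 * opcodes (MSG_PROT, CP_DMA, STOP, WREG_BULK) and accumulate the size
 * the patched CB will need.
 */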
5114 static int gaudi_validate_cb(struct hl_device *hdev,
5115 struct hl_cs_parser *parser, bool is_mmu)
5117 u32 cb_parsed_length = 0;
5120 parser->patched_cb_size = 0;
5122 /* user_cb_size is greater than 0, so the loop will always execute */
5123 while (cb_parsed_length < parser->user_cb_size) {
5124 enum packet_id pkt_id;
5126 struct gaudi_packet *user_pkt;
5128 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5130 pkt_id = (enum packet_id) (
5131 (le64_to_cpu(user_pkt->header) &
5132 PACKET_HEADER_PACKET_ID_MASK) >>
5133 PACKET_HEADER_PACKET_ID_SHIFT);
5135 if (!validate_packet_id(pkt_id)) {
5136 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
rc = -EINVAL;
break;
}
5141 pkt_size = gaudi_packet_sizes[pkt_id];
5142 cb_parsed_length += pkt_size;
5143 if (cb_parsed_length > parser->user_cb_size) {
dev_err(hdev->dev,
5145     "packet 0x%x is out of CB boundary\n", pkt_id);
rc = -EINVAL;
break;
}
5151 case PACKET_MSG_PROT:
dev_err(hdev->dev,
5153     "User not allowed to use MSG_PROT\n");
rc = -EPERM;
break;

case PACKET_CP_DMA:
5158 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
rc = -EPERM;
break;

case PACKET_STOP:
5163 dev_err(hdev->dev, "User not allowed to use STOP\n");
rc = -EPERM;
break;

5167 case PACKET_WREG_BULK:
dev_err(hdev->dev,
5169     "User not allowed to use WREG_BULK\n");
rc = -EPERM;
break;
5173 case PACKET_LOAD_AND_EXE:
5174 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5175     (struct packet_load_and_exe *) user_pkt);
break;
5178 case PACKET_LIN_DMA:
5179 parser->contains_dma_pkt = true;
if (is_mmu)
5181     parser->patched_cb_size += pkt_size;
else
5183     rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5184         (struct packet_lin_dma *) user_pkt);
break;
5187 case PACKET_WREG_32:
5188 case PACKET_MSG_LONG:
5189 case PACKET_MSG_SHORT:
case PACKET_REPEAT:
case PACKET_FENCE:
case PACKET_NOP:
5193 case PACKET_ARB_POINT:
5194 parser->patched_cb_size += pkt_size;
break;
default:
5198 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
    pkt_id);
rc = -EINVAL;
break;
}

if (rc)
    return rc;
}
5209 * The new CB should have space at the end for two MSG_PROT packets:
5210 * 1. A packet that will act as a completion packet
5211 * 2. A packet that will generate MSI-X interrupt
5213 if (parser->completion)
5214 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;

return rc;
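/*
 * gaudi_patch_dma_packet - expand one user LIN_DMA packet into one
 * packet per merged SG descriptor. EB is cleared on all but the first
 * packet and WR_COMP is re-applied only on the last one, so completion
 * semantics stay exactly as the user requested.
 */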
5219 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5220 struct hl_cs_parser *parser,
5221 struct packet_lin_dma *user_dma_pkt,
5222 struct packet_lin_dma *new_dma_pkt,
5223 u32 *new_dma_pkt_size)
5225 struct hl_userptr *userptr;
5226 struct scatterlist *sg, *sg_next_iter;
5227 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5229 dma_addr_t dma_addr, dma_addr_next;
5230 u64 device_memory_addr, addr;
5231 enum dma_data_direction dir;
5232 struct sg_table *sgt;
5233 bool src_in_host = false;
5234 bool skip_host_mem_pin = false;
5237 ctl = le32_to_cpu(user_dma_pkt->ctl);
5239 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
    src_in_host = true;
5242 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5243 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
if (src_in_host) {
5246     addr = le64_to_cpu(user_dma_pkt->src_addr);
5247     device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5248     dir = DMA_TO_DEVICE;
    if (user_memset)
5250         skip_host_mem_pin = true;
} else {
5252     addr = le64_to_cpu(user_dma_pkt->dst_addr);
5253     device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5254     dir = DMA_FROM_DEVICE;
}
5257 if ((!skip_host_mem_pin) &&
5258 (!hl_userptr_is_pinned(hdev, addr,
5259 le32_to_cpu(user_dma_pkt->tsize),
5260 parser->job_userptr_list, &userptr))) {
5261 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5262     addr, user_dma_pkt->tsize);
return -EFAULT;
}
5266 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5267 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5268 *new_dma_pkt_size = sizeof(*user_dma_pkt);
return 0;
}
5272 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
sgt = userptr->sgt;
dma_desc_cnt = 0;

5277 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5278 len = sg_dma_len(sg);
5279 dma_addr = sg_dma_address(sg);
5284 while ((count + 1) < sgt->nents) {
5285 sg_next_iter = sg_next(sg);
5286 len_next = sg_dma_len(sg_next_iter);
5287 dma_addr_next = sg_dma_address(sg_next_iter);
5292 if ((dma_addr + len == dma_addr_next) &&
5293     (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
    len += len_next;
    sg = sg_next_iter;
} else {
    break;
}
}
5302 ctl = le32_to_cpu(user_dma_pkt->ctl);
5303 if (likely(dma_desc_cnt))
5304 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5305 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5306 new_dma_pkt->ctl = cpu_to_le32(ctl);
5307 new_dma_pkt->tsize = cpu_to_le32(len);
5309 if (dir == DMA_TO_DEVICE) {
5310 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5311 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5313 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5314 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5318 device_memory_addr += len;
dma_desc_cnt++;
new_dma_pkt++;
}
5323 if (!dma_desc_cnt) {
    dev_err(hdev->dev,
5325         "Error of 0 SG entries when patching DMA packet\n");
    return -EFAULT;
}
5329 /* Fix the last dma packet - wrcomp must be as user set it */
new_dma_pkt--;
5331 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5333 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);

return 0;
5338 static int gaudi_patch_cb(struct hl_device *hdev,
5339 struct hl_cs_parser *parser)
5341 u32 cb_parsed_length = 0;
5342 u32 cb_patched_cur_length = 0;
5345 /* user_cb_size is greater than 0, so the loop will always execute */
5346 while (cb_parsed_length < parser->user_cb_size) {
5347 enum packet_id pkt_id;
5349 u32 new_pkt_size = 0;
5350 struct gaudi_packet *user_pkt, *kernel_pkt;
5352 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5353 kernel_pkt = parser->patched_cb->kernel_address +
5354 cb_patched_cur_length;
5356 pkt_id = (enum packet_id) (
5357 (le64_to_cpu(user_pkt->header) &
5358 PACKET_HEADER_PACKET_ID_MASK) >>
5359 PACKET_HEADER_PACKET_ID_SHIFT);
5361 if (!validate_packet_id(pkt_id)) {
5362 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
rc = -EINVAL;
break;
}
5367 pkt_size = gaudi_packet_sizes[pkt_id];
5368 cb_parsed_length += pkt_size;
5369 if (cb_parsed_length > parser->user_cb_size) {
dev_err(hdev->dev,
5371     "packet 0x%x is out of CB boundary\n", pkt_id);
rc = -EINVAL;
break;
}
5377 case PACKET_LIN_DMA:
5378 rc = gaudi_patch_dma_packet(hdev, parser,
5379 (struct packet_lin_dma *) user_pkt,
5380 (struct packet_lin_dma *) kernel_pkt,
    &new_pkt_size);
5382 cb_patched_cur_length += new_pkt_size;
break;
5385 case PACKET_MSG_PROT:
dev_err(hdev->dev,
5387     "User not allowed to use MSG_PROT\n");
rc = -EPERM;
break;

case PACKET_CP_DMA:
5392 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
rc = -EPERM;
break;

case PACKET_STOP:
5397 dev_err(hdev->dev, "User not allowed to use STOP\n");
rc = -EPERM;
break;
5401 case PACKET_WREG_32:
5402 case PACKET_WREG_BULK:
5403 case PACKET_MSG_LONG:
5404 case PACKET_MSG_SHORT:
case PACKET_REPEAT:
case PACKET_FENCE:
case PACKET_NOP:
5408 case PACKET_ARB_POINT:
5409 case PACKET_LOAD_AND_EXE:
5410 memcpy(kernel_pkt, user_pkt, pkt_size);
5411 cb_patched_cur_length += pkt_size;
break;
default:
5415 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
    pkt_id);
rc = -EINVAL;
break;
}

if (rc)
    break;
}

return rc;
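/*
 * gaudi_parse_cb_mmu - with the MMU enabled the user CB is copied as-is
 * into a kernel-owned patched CB (sized for the two trailing MSG_PROT
 * packets) and only validated; no per-packet address patching is needed
 * because the device reaches host memory through the MMU.
 */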
5428 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5429 struct hl_cs_parser *parser)
5431 u64 patched_cb_handle;
5432 u32 patched_cb_size;
5433 struct hl_cb *user_cb;
5437 * The new CB should have space at the end for two MSG_PROT pkt:
5438 * 1. A packet that will act as a completion packet
5439 * 2. A packet that will generate MSI interrupt
5441 if (parser->completion)
5442 parser->patched_cb_size = parser->user_cb_size +
5443 sizeof(struct packet_msg_prot) * 2;
else
5445     parser->patched_cb_size = parser->user_cb_size;
5447 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5448 parser->patched_cb_size, false, false,
5449 &patched_cb_handle);
if (rc) {
    dev_err(hdev->dev,
5453         "Failed to allocate patched CB for DMA CS %d\n",
        rc);
    return rc;
}
5458 patched_cb_handle >>= PAGE_SHIFT;
5459 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5460 (u32) patched_cb_handle);
5461 /* hl_cb_get should never fail */
5462 if (!parser->patched_cb) {
5463 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5464         (u32) patched_cb_handle);
    rc = -EFAULT;
    goto out;
}
5470 * The check that parser->user_cb_size <= parser->user_cb->size was done
5471 * in validate_queue_index().
5473 memcpy(parser->patched_cb->kernel_address,
5474 parser->user_cb->kernel_address,
5475 parser->user_cb_size);
5477 patched_cb_size = parser->patched_cb_size;
5479 /* Validate patched CB instead of user CB */
5480 user_cb = parser->user_cb;
5481 parser->user_cb = parser->patched_cb;
5482 rc = gaudi_validate_cb(hdev, parser, true);
5483 parser->user_cb = user_cb;
if (rc) {
5486     hl_cb_put(parser->patched_cb);
    goto out;
}
5490 if (patched_cb_size != parser->patched_cb_size) {
5491 dev_err(hdev->dev, "user CB size mismatch\n");
5492     hl_cb_put(parser->patched_cb);
    rc = -EFAULT;
    goto out;
}

out:
5499 * Always call cb destroy here because we still have 1 reference
5500 * to it by calling cb_get earlier. After the job is completed,
5501 * cb_put will release it, but here we want to remove it from the
 * idr
 */
5504 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5505     patched_cb_handle << PAGE_SHIFT);

return rc;
5510 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5511 struct hl_cs_parser *parser)
5513 u64 patched_cb_handle;
5516 rc = gaudi_validate_cb(hdev, parser, false);
if (rc)
    goto free_userptr;
5521 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5522 parser->patched_cb_size, false, false,
5523 &patched_cb_handle);
if (rc) {
    dev_err(hdev->dev,
5526         "Failed to allocate patched CB for DMA CS %d\n", rc);
    goto free_userptr;
}
5530 patched_cb_handle >>= PAGE_SHIFT;
5531 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5532 (u32) patched_cb_handle);
5533 /* hl_cb_get should never fail here */
5534 if (!parser->patched_cb) {
5535 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5536         (u32) patched_cb_handle);
    rc = -EFAULT;
    goto free_userptr;
}
5541 rc = gaudi_patch_cb(hdev, parser);
if (rc)
5544     hl_cb_put(parser->patched_cb);
5548 * Always call cb destroy here because we still have 1 reference
5549 * to it by calling cb_get earlier. After the job is completed,
5550 * cb_put will release it, but here we want to remove it from the
 * idr
 */
5553 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5554     patched_cb_handle << PAGE_SHIFT);

free_userptr:
if (rc)
5558     hl_userptr_delete_list(hdev, parser->job_userptr_list);
return rc;
5562 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5563 struct hl_cs_parser *parser)
5565 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5566 struct gaudi_device *gaudi = hdev->asic_specific;
5567 u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5568 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5570 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5571 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5572 (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5573 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5574 parser->hw_queue_id);
5578 /* For internal queue jobs just check if CB address is valid */
5579 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5580 parser->user_cb_size,
5581 asic_prop->sram_user_base_address,
5582     asic_prop->sram_end_address))
    return 0;
5585 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5586 parser->user_cb_size,
5587 asic_prop->dram_user_base_address,
5588     asic_prop->dram_end_address))
    return 0;
5591 /* PMMU and HPMMU addresses are equal, check only one of them */
5592 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5593 parser->user_cb_size,
5594 asic_prop->pmmu.start_addr,
5595     asic_prop->pmmu.end_addr))
    return 0;
dev_err(hdev->dev,
5599     "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5600     parser->user_cb, parser->user_cb_size);

return -EFAULT;
5605 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5607 struct gaudi_device *gaudi = hdev->asic_specific;
5609 if (parser->queue_type == QUEUE_TYPE_INT)
5610 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5612 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5613 return gaudi_parse_cb_mmu(hdev, parser);
5615 return gaudi_parse_cb_no_mmu(hdev, parser);
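/*
 * gaudi_add_end_of_cb_packets - append the two MSG_PROT packets every
 * external CB must end with: the first writes cq_val to cq_addr so the
 * completion is visible, the second writes to PCIE_MSI_INTR_0 to raise
 * the interrupt (vector 0 when not in multi-MSI mode).
 */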
5618 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5619 void *kernel_address, u32 len,
5620 u64 cq_addr, u32 cq_val, u32 msi_vec,
5623 struct gaudi_device *gaudi = hdev->asic_specific;
5624 struct packet_msg_prot *cq_pkt;
5627 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5629 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5630 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
if (eb)
5633     tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5635 cq_pkt->ctl = cpu_to_le32(tmp);
5636 cq_pkt->value = cpu_to_le32(cq_val);
5637 cq_pkt->addr = cpu_to_le64(cq_addr);
cq_pkt++;

5641 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5642 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5643 cq_pkt->ctl = cpu_to_le32(tmp);
5644 cq_pkt->value = cpu_to_le32(1);
5646 if (!gaudi->multi_msi_mode)
    msi_vec = 0;
5649 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5652 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5654 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
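/*
 * gaudi_memset_device_memory - fill a device memory range with a 64-bit
 * pattern by submitting one LIN_DMA packet in MEMSET mode (val as the
 * source operand) on QMAN0, then checking DMA0_CORE_ERR_CAUSE.
 */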
5657 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5660 struct packet_lin_dma *lin_dma_pkt;
5661 struct hl_cs_job *job;
5662 u32 cb_size, ctl, err_cause;
5667 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
if (!cb)
    return -EFAULT;
5671 lin_dma_pkt = cb->kernel_address;
5672 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5673 cb_size = sizeof(*lin_dma_pkt);
5675 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5676 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5677 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5678 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5679 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5681 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5682 lin_dma_pkt->src_addr = cpu_to_le64(val);
5683 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5684 lin_dma_pkt->tsize = cpu_to_le32(size);
5686 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
if (!job) {
5688     dev_err(hdev->dev, "Failed to allocate a new job\n");
    rc = -ENOMEM;
    goto release_cb;
}
5693 /* Verify DMA is OK */
5694 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5695 if (err_cause && !hdev->init_done) {
dev_dbg(hdev->dev,
5697     "Clearing DMA0 engine from errors (cause 0x%x)\n",
    err_cause);
5699 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
}

job->id = 0;
job->user_cb = cb;
5704 atomic_inc(&job->user_cb->cs_cnt);
5705 job->user_cb_size = cb_size;
5706 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5707 job->patched_cb = job->user_cb;
5708 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5710 hl_debugfs_add_job(hdev, job);
5712 rc = gaudi_send_job_on_qman0(hdev, job);
5713 hl_debugfs_remove_job(hdev, job);
kfree(job);
5715 atomic_dec(&cb->cs_cnt);
5717 /* Verify DMA is OK */
5718 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
if (err_cause) {
5720     dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
    rc = -EIO;
5722 if (!hdev->init_done) {
dev_dbg(hdev->dev,
5724     "Clearing DMA0 engine from errors (cause 0x%x)\n",
    err_cause);
5726 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
}
}

release_cb:
hl_cb_put(cb);
5733 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

return rc;
5738 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5739 u32 num_regs, u32 val)
5741 struct packet_msg_long *pkt;
5742 struct hl_cs_job *job;
5747 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5749 if (cb_size > SZ_2M) {
5750 dev_err(hdev->dev, "CB size must be smaller than %u bytes", SZ_2M);
return -ENOMEM;
}
5754 cb = hl_cb_kernel_create(hdev, cb_size, false);
if (!cb)
    return -EFAULT;
5758 pkt = cb->kernel_address;
5760 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5761 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5762 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5763 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5764 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5766 for (i = 0; i < num_regs ; i++, pkt++) {
5767 pkt->ctl = cpu_to_le32(ctl);
5768 pkt->value = cpu_to_le32(val);
5769 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5772 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
if (!job) {
5774     dev_err(hdev->dev, "Failed to allocate a new job\n");
    rc = -ENOMEM;
    goto release_cb;
}
job->id = 0;
job->user_cb = cb;
5781 atomic_inc(&job->user_cb->cs_cnt);
5782 job->user_cb_size = cb_size;
5783 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5784 job->patched_cb = job->user_cb;
5785 job->job_cb_size = cb_size;
5787 hl_debugfs_add_job(hdev, job);
5789 rc = gaudi_send_job_on_qman0(hdev, job);
5790 hl_debugfs_remove_job(hdev, job);
kfree(job);
5792 atomic_dec(&cb->cs_cnt);
release_cb:
hl_cb_put(cb);
5796 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

return rc;
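/*
 * gaudi_schedule_register_memset - deferred variant of the register
 * memset: when a live compute context exists, the MSG_LONG CB is put on
 * the context's pending-CB list to be submitted on its behalf later;
 * otherwise the registers are simply written synchronously.
 */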
5801 static int gaudi_schedule_register_memset(struct hl_device *hdev,
5802 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
5805 struct hl_pending_cb *pending_cb;
5806 struct packet_msg_long *pkt;
5811 mutex_lock(&hdev->fpriv_list_lock);
5812 ctx = hdev->compute_ctx;
5814 /* If no compute context available or context is going down
5815 * memset registers directly
5817 if (!ctx || kref_read(&ctx->refcount) == 0) {
5818 rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
5819     mutex_unlock(&hdev->fpriv_list_lock);
    return rc;
}
5823 mutex_unlock(&hdev->fpriv_list_lock);
5825 cb_size = (sizeof(*pkt) * num_regs) +
5826 sizeof(struct packet_msg_prot) * 2;
5828 if (cb_size > SZ_2M) {
5829 dev_err(hdev->dev, "CB size must be smaller than %u bytes", SZ_2M);
return -ENOMEM;
}
5833 pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
if (!pending_cb)
    return -ENOMEM;
5837 cb = hl_cb_kernel_create(hdev, cb_size, false);
if (!cb) {
    kfree(pending_cb);
    return -EFAULT;
}
5843 pkt = cb->kernel_address;
5845 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5846 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5847 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5848 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5849 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5851 for (i = 0; i < num_regs ; i++, pkt++) {
5852 pkt->ctl = cpu_to_le32(ctl);
5853 pkt->value = cpu_to_le32(val);
5854 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5857 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5859 pending_cb->cb = cb;
5860 pending_cb->cb_size = cb_size;
5861 /* The queue ID MUST be an external queue ID. Otherwise, we will
5862 * have undefined behavior
5864 pending_cb->hw_queue_id = hw_queue_id;
5866 spin_lock(&ctx->pending_cb_lock);
5867 list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
5868 spin_unlock(&ctx->pending_cb_lock);

return 0;
5873 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5879 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5880 num_regs = NUM_OF_SOB_IN_BLOCK;
5881 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
if (rc) {
5883     dev_err(hdev->dev, "failed resetting SM registers");
    return rc;
}
5887 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5888 num_regs = NUM_OF_SOB_IN_BLOCK;
5889 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
if (rc) {
5891     dev_err(hdev->dev, "failed resetting SM registers");
    return rc;
}
5895 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5896 num_regs = NUM_OF_SOB_IN_BLOCK;
5897 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
if (rc) {
5899     dev_err(hdev->dev, "failed resetting SM registers");
    return rc;
}
5903 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5904 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5905 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
if (rc) {
5907     dev_err(hdev->dev, "failed resetting SM registers");
    return rc;
}
5911 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5912 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5913 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
if (rc) {
5915     dev_err(hdev->dev, "failed resetting SM registers");
    return rc;
}
5919 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5920 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5921 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
if (rc) {
5923     dev_err(hdev->dev, "failed resetting SM registers");
    return rc;
}
5927 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5928 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5929 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5930 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
if (rc) {
5932     dev_err(hdev->dev, "failed resetting SM registers");
    return rc;
}
5936 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5937 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5938 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5939 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
if (rc) {
5941     dev_err(hdev->dev, "failed resetting SM registers");
    return rc;
}

return 0;
5948 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5950 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5951 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5954 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5955 u64 sob_addr = CFG_BASE +
5956     mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
    (i * sob_delta);
5958 u32 dma_offset = i * DMA_CORE_OFFSET;
5960 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5961 lower_32_bits(sob_addr));
5962 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5963 upper_32_bits(sob_addr));
5964 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5966 /* For DMAs 2-7, we need to restore WR_AWUSER_31_11, as it can be
5967  * modified by the user for SRAM reduction
 */
5970 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5975 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5980 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5981 qman_offset = i * DMA_QMAN_OFFSET;
5982 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5985 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5986 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5987 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5990 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5991 qman_offset = i * TPC_QMAN_OFFSET;
5992 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5995 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5996 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5997 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5998 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6002 static int gaudi_restore_user_registers(struct hl_device *hdev)
6006 rc = gaudi_restore_sm_registers(hdev);
6010 gaudi_restore_dma_registers(hdev);
6011 gaudi_restore_qm_registers(hdev);
6013 return 0;
6014 }
6016 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6018 return gaudi_restore_user_registers(hdev);
6021 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6023 struct asic_fixed_properties *prop = &hdev->asic_prop;
6024 struct gaudi_device *gaudi = hdev->asic_specific;
6025 u64 addr = prop->mmu_pgt_addr;
6026 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6028 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6029 return 0;
6031 return gaudi_memset_device_memory(hdev, addr, size, 0);
6032 }
6034 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6039 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6040 bool user_address, u32 *val)
6042 struct asic_fixed_properties *prop = &hdev->asic_prop;
6043 struct gaudi_device *gaudi = hdev->asic_specific;
6044 u64 hbm_bar_addr, host_phys_end;
6045 int rc = 0;
6047 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6049 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6051 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6052 (hdev->clock_gating_mask &
6053 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6055 dev_err_ratelimited(hdev->dev,
6056 "Can't read register - clock gating is enabled!\n");
6057 rc = -EFAULT;
6058 } else {
6059 *val = RREG32(addr - CFG_BASE);
6060 }
6062 } else if ((addr >= SRAM_BASE_ADDR) &&
6063 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6064 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6065 (addr - SRAM_BASE_ADDR));
6066 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6067 u64 bar_base_addr = DRAM_PHYS_BASE +
6068 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6070 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6071 if (hbm_bar_addr != U64_MAX) {
6072 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6073 (addr - bar_base_addr));
6075 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6076 hbm_bar_addr);
6077 }
6078 if (hbm_bar_addr == U64_MAX)
6079 rc = -EIO;
6080 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6081 user_address && !iommu_present(&pci_bus_type)) {
6082 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6083 } else {
6084 rc = -EFAULT;
6085 }
6087 return rc;
6088 }
6090 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6091 bool user_address, u32 val)
6093 struct asic_fixed_properties *prop = &hdev->asic_prop;
6094 struct gaudi_device *gaudi = hdev->asic_specific;
6095 u64 hbm_bar_addr, host_phys_end;
6096 int rc = 0;
6098 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6100 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6102 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6103 (hdev->clock_gating_mask &
6104 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6106 dev_err_ratelimited(hdev->dev,
6107 "Can't write register - clock gating is enabled!\n");
6108 rc = -EFAULT;
6109 } else {
6110 WREG32(addr - CFG_BASE, val);
6111 }
6113 } else if ((addr >= SRAM_BASE_ADDR) &&
6114 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6115 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6116 (addr - SRAM_BASE_ADDR));
6117 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6118 u64 bar_base_addr = DRAM_PHYS_BASE +
6119 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6121 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6122 if (hbm_bar_addr != U64_MAX) {
6123 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6124 (addr - bar_base_addr));
6126 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6127 hbm_bar_addr);
6128 }
6129 if (hbm_bar_addr == U64_MAX)
6130 rc = -EIO;
6131 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6132 user_address && !iommu_present(&pci_bus_type)) {
6133 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6134 } else {
6135 rc = -EFAULT;
6136 }
6138 return rc;
6139 }
6141 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6142 bool user_address, u64 *val)
6144 struct asic_fixed_properties *prop = &hdev->asic_prop;
6145 struct gaudi_device *gaudi = hdev->asic_specific;
6146 u64 hbm_bar_addr, host_phys_end;
6147 int rc = 0;
6149 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6151 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6153 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6154 (hdev->clock_gating_mask &
6155 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6157 dev_err_ratelimited(hdev->dev,
6158 "Can't read register - clock gating is enabled!\n");
6159 rc = -EFAULT;
6160 } else {
6161 u32 val_l = RREG32(addr - CFG_BASE);
6162 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6164 *val = (((u64) val_h) << 32) | val_l;
6165 }
6167 } else if ((addr >= SRAM_BASE_ADDR) &&
6168 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6169 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6170 (addr - SRAM_BASE_ADDR));
6171 } else if (addr <=
6172 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6173 u64 bar_base_addr = DRAM_PHYS_BASE +
6174 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6176 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6177 if (hbm_bar_addr != U64_MAX) {
6178 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6179 (addr - bar_base_addr));
6181 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6182 hbm_bar_addr);
6183 }
6184 if (hbm_bar_addr == U64_MAX)
6185 rc = -EIO;
6186 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6187 user_address && !iommu_present(&pci_bus_type)) {
6188 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6196 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6197 bool user_address, u64 val)
6199 struct asic_fixed_properties *prop = &hdev->asic_prop;
6200 struct gaudi_device *gaudi = hdev->asic_specific;
6201 u64 hbm_bar_addr, host_phys_end;
6202 int rc = 0;
6204 host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6206 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6208 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6209 (hdev->clock_gating_mask &
6210 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6212 dev_err_ratelimited(hdev->dev,
6213 "Can't write register - clock gating is enabled!\n");
6214 rc = -EFAULT;
6215 } else {
6216 WREG32(addr - CFG_BASE, lower_32_bits(val));
6217 WREG32(addr + sizeof(u32) - CFG_BASE,
6218 upper_32_bits(val));
6219 }
6221 } else if ((addr >= SRAM_BASE_ADDR) &&
6222 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6223 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6224 (addr - SRAM_BASE_ADDR));
6225 } else if (addr <=
6226 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6227 u64 bar_base_addr = DRAM_PHYS_BASE +
6228 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6230 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6231 if (hbm_bar_addr != U64_MAX) {
6232 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6233 (addr - bar_base_addr));
6235 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6236 hbm_bar_addr);
6237 }
6238 if (hbm_bar_addr == U64_MAX)
6239 rc = -EIO;
6240 } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6241 user_address && !iommu_present(&pci_bus_type)) {
6242 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6243 } else {
6244 rc = -EFAULT;
6245 }
6247 return rc;
6248 }
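/*
 * All four debugfs accessors above dispatch on the same address map;
 * a condensed view, with bounds taken from the checks in the code
 * (host access additionally requires user_address and no IOMMU):
 *
 *   [CFG_BASE, CFG_BASE + CFG_SIZE)       - RREG32/WREG32
 *   [SRAM_BASE_ADDR, + SRAM_BAR_SIZE)     - SRAM BAR readl/writel
 *   [DRAM_PHYS_BASE, + dram_size)         - HBM BAR, window moved as needed
 *   [HOST_PHYS_BASE, + HOST_PHYS_SIZE)    - phys_to_virt() access
 */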
6250 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6251 u32 size_to_dma, dma_addr_t dma_addr)
6257 dma_offset = dma_id * DMA_CORE_OFFSET;
6259 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6260 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6261 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6262 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6263 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6264 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6265 (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6267 rc = hl_poll_timeout(
6268 hdev,
6269 mmDMA0_CORE_STS0 + dma_offset,
6270 val,
6271 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6272 0,
6273 1000000);
6275 if (rc) {
6276 dev_err(hdev->dev,
6277 "DMA %d timed out during reading of 0x%llx\n",
6278 dma_id, addr);
6279 return -EIO;
6280 }
6282 /* Verify DMA is OK */
6283 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6284 if (err_cause) {
6285 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6286 dev_dbg(hdev->dev,
6287 "Clearing DMA0 engine from errors (cause 0x%x)\n",
6288 err_cause);
6289 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6291 return -EIO;
6292 }
6294 return 0;
6295 }
6297 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6298 void *blob_addr)
6299 {
6300 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6301 struct gaudi_device *gaudi = hdev->asic_specific;
6302 u64 dma_offset, qm_offset;
6303 dma_addr_t dma_addr;
6304 void *kernel_addr;
6305 bool is_eng_idle;
6306 int rc = 0, dma_id;
6308 kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6309 hdev, SZ_2M,
6310 &dma_addr,
6311 GFP_KERNEL | __GFP_ZERO);
6313 if (!kernel_addr)
6314 return -ENOMEM;
6316 mutex_lock(&gaudi->clk_gate_mutex);
6318 hdev->asic_funcs->disable_clock_gating(hdev);
6320 hdev->asic_funcs->hw_queues_lock(hdev);
6322 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6323 dma_offset = dma_id * DMA_CORE_OFFSET;
6324 qm_offset = dma_id * DMA_QMAN_OFFSET;
6325 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6326 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6328 if (!is_eng_idle) {
6329 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6330 dma_offset = dma_id * DMA_CORE_OFFSET;
6331 qm_offset = dma_id * DMA_QMAN_OFFSET;
6332 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6333 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6335 if (!is_eng_idle) {
6336 dev_err_ratelimited(hdev->dev,
6337 "Can't read via DMA because it is BUSY\n");
6338 rc = -EAGAIN;
6339 goto out;
6340 }
6341 }
6343 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6344 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6345 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6347 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6348 * using the compute ctx ASID, if one exists; if not, use the kernel ctx
6349 * ASID
6350 */
6351 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6353 /* Verify DMA is OK */
6354 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6355 if (err_cause) {
6356 dev_dbg(hdev->dev,
6357 "Clearing DMA0 engine from errors (cause 0x%x)\n",
6358 err_cause);
6359 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6360 }
6362 pos = 0;
6363 size_left = size;
6364 size_to_dma = SZ_2M;
6366 while (size_left > 0) {
6368 if (size_left < SZ_2M)
6369 size_to_dma = size_left;
6371 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6372 dma_addr);
6373 if (rc)
6374 break;
6376 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6378 if (size_left <= SZ_2M)
6379 break;
6381 pos += SZ_2M;
6382 addr += SZ_2M;
6383 size_left -= SZ_2M;
6384 }
6386 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6387 * using the compute ctx ASID, if one exists; if not, use the kernel ctx
6388 * ASID
6389 */
6390 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6391 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6393 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6395 out:
6396 hdev->asic_funcs->hw_queues_unlock(hdev);
6398 hdev->asic_funcs->set_clock_gating(hdev);
6400 mutex_unlock(&gaudi->clk_gate_mutex);
6402 hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6403 dma_addr);
6405 return rc;
6406 }
6408 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6410 struct gaudi_device *gaudi = hdev->asic_specific;
6412 if (hdev->hard_reset_pending)
6413 return U64_MAX;
6415 return readq(hdev->pcie_bar[HBM_BAR_ID] +
6416 (addr - gaudi->hbm_bar_cur_addr));
6419 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6421 struct gaudi_device *gaudi = hdev->asic_specific;
6423 if (hdev->hard_reset_pending)
6424 return;
6426 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6427 (addr - gaudi->hbm_bar_cur_addr);
6428 }
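/*
 * Both PTE accessors above assume the HBM BAR window already covers
 * addr: the device physical address becomes a BAR offset by subtracting
 * the window base cached in gaudi->hbm_bar_cur_addr. A sketch of the
 * invariant they rely on (illustrative only):
 *
 *	u64 bar_off = addr - gaudi->hbm_bar_cur_addr;
 *
 *	WARN_ON(bar_off >= hdev->asic_prop.dram_pci_bar_size);
 */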
6430 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6432 /* mask to zero the MMBP and ASID bits */
6433 WREG32_AND(reg, ~0x7FF);
6434 WREG32_OR(reg, asid);
6435 }
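/*
 * Spelled-out equivalent of the read-modify-write above, assuming the
 * layout implied by the mask: the ASID field in the low bits and the
 * MMU-bypass (MMBP) bit directly above it, so ~0x7FF clears both.
 * Illustrative only, not driver code.
 */
static inline void gaudi_mmu_prepare_reg_rmw(struct hl_device *hdev,
		u64 reg, u32 asid)
{
	u32 val = RREG32(reg);

	val &= ~0x7FF;	/* clear the ASID and MMBP fields */
	val |= asid;	/* program the new ASID, bypass stays off */
	WREG32(reg, val);
}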
6437 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6439 struct gaudi_device *gaudi = hdev->asic_specific;
6441 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6444 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6445 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6446 return;
6447 }
6449 mutex_lock(&gaudi->clk_gate_mutex);
6451 hdev->asic_funcs->disable_clock_gating(hdev);
6453 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6454 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6455 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6456 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6457 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6459 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6460 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6461 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6462 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6463 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6465 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6466 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6467 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6468 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6469 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6471 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6472 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6473 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6474 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6475 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6477 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6478 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6479 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6480 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6481 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6483 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6484 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6485 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6486 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6487 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6489 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6490 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6491 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6492 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6493 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6495 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6496 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6497 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6498 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6499 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6501 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6502 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6503 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6504 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6505 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6506 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6507 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6508 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6510 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6511 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6512 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6513 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6514 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6515 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6516 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6518 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6519 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6520 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6521 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6522 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6523 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6524 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6526 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6527 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6528 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6529 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6530 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6531 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6532 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6534 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6535 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6536 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6537 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6538 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6539 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6540 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6542 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6543 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6544 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6545 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6546 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6547 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6548 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6550 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6551 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6552 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6553 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6554 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6555 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6556 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6558 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6559 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6560 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6561 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6562 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6563 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6564 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6566 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6567 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6568 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6569 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6570 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6571 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6572 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6574 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6575 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6576 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6577 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6578 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6579 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6580 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6581 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6582 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6583 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6585 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6586 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6587 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6588 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6589 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6590 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6591 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6592 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6593 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6594 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6595 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6596 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6598 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6599 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0, asid);
6601 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1, asid);
6603 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2, asid);
6605 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3, asid);
6607 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4, asid);
6609 }
6611 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6612 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0, asid);
6614 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1, asid);
6616 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2, asid);
6618 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3, asid);
6620 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4, asid);
6622 }
6624 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6625 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0, asid);
6627 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1, asid);
6629 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2, asid);
6631 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3, asid);
6633 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4, asid);
6635 }
6637 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6638 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0, asid);
6640 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1, asid);
6642 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2, asid);
6644 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3, asid);
6646 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4, asid);
6648 }
6650 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6651 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0, asid);
6653 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1, asid);
6655 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2, asid);
6657 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3, asid);
6659 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4, asid);
6661 }
6663 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6664 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0, asid);
6666 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1, asid);
6668 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2, asid);
6670 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3, asid);
6672 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4, asid);
6674 }
6676 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6677 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0, asid);
6679 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1, asid);
6681 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2, asid);
6683 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3, asid);
6685 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4, asid);
6687 }
6689 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6690 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0, asid);
6692 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1, asid);
6694 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2, asid);
6696 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3, asid);
6698 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4, asid);
6700 }
6702 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6703 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0, asid);
6705 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1, asid);
6707 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2, asid);
6709 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3, asid);
6711 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4, asid);
6713 }
6715 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6716 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0, asid);
6718 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1, asid);
6720 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2, asid);
6722 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3, asid);
6724 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4, asid);
6726 }
6728 hdev->asic_funcs->set_clock_gating(hdev);
6730 mutex_unlock(&gaudi->clk_gate_mutex);
6731 }
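/*
 * Illustrative alternative, not driver code: the ten NIC blocks above
 * differ only in QMAN stride and capability bit, so the same
 * programming can be expressed as a loop. HW_CAP_NIC_SHIFT is a
 * hypothetical name for the bit position of HW_CAP_NIC0, and the
 * GLBL_NON_SECURE_PROPS_0..4 registers are assumed to be consecutive
 * 32-bit registers.
 */
static void gaudi_mmu_prepare_nic_qmans(struct hl_device *hdev, u32 asid)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 i, j;

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		u64 qm_props = mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0 +
			(i >> 1) * NIC_MACRO_QMAN_OFFSET +
			(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;

		if (!(gaudi->hw_cap_initialized &
				BIT(HW_CAP_NIC_SHIFT + i)))
			continue;

		for (j = 0 ; j < 5 ; j++)
			gaudi_mmu_prepare_reg(hdev, qm_props + j * 4, asid);
	}
}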
6733 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6734 struct hl_cs_job *job)
6736 struct packet_msg_prot *fence_pkt;
6737 u32 *fence_ptr;
6738 dma_addr_t fence_dma_addr;
6739 struct hl_cb *cb;
6740 u32 tmp, timeout, dma_offset;
6741 int rc;
6743 if (hdev->pldm)
6744 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6745 else
6746 timeout = HL_DEVICE_TIMEOUT_USEC;
6748 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6749 dev_err_ratelimited(hdev->dev,
6750 "Can't send driver job on QMAN0 because the device is not idle\n");
6751 return -EBUSY;
6752 }
6754 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6755 &fence_dma_addr);
6756 if (!fence_ptr) {
6757 dev_err(hdev->dev,
6758 "Failed to allocate fence memory for QMAN0\n");
6759 return -ENOMEM;
6760 }
6762 cb = job->patched_cb;
6764 fence_pkt = cb->kernel_address +
6765 job->job_cb_size - sizeof(struct packet_msg_prot);
6767 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6768 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6769 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6771 fence_pkt->ctl = cpu_to_le32(tmp);
6772 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6773 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6775 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6777 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6779 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6780 job->job_cb_size, cb->bus_address);
6781 if (rc) {
6782 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6783 goto free_fence_ptr;
6784 }
6786 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6787 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6788 timeout, true);
6790 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6792 if (rc == -ETIMEDOUT) {
6793 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6794 goto free_fence_ptr;
6795 }
6797 free_fence_ptr:
6798 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6799 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6801 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6802 fence_dma_addr);
6804 return rc;
6805 }
6806 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6808 if (event_type >= GAUDI_EVENT_SIZE)
6809 goto event_not_supported;
6811 if (!gaudi_irq_map_table[event_type].valid)
6812 goto event_not_supported;
6814 snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6815 return;
6818 event_not_supported:
6819 snprintf(desc, size, "N/A");
6822 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6823 u32 x_y, bool is_write)
6825 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6827 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6828 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6831 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6832 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6833 dma_id[0] = 0;
6834 dma_id[1] = 2;
6835 break;
6836 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6837 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6838 dma_id[0] = 1;
6839 dma_id[1] = 3;
6840 break;
6841 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6842 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6843 dma_id[0] = 4;
6844 dma_id[1] = 6;
6845 break;
6846 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6847 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6848 dma_id[0] = 5;
6849 dma_id[1] = 7;
6850 break;
6851 default:
6852 goto unknown_initiator;
6853 }
6855 for (i = 0 ; i < 2 ; i++) {
6856 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6857 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6858 }
6860 switch (x_y) {
6861 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6862 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6863 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6864 return "DMA0";
6865 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6866 return "DMA2";
6867 else
6868 return "DMA0 or DMA2";
6869 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6870 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6871 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6872 return "DMA1";
6873 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6874 return "DMA3";
6875 else
6876 return "DMA1 or DMA3";
6877 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6878 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6879 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6880 return "DMA4";
6881 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6882 return "DMA6";
6883 else
6884 return "DMA4 or DMA6";
6885 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6886 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6887 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6888 return "DMA5";
6889 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6890 return "DMA7";
6891 else
6892 return "DMA5 or DMA7";
6893 }
6895 unknown_initiator:
6896 return "unknown initiator";
6897 }
6899 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
6902 u32 val, x_y, axi_id;
6904 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6905 RREG32(mmMMU_UP_RAZWI_READ_ID);
6906 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6907 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6908 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6909 RAZWI_INITIATOR_AXI_ID_SHIFT);
6911 switch (x_y) {
6912 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6913 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6914 return "TPC0";
6915 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6916 return "NIC0";
6917 break;
6918 case RAZWI_INITIATOR_ID_X_Y_TPC1:
6919 return "TPC1";
6920 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6921 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6922 return "MME0";
6923 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6924 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6925 return "MME1";
6926 case RAZWI_INITIATOR_ID_X_Y_TPC2:
6927 return "TPC2";
6928 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6929 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6930 return "TPC3";
6931 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6932 return "PCI";
6933 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6934 return "CPU";
6935 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6936 return "PSOC";
6937 break;
6938 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6939 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6940 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6941 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6942 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6943 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6944 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6945 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6946 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
6947 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6948 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6949 return "TPC4";
6950 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6951 return "NIC1";
6952 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6953 return "NIC2";
6954 break;
6955 case RAZWI_INITIATOR_ID_X_Y_TPC5:
6956 return "TPC5";
6957 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6958 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6959 return "MME2";
6960 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6961 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6962 return "MME3";
6963 case RAZWI_INITIATOR_ID_X_Y_TPC6:
6964 return "TPC6";
6965 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6966 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6967 return "TPC7";
6968 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6969 return "NIC4";
6970 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6971 return "NIC5";
6972 break;
6973 default:
6974 break;
6975 }
6977 dev_err(hdev->dev,
6978 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6980 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6981 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6982 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6983 RAZWI_INITIATOR_AXI_ID_MASK);
6985 return "unknown initiator";
6988 static void gaudi_print_razwi_info(struct hl_device *hdev)
6990 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6991 dev_err_ratelimited(hdev->dev,
6992 "RAZWI event caused by illegal write of %s\n",
6993 gaudi_get_razwi_initiator_name(hdev, true));
6994 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6997 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6998 dev_err_ratelimited(hdev->dev,
6999 "RAZWI event caused by illegal read of %s\n",
7000 gaudi_get_razwi_initiator_name(hdev, false));
7001 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7005 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7007 struct gaudi_device *gaudi = hdev->asic_specific;
7011 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7014 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7015 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7016 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7017 addr <<= 32;
7018 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7020 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7023 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7026 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7027 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7028 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7029 addr <<= 32;
7030 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7032 dev_err_ratelimited(hdev->dev,
7033 "MMU access error on va 0x%llx\n", addr);
7035 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7036 }
7037 }
7039 /*
7040 * +-------------------+------------------------------------------------------+
7041 * | Configuration Reg | Description |
7043 * +-------------------+------------------------------------------------------+
7044 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
7045 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
7046 * | |0xF34 memory wrappers 63:32 |
7047 * | |0xF38 memory wrappers 95:64 |
7048 * | |0xF3C memory wrappers 127:96 |
7049 * +-------------------+------------------------------------------------------+
7050 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
7051 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
7052 * | |0xF44 memory wrappers 63:32 |
7053 * | |0xF48 memory wrappers 95:64 |
7054 * | |0xF4C memory wrappers 127:96 |
7055 * +-------------------+------------------------------------------------------+
7056 */
7057 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7058 struct ecc_info_extract_params *params, u64 *ecc_address,
7059 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7061 struct gaudi_device *gaudi = hdev->asic_specific;
7062 u32 i, num_mem_regs, reg, err_bit;
7063 u64 err_addr, err_word = 0;
7064 int rc = 0;
7066 num_mem_regs = params->num_memories / 32 +
7067 ((params->num_memories % 32) ? 1 : 0);
7069 if (params->block_address >= CFG_BASE)
7070 params->block_address -= CFG_BASE;
7072 if (params->derr)
7073 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7074 else
7075 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7077 if (params->disable_clock_gating) {
7078 mutex_lock(&gaudi->clk_gate_mutex);
7079 hdev->asic_funcs->disable_clock_gating(hdev);
7082 /* Set invalid wrapper index */
7083 *memory_wrapper_idx = 0xFF;
7085 /* Iterate through memory wrappers, a single bit must be set */
7086 for (i = 0 ; i < num_mem_regs ; i++) {
7087 err_addr += i * 4;
7088 err_word = RREG32(err_addr);
7089 if (err_word) {
7090 err_bit = __ffs(err_word);
7091 *memory_wrapper_idx = err_bit + (32 * i);
7092 break;
7093 }
7094 }
7096 if (*memory_wrapper_idx == 0xFF) {
7097 dev_err(hdev->dev, "ECC error information cannot be found\n");
7098 rc = -EINVAL;
7099 goto enable_clk_gate;
7100 }
7102 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7103 *memory_wrapper_idx);
7105 *ecc_address =
7106 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7107 *ecc_syndrom =
7108 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7110 /* Clear error indication */
7111 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7112 if (params->derr)
7113 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7114 else
7115 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7117 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7119 enable_clk_gate:
7120 if (params->disable_clock_gating) {
7121 hdev->asic_funcs->set_clock_gating(hdev);
7123 mutex_unlock(&gaudi->clk_gate_mutex);
7124 }
7126 return rc;
7127 }
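/*
 * Illustrative restatement of the wrapper-index mapping described in
 * the table above gaudi_extract_ecc_info(): memory wrapper w is
 * reported in the 32-bit register at offset (w / 32) * 4 within the
 * 0xF30 (single error) or 0xF40 (double error) group, at bit w % 32,
 * matching the __ffs()-based scan in the function. The helper names
 * below are hypothetical, not driver code.
 */
static inline u32 gaudi_ecc_err_reg(u32 group_base, u32 wrapper)
{
	return group_base + (wrapper / 32) * 4;	/* group_base: 0xF30/0xF40 */
}

static inline u32 gaudi_ecc_err_bit(u32 wrapper)
{
	return wrapper % 32;
}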
7130 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7132 * @idx: the current pi/ci value
7133 * @q_len: the queue length (power of 2)
7135 * @return the cyclically decremented index
7137 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7139 u32 mask = q_len - 1;
7141 /*
7142 * modular decrement is equivalent to adding (queue_size - 1);
7143 * later we take the LSBs to make sure the value is in the
7144 * range [0, queue_len - 1]
7145 */
7146 return (idx + q_len - 1) & mask;
7147 }
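/*
 * Worked example: with q_len = 8 the mask is 0x7, so
 * gaudi_queue_idx_dec(0, 8) == (0 + 7) & 0x7 == 7 (wrap to the end)
 * and gaudi_queue_idx_dec(3, 8) == (3 + 7) & 0x7 == 2.
 */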
7150 * gaudi_print_sw_config_stream_data - print SW config stream data
7152 * @hdev: pointer to the habanalabs device structure
7153 * @stream: the QMAN's stream
7154 * @qman_base: base address of QMAN registers block
7156 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7157 u64 qman_base)
7158 {
7159 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7160 u32 cq_ptr_lo_off, size;
7162 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7164 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7165 stream * cq_ptr_lo_off;
7166 cq_ptr_hi = cq_ptr_lo +
7167 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7168 cq_tsize = cq_ptr_lo +
7169 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7171 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7172 size = RREG32(cq_tsize);
7173 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
7174 stream, cq_ptr, size);
7178 * gaudi_print_last_pqes_on_err - print last PQEs on error
7180 * @hdev: pointer to the habanalabs device structure
7181 * @qid_base: first QID of the QMAN (out of 4 streams)
7182 * @stream: the QMAN's stream
7183 * @qman_base: base address of QMAN registers block
7184 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7186 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7187 u32 stream, u64 qman_base,
7188 bool pr_sw_conf)
7189 {
7190 u32 ci, qm_ci_stream_off, queue_len;
7191 struct hl_hw_queue *q;
7192 u64 pq_ci;
7193 int i;
7195 q = &hdev->kernel_queues[qid_base + stream];
7197 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7198 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7199 stream * qm_ci_stream_off;
7201 queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7202 q->int_queue_len : HL_QUEUE_LENGTH;
7204 hdev->asic_funcs->hw_queues_lock(hdev);
7206 if (pr_sw_conf)
7207 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7209 ci = RREG32(pq_ci);
7211 /* we should start printing from ci - 1 */
7212 ci = gaudi_queue_idx_dec(ci, queue_len);
7214 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7215 struct hl_bd *bd;
7216 u64 addr;
7217 u32 len;
7219 bd = q->kernel_address;
7220 bd += ci;
7222 len = le32_to_cpu(bd->len);
7223 /* len 0 means an uninitialized entry - break */
7224 if (!len)
7225 break;
7227 addr = le64_to_cpu(bd->ptr);
7229 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7230 stream, ci, addr, len);
7232 /* get previous ci, wrap if needed */
7233 ci = gaudi_queue_idx_dec(ci, queue_len);
7236 hdev->asic_funcs->hw_queues_unlock(hdev);
7240 * print_qman_data_on_err - extract QMAN data on error
7242 * @hdev: pointer to the habanalabs device structure
7243 * @qid_base: first QID of the QMAN (out of 4 streams)
7244 * @stream: the QMAN's stream
7245 * @qman_base: base address of QMAN registers block
7247 * This function attempts to extract as much data as possible on QMAN error.
7248 * On an upper CP, print the SW config stream data and the last 8 PQEs.
7249 * On the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7251 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7252 u32 stream, u64 qman_base)
7256 if (stream != QMAN_STREAMS) {
7257 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7258 true);
7259 return;
7260 }
7262 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7264 for (i = 0; i < QMAN_STREAMS; i++)
7265 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7266 false);
7267 }
7269 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7270 const char *qm_name,
7271 u64 qman_base,
7272 u32 qid_base)
7273 {
7274 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7275 u64 glbl_sts_addr, arb_err_addr;
7276 char reg_desc[32];
7278 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7279 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7281 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7282 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7283 glbl_sts_clr_val = 0;
7284 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7289 if (i == QMAN_STREAMS)
7290 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7292 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7294 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7295 if (glbl_sts_val & BIT(j)) {
7296 dev_err_ratelimited(hdev->dev,
7297 "%s %s. err cause: %s\n",
7299 gaudi_qman_error_cause[j]);
7300 glbl_sts_clr_val |= BIT(j);
7304 /* Write 1 clear errors */
7305 if (!hdev->stop_on_err)
7306 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7308 print_qman_data_on_err(hdev, qid_base, i, qman_base);
7311 arb_err_val = RREG32(arb_err_addr);
7316 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7317 if (arb_err_val & BIT(j)) {
7318 dev_err_ratelimited(hdev->dev,
7319 "%s ARB_ERR. err cause: %s\n",
7321 gaudi_qman_arb_error_cause[j]);
7326 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7327 struct hl_eq_sm_sei_data *sei_data)
7329 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7331 switch (sei_data->sei_cause) {
7332 case SM_SEI_SO_OVERFLOW:
7333 dev_err(hdev->dev,
7334 "SM %u SEI Error: SO %u overflow/underflow",
7335 index, le32_to_cpu(sei_data->sei_log));
7336 break;
7337 case SM_SEI_LBW_4B_UNALIGNED:
7338 dev_err(hdev->dev,
7339 "SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7340 index, le32_to_cpu(sei_data->sei_log));
7341 break;
7342 case SM_SEI_AXI_RESPONSE_ERR:
7343 dev_err(hdev->dev,
7344 "SM %u SEI Error: AXI ID %u response error",
7345 index, le32_to_cpu(sei_data->sei_log));
7346 break;
7347 default:
7348 dev_err(hdev->dev, "Unknown SM SEI cause %u",
7349 le32_to_cpu(sei_data->sei_log));
7354 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7355 struct hl_eq_ecc_data *ecc_data)
7357 struct ecc_info_extract_params params;
7358 u64 ecc_address = 0, ecc_syndrom = 0;
7359 u8 index, memory_wrapper_idx = 0;
7360 bool extract_info_from_fw;
7363 switch (event_type) {
7364 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7365 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7366 extract_info_from_fw = true;
7368 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7369 index = event_type - GAUDI_EVENT_TPC0_SERR;
7370 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7371 params.num_memories = 90;
7372 params.derr = false;
7373 params.disable_clock_gating = true;
7374 extract_info_from_fw = false;
7376 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7377 index = event_type - GAUDI_EVENT_TPC0_DERR;
7378 params.block_address =
7379 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7380 params.num_memories = 90;
7381 params.derr = true;
7382 params.disable_clock_gating = true;
7383 extract_info_from_fw = false;
7385 case GAUDI_EVENT_MME0_ACC_SERR:
7386 case GAUDI_EVENT_MME1_ACC_SERR:
7387 case GAUDI_EVENT_MME2_ACC_SERR:
7388 case GAUDI_EVENT_MME3_ACC_SERR:
7389 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7390 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7391 params.num_memories = 128;
7392 params.derr = false;
7393 params.disable_clock_gating = true;
7394 extract_info_from_fw = false;
7396 case GAUDI_EVENT_MME0_ACC_DERR:
7397 case GAUDI_EVENT_MME1_ACC_DERR:
7398 case GAUDI_EVENT_MME2_ACC_DERR:
7399 case GAUDI_EVENT_MME3_ACC_DERR:
7400 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7401 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7402 params.num_memories = 128;
7403 params.derr = true;
7404 params.disable_clock_gating = true;
7405 extract_info_from_fw = false;
7407 case GAUDI_EVENT_MME0_SBAB_SERR:
7408 case GAUDI_EVENT_MME1_SBAB_SERR:
7409 case GAUDI_EVENT_MME2_SBAB_SERR:
7410 case GAUDI_EVENT_MME3_SBAB_SERR:
7411 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7412 params.block_address =
7413 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7414 params.num_memories = 33;
7415 params.derr = false;
7416 params.disable_clock_gating = true;
7417 extract_info_from_fw = false;
7419 case GAUDI_EVENT_MME0_SBAB_DERR:
7420 case GAUDI_EVENT_MME1_SBAB_DERR:
7421 case GAUDI_EVENT_MME2_SBAB_DERR:
7422 case GAUDI_EVENT_MME3_SBAB_DERR:
7423 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7424 params.block_address =
7425 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7426 params.num_memories = 33;
7427 params.derr = true;
7428 params.disable_clock_gating = true;
7429 extract_info_from_fw = false;
7430 break;
7431 default:
7432 return;
7433 }
7435 if (extract_info_from_fw) {
7436 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7437 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7438 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7439 } else {
7440 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7441 &ecc_syndrom, &memory_wrapper_idx);
7442 if (rc)
7443 return;
7444 }
7446 dev_err(hdev->dev,
7447 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7448 ecc_address, ecc_syndrom, memory_wrapper_idx);
7451 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7458 switch (event_type) {
7459 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7460 index = event_type - GAUDI_EVENT_TPC0_QM;
7461 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7462 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7463 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7465 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7466 index = event_type - GAUDI_EVENT_MME0_QM;
7467 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7468 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7469 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7471 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7472 index = event_type - GAUDI_EVENT_DMA0_QM;
7473 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7474 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7475 if (index > 1)
7476 qid_base++;
7477 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7478 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7480 case GAUDI_EVENT_NIC0_QM0:
7481 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7482 qman_base = mmNIC0_QM0_BASE;
7483 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7485 case GAUDI_EVENT_NIC0_QM1:
7486 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7487 qman_base = mmNIC0_QM1_BASE;
7488 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7490 case GAUDI_EVENT_NIC1_QM0:
7491 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7492 qman_base = mmNIC1_QM0_BASE;
7493 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7495 case GAUDI_EVENT_NIC1_QM1:
7496 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7497 qman_base = mmNIC1_QM1_BASE;
7498 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7500 case GAUDI_EVENT_NIC2_QM0:
7501 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7502 qman_base = mmNIC2_QM0_BASE;
7503 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7505 case GAUDI_EVENT_NIC2_QM1:
7506 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7507 qman_base = mmNIC2_QM1_BASE;
7508 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7510 case GAUDI_EVENT_NIC3_QM0:
7511 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7512 qman_base = mmNIC3_QM0_BASE;
7513 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7515 case GAUDI_EVENT_NIC3_QM1:
7516 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7517 qman_base = mmNIC3_QM1_BASE;
7518 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7520 case GAUDI_EVENT_NIC4_QM0:
7521 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7522 qman_base = mmNIC4_QM0_BASE;
7523 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7525 case GAUDI_EVENT_NIC4_QM1:
7526 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7527 qman_base = mmNIC4_QM1_BASE;
7528 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7534 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7537 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7538 bool razwi)
7539 {
7540 char desc[64] = "";
7542 gaudi_get_event_desc(event_type, desc, sizeof(desc));
7543 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7544 event_type, desc);
7546 if (razwi) {
7547 gaudi_print_razwi_info(hdev);
7548 gaudi_print_mmu_error_info(hdev);
7549 }
7550 }
7552 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7553 struct cpucp_pkt_sync_err *sync_err)
7555 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7557 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7558 sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7561 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7562 struct hl_eq_fw_alive *fw_alive)
7565 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7566 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7567 "Minor" : "Critical", fw_alive->process_id,
7568 fw_alive->thread_id, fw_alive->uptime_seconds);
7571 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7573 struct gaudi_device *gaudi = hdev->asic_specific;
7575 /* Unmask all IRQs since some could have been received
7576 * during the soft reset
7577 */
7578 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7581 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7582 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7584 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7585 int err = 0;
7587 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7588 CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7589 if (!hbm_ecc_data) {
7590 dev_err(hdev->dev, "No FW ECC data");
7591 return 0;
7592 }
7594 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7595 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7596 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7597 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7598 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7599 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7600 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7601 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7602 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7603 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7604 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7605 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7606 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7607 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7609 dev_err(hdev->dev,
7610 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7611 device, ch, wr_par, rd_par, ca_par, serr, derr);
7612 dev_err(hdev->dev,
7613 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7614 device, ch, hbm_ecc_data->first_addr, type,
7615 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7616 hbm_ecc_data->dec_cnt);
7618 return 0;
7619 }
7620 if (hdev->asic_prop.fw_security_enabled) {
7621 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7622 return 0;
7623 }
7625 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7626 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7627 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7628 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7629 if (val) {
7630 err = 1;
7631 dev_err(hdev->dev,
7632 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7633 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7634 (val >> 2) & 0x1, (val >> 3) & 0x1,
7635 (val >> 4) & 0x1);
7637 val2 = RREG32(base + ch * 0x1000 + 0x060);
7638 dev_err(hdev->dev,
7639 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7640 device, ch * 2,
7641 RREG32(base + ch * 0x1000 + 0x064),
7642 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7643 (val2 & 0xFF0000) >> 16,
7644 (val2 & 0xFF000000) >> 24);
7645 }
7647 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7648 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7649 if (val) {
7650 err = 1;
7651 dev_err(hdev->dev,
7652 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7653 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7654 (val >> 2) & 0x1, (val >> 3) & 0x1,
7655 (val >> 4) & 0x1);
7657 val2 = RREG32(base + ch * 0x1000 + 0x070);
7658 dev_err(hdev->dev,
7659 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7660 device, ch * 2 + 1,
7661 RREG32(base + ch * 0x1000 + 0x074),
7662 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7663 (val2 & 0xFF0000) >> 16,
7664 (val2 & 0xFF000000) >> 24);
7665 }
7667 /* Clear interrupts */
7668 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7669 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7670 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7671 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7672 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7673 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7676 val = RREG32(base + 0x8F30);
7677 val2 = RREG32(base + 0x8F34);
7678 if (val | val2) {
7679 err = 1;
7680 dev_err(hdev->dev,
7681 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7682 device, val, val2);
7683 }
7684 val = RREG32(base + 0x8F40);
7685 val2 = RREG32(base + 0x8F44);
7686 if (val | val2) {
7687 err = 1;
7688 dev_err(hdev->dev,
7689 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7690 device, val, val2);
7691 }
7693 return err;
7694 }
7696 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7698 switch (hbm_event_type) {
7699 case GAUDI_EVENT_HBM0_SPI_0:
7700 case GAUDI_EVENT_HBM0_SPI_1:
7701 return 0;
7702 case GAUDI_EVENT_HBM1_SPI_0:
7703 case GAUDI_EVENT_HBM1_SPI_1:
7704 return 1;
7705 case GAUDI_EVENT_HBM2_SPI_0:
7706 case GAUDI_EVENT_HBM2_SPI_1:
7707 return 2;
7708 case GAUDI_EVENT_HBM3_SPI_0:
7709 case GAUDI_EVENT_HBM3_SPI_1:
7710 return 3;
7711 default:
7712 break;
7713 }
7715 /* Should never happen */
7716 return 0;
7717 }
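/*
 * Worked example: GAUDI_EVENT_HBM2_SPI_0 and GAUDI_EVENT_HBM2_SPI_1
 * both map to HBM device 2, which gaudi_hbm_read_interrupts() then
 * uses to compute the MC register base for that device.
 */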
7719 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7720 char *interrupt_name)
7722 struct gaudi_device *gaudi = hdev->asic_specific;
7723 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7724 bool soft_reset_required = false;
7726 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
7727 * gating, and thus cannot be done in CPU-CP and should be done instead
7728 * by the driver.
7729 */
7731 mutex_lock(&gaudi->clk_gate_mutex);
7733 hdev->asic_funcs->disable_clock_gating(hdev);
7735 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7736 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7738 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7739 if (tpc_interrupts_cause & BIT(i)) {
7740 dev_err_ratelimited(hdev->dev,
7741 "TPC%d_%s interrupt cause: %s\n",
7742 tpc_id, interrupt_name,
7743 gaudi_tpc_interrupts_cause[i]);
7744 /* If this is QM error, we need to soft-reset */
7746 soft_reset_required = true;
7749 /* Clear interrupts */
7750 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7752 hdev->asic_funcs->set_clock_gating(hdev);
7754 mutex_unlock(&gaudi->clk_gate_mutex);
7756 return soft_reset_required;
7759 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7761 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7762 }
7764 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7766 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7767 }
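/*
 * The divisors encode the event-table layout: each TPC contributes two
 * consecutive DEC events (hence >> 1) and its KRN_ERR entries repeat
 * every six events (hence / 6). For example, if GAUDI_EVENT_TPC3_KRN_ERR -
 * GAUDI_EVENT_TPC0_KRN_ERR == 18, then 18 / 6 == TPC id 3.
 */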
7769 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7770 u16 event_type)
7771 {
7772 switch (event_type) {
7773 case GAUDI_EVENT_FIX_POWER_ENV_S:
7774 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7775 dev_info_ratelimited(hdev->dev,
7776 "Clock throttling due to power consumption\n");
7777 break;
7779 case GAUDI_EVENT_FIX_POWER_ENV_E:
7780 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7781 dev_info_ratelimited(hdev->dev,
7782 "Power envelope is safe, back to optimal clock\n");
7783 break;
7785 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7786 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7787 dev_info_ratelimited(hdev->dev,
7788 "Clock throttling due to overheating\n");
7789 break;
7791 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7792 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7793 dev_info_ratelimited(hdev->dev,
7794 "Thermal envelope is safe, back to optimal clock\n");
7795 break;
7798 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7799 event_type);
7800 break;
7801 }
7802 }
7804 static void gaudi_handle_eqe(struct hl_device *hdev,
7805 struct hl_eq_entry *eq_entry)
7807 struct gaudi_device *gaudi = hdev->asic_specific;
7808 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7809 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7810 >> EQ_CTL_EVENT_TYPE_SHIFT);
7812 bool reset_required;
7814 gaudi->events_stat[event_type]++;
7815 gaudi->events_stat_aggregate[event_type]++;
7817 switch (event_type) {
7818 case GAUDI_EVENT_PCIE_CORE_DERR:
7819 case GAUDI_EVENT_PCIE_IF_DERR:
7820 case GAUDI_EVENT_PCIE_PHY_DERR:
7821 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7822 case GAUDI_EVENT_MME0_ACC_DERR:
7823 case GAUDI_EVENT_MME0_SBAB_DERR:
7824 case GAUDI_EVENT_MME1_ACC_DERR:
7825 case GAUDI_EVENT_MME1_SBAB_DERR:
7826 case GAUDI_EVENT_MME2_ACC_DERR:
7827 case GAUDI_EVENT_MME2_SBAB_DERR:
7828 case GAUDI_EVENT_MME3_ACC_DERR:
7829 case GAUDI_EVENT_MME3_SBAB_DERR:
7830 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7832 case GAUDI_EVENT_CPU_IF_ECC_DERR:
7833 case GAUDI_EVENT_PSOC_MEM_DERR:
7834 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7835 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7836 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7837 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7838 case GAUDI_EVENT_MMU_DERR:
7839 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7840 gaudi_print_irq_info(hdev, event_type, true);
7841 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7844 case GAUDI_EVENT_GIC500:
7845 case GAUDI_EVENT_AXI_ECC:
7846 case GAUDI_EVENT_L2_RAM_ECC:
7847 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7848 gaudi_print_irq_info(hdev, event_type, false);
7851 case GAUDI_EVENT_HBM0_SPI_0:
7852 case GAUDI_EVENT_HBM1_SPI_0:
7853 case GAUDI_EVENT_HBM2_SPI_0:
7854 case GAUDI_EVENT_HBM3_SPI_0:
7855 gaudi_print_irq_info(hdev, event_type, false);
7856 gaudi_hbm_read_interrupts(hdev,
7857 gaudi_hbm_event_to_dev(event_type),
7858 &eq_entry->hbm_ecc_data);
7861 case GAUDI_EVENT_HBM0_SPI_1:
7862 case GAUDI_EVENT_HBM1_SPI_1:
7863 case GAUDI_EVENT_HBM2_SPI_1:
7864 case GAUDI_EVENT_HBM3_SPI_1:
7865 gaudi_print_irq_info(hdev, event_type, false);
7866 gaudi_hbm_read_interrupts(hdev,
7867 gaudi_hbm_event_to_dev(event_type),
7868 &eq_entry->hbm_ecc_data);
7869 hl_fw_unmask_irq(hdev, event_type);
7872 case GAUDI_EVENT_TPC0_DEC:
7873 case GAUDI_EVENT_TPC1_DEC:
7874 case GAUDI_EVENT_TPC2_DEC:
7875 case GAUDI_EVENT_TPC3_DEC:
7876 case GAUDI_EVENT_TPC4_DEC:
7877 case GAUDI_EVENT_TPC5_DEC:
7878 case GAUDI_EVENT_TPC6_DEC:
7879 case GAUDI_EVENT_TPC7_DEC:
7880 gaudi_print_irq_info(hdev, event_type, true);
7881 reset_required = gaudi_tpc_read_interrupts(hdev,
7882 tpc_dec_event_to_tpc_id(event_type),
7883 "AXI_SLV_DEC_Error");
7884 if (reset_required) {
7885 dev_err(hdev->dev, "hard reset required due to %s\n",
7886 gaudi_irq_map_table[event_type].name);
7888 goto reset_device;
7889 } else {
7890 hl_fw_unmask_irq(hdev, event_type);
7891 }
7892 break;
	case GAUDI_EVENT_TPC0_KRN_ERR:
	case GAUDI_EVENT_TPC1_KRN_ERR:
	case GAUDI_EVENT_TPC2_KRN_ERR:
	case GAUDI_EVENT_TPC3_KRN_ERR:
	case GAUDI_EVENT_TPC4_KRN_ERR:
	case GAUDI_EVENT_TPC5_KRN_ERR:
	case GAUDI_EVENT_TPC6_KRN_ERR:
	case GAUDI_EVENT_TPC7_KRN_ERR:
		gaudi_print_irq_info(hdev, event_type, true);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_krn_event_to_tpc_id(event_type),
					"KRN_ERR");
		if (reset_required) {
			dev_err(hdev->dev, "hard reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
		}
		break;
	case GAUDI_EVENT_PCIE_CORE_SERR:
	case GAUDI_EVENT_PCIE_IF_SERR:
	case GAUDI_EVENT_PCIE_PHY_SERR:
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
	case GAUDI_EVENT_CPU_IF_ECC_SERR:
	case GAUDI_EVENT_PSOC_MEM_SERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
		fallthrough;
	case GAUDI_EVENT_MMU_SERR:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		break;
	case GAUDI_EVENT_PCIE_DEC:
	case GAUDI_EVENT_MME0_WBC_RSP:
	case GAUDI_EVENT_MME0_SBAB0_RSP:
	case GAUDI_EVENT_MME1_WBC_RSP:
	case GAUDI_EVENT_MME1_SBAB0_RSP:
	case GAUDI_EVENT_MME2_WBC_RSP:
	case GAUDI_EVENT_MME2_SBAB0_RSP:
	case GAUDI_EVENT_MME3_WBC_RSP:
	case GAUDI_EVENT_MME3_SBAB0_RSP:
	case GAUDI_EVENT_CPU_AXI_SPLITTER:
	case GAUDI_EVENT_PSOC_AXI_DEC:
	case GAUDI_EVENT_PSOC_PRSTN_FALL:
	case GAUDI_EVENT_MMU_PAGE_FAULT:
	case GAUDI_EVENT_MMU_WR_PERM:
	case GAUDI_EVENT_RAZWI_OR_ADC:
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		fallthrough;
	case GAUDI_EVENT_NIC0_QM0:
	case GAUDI_EVENT_NIC0_QM1:
	case GAUDI_EVENT_NIC1_QM0:
	case GAUDI_EVENT_NIC1_QM1:
	case GAUDI_EVENT_NIC2_QM0:
	case GAUDI_EVENT_NIC2_QM1:
	case GAUDI_EVENT_NIC3_QM0:
	case GAUDI_EVENT_NIC3_QM1:
	case GAUDI_EVENT_NIC4_QM0:
	case GAUDI_EVENT_NIC4_QM1:
	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
		gaudi_print_irq_info(hdev, event_type, true);
		gaudi_handle_qman_err(hdev, event_type);
		hl_fw_unmask_irq(hdev, event_type);
		break;
	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
		gaudi_print_irq_info(hdev, event_type, true);
		goto reset_device;
	case GAUDI_EVENT_TPC0_BMON_SPMU:
	case GAUDI_EVENT_TPC1_BMON_SPMU:
	case GAUDI_EVENT_TPC2_BMON_SPMU:
	case GAUDI_EVENT_TPC3_BMON_SPMU:
	case GAUDI_EVENT_TPC4_BMON_SPMU:
	case GAUDI_EVENT_TPC5_BMON_SPMU:
	case GAUDI_EVENT_TPC6_BMON_SPMU:
	case GAUDI_EVENT_TPC7_BMON_SPMU:
	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
		gaudi_print_irq_info(hdev, event_type, false);
		hl_fw_unmask_irq(hdev, event_type);
		break;
	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_print_sm_sei_info(hdev, event_type,
					&eq_entry->sm_sei_data);
		hl_fw_unmask_irq(hdev, event_type);
		break;
	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
		gaudi_print_clk_change_info(hdev, event_type);
		hl_fw_unmask_irq(hdev, event_type);
		break;
	case GAUDI_EVENT_PSOC_GPIO_U16_0:
		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
		dev_err(hdev->dev,
			"Received high temp H/W interrupt %d (cause %d)\n",
			event_type, cause);
		break;
	case GAUDI_EVENT_DEV_RESET_REQ:
		gaudi_print_irq_info(hdev, event_type, false);
		goto reset_device;

	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
		goto reset_device;

	case GAUDI_EVENT_FW_ALIVE_S:
		gaudi_print_irq_info(hdev, event_type, false);
		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
		goto reset_device;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}

	return;

reset_device:
	if (hdev->hard_reset_on_fw_events)
		hl_device_reset(hdev, HL_RESET_HARD);
	else
		hl_fw_unmask_irq(hdev, event_type);
}
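/*
 * gaudi_get_events_stat - return a pointer to either the aggregate or the
 * current event counters array, reporting its size in bytes through @size.
 */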
static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
					u32 *size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (aggregate) {
		*size = (u32) sizeof(gaudi->events_stat_aggregate);
		return gaudi->events_stat_aggregate;
	}

	*size = (u32) sizeof(gaudi->events_stat);
	return gaudi->events_stat;
}
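/*
 * Invalidate the whole MMU STLB cache: push a new producer index and poll
 * mmSTLB_INV_PS until the invalidation state machine goes idle. A timeout
 * escalates to a hard reset, since a stale TLB cannot be tolerated.
 */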
static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
					u32 flags)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->hard_reset_pending)
		return 0;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_PS, 3);
	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
	WREG32(mmSTLB_INV_PS, 2);

	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_PS,
		status,
		!status,
		1000,
		timeout_usec);

	WREG32(mmSTLB_INV_SET, 0);

	if (rc) {
		dev_err_ratelimited(hdev->dev,
					"MMU cache invalidation timeout\n");
		hl_device_reset(hdev, HL_RESET_HARD);
	}

	return rc;
}
static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
						bool is_hard, u32 flags,
						u32 asid, u64 va, u64 size)
{
	/* Treat as invalidate all because there is no range invalidation
	 * in Gaudi
	 */
	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
}
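/*
 * Program the hop0 (page-table root) physical address of @asid into the MMU
 * registers and poll MMU_BUSY until the hardware acknowledges the update.
 */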
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
					u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(MMU_ASID, asid);
	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_BUSY, 0x80000000);

	rc = hl_poll_timeout(
		hdev,
		MMU_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}
static int gaudi_send_heartbeat(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_send_heartbeat(hdev);
}
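/*
 * Handshake with the CPU-CP firmware to fetch the card information, then
 * derive the card type, a default card name if firmware supplied none, and
 * the power limits.
 */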
static int gaudi_cpucp_info_get(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
					mmCPU_BOOT_ERR1);
	if (rc)
		return rc;

	if (!strlen(prop->cpucp_info.card_name))
		strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
				CARD_NAME_MAX_LEN);

	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);

	set_default_power_values(hdev);

	hdev->max_power = prop->max_power_default;

	return 0;
}
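/*
 * Check the idle status of every DMA, TPC, MME and NIC engine. Clock gating
 * is disabled around the register reads so the status values are valid, and
 * an optional debugfs seq_file receives a human-readable table.
 */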
static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
					u8 mask_len, struct seq_file *s)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
	unsigned long *mask = (unsigned long *)mask_arr;
	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
	bool is_idle = true, is_eng_idle, is_slave;
	u64 offset;
	int i, dma_id, port;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	if (s)
		seq_puts(s,
			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
			"---  -------  ------------  ----------  -------------\n");

	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		offset = dma_id * DMA_QMAN_OFFSET;

		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
		if (s)
			seq_printf(s, fmt, dma_id,
				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				qm_cgm_sts, dma_core_sts0);
	}

	if (s)
		seq_puts(s,
			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
			"---  -------  ------------  ----------  ----------\n");

	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		offset = i * TPC_QMAN_OFFSET;
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
		if (s)
			seq_printf(s, fmt, i,
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
	}

	if (s)
		seq_puts(s,
			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
			"---  -------  ------------  ----------  -----------\n");

	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
		offset = i * MME_QMAN_OFFSET;
		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
		is_eng_idle = IS_MME_IDLE(mme_arch_sts);

		/* MME 1 & 3 are slaves, no need to check their QMANs */
		is_slave = i % 2;
		if (!is_slave) {
			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
		}

		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
		if (s) {
			if (!is_slave)
				seq_printf(s, fmt, i,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
			else
				seq_printf(s, mme_slave_fmt, i,
					is_eng_idle ? "Y" : "N", "-",
					"-", mme_arch_sts);
		}
	}

	if (s)
		seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
				"---  -------  ------------  ----------\n");

	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
		offset = i * NIC_MACRO_QMAN_OFFSET;
		port = 2 * i;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (s)
				seq_printf(s, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}

		port = 2 * i + 1;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (s)
				seq_printf(s, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}
	}

	if (s)
		seq_puts(s, "\n");

	hdev->asic_funcs->set_clock_gating(hdev);

	mutex_unlock(&gaudi->clk_gate_mutex);

	return is_idle;
}
static void gaudi_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_lock(&gaudi->hw_queues_lock);
}

static void gaudi_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_unlock(&gaudi->hw_queues_lock);
}
static u32 gaudi_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}

static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
				size_t max_size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
		return 0;

	return hl_fw_get_eeprom_data(hdev, data, max_size);
}
/*
 * this function should be used only during initialization and/or after reset,
 * when there are no active users.
 */
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
		u32 tpc_id)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 kernel_timeout;
	u32 status, offset;
	int rc;

	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);

	if (hdev->pldm)
		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
	else
		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;

	mutex_lock(&gaudi->clk_gate_mutex);

	hdev->asic_funcs->disable_clock_gating(hdev);

	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
			upper_32_bits(tpc_kernel));
	/* set a valid LUT pointer, content is of no significance */
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
			lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));

	WREG32(mmTPC0_CFG_TPC_CMD + offset,
			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d icache prefetch\n",
			tpc_id);
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);

	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d vector pipe\n",
			tpc_id);
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
		status,
		(status == 0),
		1000,
		kernel_timeout);

	hdev->asic_funcs->set_clock_gating(hdev);
	mutex_unlock(&gaudi->clk_gate_mutex);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d kernel to execute\n",
			tpc_id);
		return -EIO;
	}

	return 0;
}
static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int min_alloc_order, rc, collective_cb_size;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	hdev->internal_cb_pool_virt_addr =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					HOST_SPACE_INTERNAL_CB_SZ,
					&hdev->internal_cb_pool_dma_addr,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
			sizeof(struct packet_fence);
	min_alloc_order = ilog2(collective_cb_size);

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev,
			"Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool,
				(uintptr_t) hdev->internal_cb_pool_virt_addr,
				HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base) {
		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}

	mutex_lock(&ctx->mmu_lock);
	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
			hdev->internal_cb_pool_dma_addr,
			HOST_SPACE_INTERNAL_CB_SZ);

	hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
	mutex_unlock(&ctx->mmu_lock);

	if (rc)
		goto unreserve_internal_cb_pool;

	return 0;

unreserve_internal_cb_pool:
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HOST_SPACE_INTERNAL_CB_SZ,
			hdev->internal_cb_pool_virt_addr,
			hdev->internal_cb_pool_dma_addr);

	return rc;
}
static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	mutex_lock(&ctx->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
	mutex_unlock(&ctx->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HOST_SPACE_INTERNAL_CB_SZ,
			hdev->internal_cb_pool_virt_addr,
			hdev->internal_cb_pool_dma_addr);
}
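/*
 * Per-context init/fini: the kernel context (HL_KERNEL_ASID_ID) needs no MMU
 * preparation or internal CB pool, so both paths bail out early for it.
 */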
static int gaudi_ctx_init(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return 0;

	gaudi_mmu_prepare(ctx->hdev, ctx->asid);
	return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
}

static void gaudi_ctx_fini(struct hl_ctx *ctx)
{
	if (ctx->asid == HL_KERNEL_ASID_ID)
		return;

	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
}
static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return gaudi_cq_assignment[cq_idx];
}

static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) +
			sizeof(struct packet_msg_prot) * 2;
}

static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 +
			sizeof(struct packet_fence) +
			sizeof(struct packet_msg_prot) * 2;
}
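/*
 * Build a signal CB: a single MSG_SHORT packet that adds 1 to the sync
 * object, letting the sync manager perform the increment atomically.
 */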
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = cb->kernel_address + size;
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}
static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
					u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
		u16 sob_val, u16 mon_id)
{
	u64 monitor_base;
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u16 msg_addr_offset;
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev,
				"sob_base %u (mask %#x) is not valid\n",
				sob_base, sob_mask);
		return 0;
	}

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;

	memset(pkt, 0, pkt_size);

	/* Monitor config packet: bind the monitor to a sync object */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL*/
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
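/*
 * Build a FENCE packet that stalls the queue's CP until fence counter 2
 * reaches its target value of 1, i.e. until the armed monitor fires and
 * writes its payload to the fence register.
 */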
static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
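/*
 * Translate a queue ID to the address of its CP FENCE2 RDATA register; this
 * is the location the monitor payload is written to in order to release a
 * waiting fence.
 */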
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}
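/*
 * Build a wait CB: three MSG_SHORT packets configure the monitor (payload
 * address low/high and payload data), a fourth arms it on the sync object,
 * and a final FENCE packet stalls the queue until the monitor fires.
 */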
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
		struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
	int rc;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
			CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 1, 0);
	if (rc)
		dev_err(hdev->dev, "failed resetting sob %u", hw_sob->sob_id);

	kref_init(&hw_sob->kref);
}
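/*
 * Select the DMA mask according to the sticky-register magic left by the
 * firmware: HL_POWER9_HOST_MAGIC indicates a POWER9 host where full 64-bit
 * DMA can be used; otherwise the default 48-bit mask applies.
 */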
static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
{
	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
					HL_POWER9_HOST_MAGIC) {
		hdev->power9_64bit_dma_enable = 1;
		hdev->dma_mask = 64;
	} else {
		hdev->power9_64bit_dma_enable = 0;
		hdev->dma_mask = 48;
	}
}

static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}
static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}
static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}
static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}
static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	/* Not implemented */
	return 0;
}

static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	/* Not implemented */
	return 0;
}

static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	/* Not implemented */
	return 0;
}
static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};

static void gaudi_state_dump_init(struct hl_device *hdev)
{
	/* Not implemented */
	hdev->state_dump_specs.props = gaudi_state_dump_specs_props;
	hdev->state_dump_specs.funcs = gaudi_state_dump_funcs;
}
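/*
 * gaudi_funcs - the ASIC dispatch table through which the common habanalabs
 * core drives every Gaudi-specific operation.
 */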
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.cb_mmap = gaudi_cb_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sg = gaudi_dma_map_sg,
	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read32 = gaudi_debugfs_read32,
	.debugfs_write32 = gaudi_debugfs_write32,
	.debugfs_read64 = gaudi_debugfs_read64,
	.debugfs_write64 = gaudi_debugfs_write64,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.set_pll_profile = gaudi_set_pll_profile,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.send_heartbeat = gaudi_send_heartbeat,
	.set_clock_gating = gaudi_set_clock_gating,
	.disable_clock_gating = gaudi_disable_clock_gating,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.soft_reset_late_init = gaudi_soft_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.get_clk_rate = gaudi_get_clk_rate,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
	.get_device_time = gaudi_get_device_time,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init
};
/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}