1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2021 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43  *                      because of TDMA (tensor DMA). Hence, WREG is never
44  *                      secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000         /* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN            20
86
87 #define GAUDI_CB_POOL_CB_CNT            512
88 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
97
98 #define GAUDI_ARB_WDT_TIMEOUT           0x1000000
99
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
101                 BIT(GAUDI_ENGINE_ID_MME_0) |\
102                 BIT(GAUDI_ENGINE_ID_MME_2) |\
103                 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
104
105 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
106
107 #define GAUDI_PLL_MAX 10
108
109 #define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")
110
111 #define MONITOR_SOB_STRING_SIZE         256
112
113 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
114         GAUDI_QUEUE_ID_DMA_0_0,
115         GAUDI_QUEUE_ID_DMA_0_1,
116         GAUDI_QUEUE_ID_DMA_0_2,
117         GAUDI_QUEUE_ID_DMA_0_3,
118         GAUDI_QUEUE_ID_DMA_1_0,
119         GAUDI_QUEUE_ID_DMA_1_1,
120         GAUDI_QUEUE_ID_DMA_1_2,
121         GAUDI_QUEUE_ID_DMA_1_3
122 };
123
124 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
125                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
126                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
127                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
128                 "gaudi cpu eq"
129 };
130
131 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
132         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
133         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
134         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
135         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
136         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
137         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
138         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
139         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
140 };
141
142 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
143         [0] = GAUDI_QUEUE_ID_DMA_0_0,
144         [1] = GAUDI_QUEUE_ID_DMA_0_1,
145         [2] = GAUDI_QUEUE_ID_DMA_0_2,
146         [3] = GAUDI_QUEUE_ID_DMA_0_3,
147         [4] = GAUDI_QUEUE_ID_DMA_1_0,
148         [5] = GAUDI_QUEUE_ID_DMA_1_1,
149         [6] = GAUDI_QUEUE_ID_DMA_1_2,
150         [7] = GAUDI_QUEUE_ID_DMA_1_3,
151 };
152
153 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
154         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
155         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
156         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
157         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
158         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
159         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
160         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
161         [PACKET_FENCE]          = sizeof(struct packet_fence),
162         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
163         [PACKET_NOP]            = sizeof(struct packet_nop),
164         [PACKET_STOP]           = sizeof(struct packet_stop),
165         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
166         [PACKET_WAIT]           = sizeof(struct packet_wait),
167         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
168 };
169
170 static inline bool validate_packet_id(enum packet_id id)
171 {
172         switch (id) {
173         case PACKET_WREG_32:
174         case PACKET_WREG_BULK:
175         case PACKET_MSG_LONG:
176         case PACKET_MSG_SHORT:
177         case PACKET_CP_DMA:
178         case PACKET_REPEAT:
179         case PACKET_MSG_PROT:
180         case PACKET_FENCE:
181         case PACKET_LIN_DMA:
182         case PACKET_NOP:
183         case PACKET_STOP:
184         case PACKET_ARB_POINT:
185         case PACKET_WAIT:
186         case PACKET_LOAD_AND_EXE:
187                 return true;
188         default:
189                 return false;
190         }
191 }
192
193 static const char * const
194 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
195         "tpc_address_exceed_slm",
196         "tpc_div_by_0",
197         "tpc_spu_mac_overflow",
198         "tpc_spu_addsub_overflow",
199         "tpc_spu_abs_overflow",
200         "tpc_spu_fp_dst_nan_inf",
201         "tpc_spu_fp_dst_denorm",
202         "tpc_vpu_mac_overflow",
203         "tpc_vpu_addsub_overflow",
204         "tpc_vpu_abs_overflow",
205         "tpc_vpu_fp_dst_nan_inf",
206         "tpc_vpu_fp_dst_denorm",
207         "tpc_assertions",
208         "tpc_illegal_instruction",
209         "tpc_pc_wrap_around",
210         "tpc_qm_sw_err",
211         "tpc_hbw_rresp_err",
212         "tpc_hbw_bresp_err",
213         "tpc_lbw_rresp_err",
214         "tpc_lbw_bresp_err"
215 };
216
217 static const char * const
218 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
219         "PQ AXI HBW error",
220         "CQ AXI HBW error",
221         "CP AXI HBW error",
222         "CP error due to undefined OPCODE",
223         "CP encountered STOP OPCODE",
224         "CP AXI LBW error",
225         "CP WRREG32 or WRBULK returned error",
226         "N/A",
227         "FENCE 0 inc over max value and clipped",
228         "FENCE 1 inc over max value and clipped",
229         "FENCE 2 inc over max value and clipped",
230         "FENCE 3 inc over max value and clipped",
231         "FENCE 0 dec under min value and clipped",
232         "FENCE 1 dec under min value and clipped",
233         "FENCE 2 dec under min value and clipped",
234         "FENCE 3 dec under min value and clipped"
235 };
236
237 static const char * const
238 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
239         "Choice push while full error",
240         "Choice Q watchdog error",
241         "MSG AXI LBW returned with error"
242 };
243
244 enum gaudi_sm_sei_cause {
245         GAUDI_SM_SEI_SO_OVERFLOW,
246         GAUDI_SM_SEI_LBW_4B_UNALIGNED,
247         GAUDI_SM_SEI_AXI_RESPONSE_ERR
248 };
249
250 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
251         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
252         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
253         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
254         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
255         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
256         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
257         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
258         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
259         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
349         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
350         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
351         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
352         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
353         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
354         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
355         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
356         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
357         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
358         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
359         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
360         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
361         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
362         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
363         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
364 };
365
366 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
367         { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
368         { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
369         { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
370         { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
371         { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
372         { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
373         { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
374         { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
375         { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
376         { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
377         { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
378         { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
379         { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
380         { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
381         { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
382         { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
383         { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
384         { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
385         { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
386         { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
387         { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
388         { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
389         { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
390         { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
391         { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
392         { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
393         { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
394 };
395
396 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
397         { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
398         { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
399         { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
400         { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
401         { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
402         { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
403         { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
404         { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
405         { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
406         { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
407         { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
408 };
409
410 static s64 gaudi_state_dump_specs_props[] = {
411         [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
412         [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
413         [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
414         [SP_MON_OBJ_WR_ADDR_LOW] =
415                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
416         [SP_MON_OBJ_WR_ADDR_HIGH] =
417                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
418         [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
419         [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
420         [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
421         [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
422         [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
423         [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
424         [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
425         [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
426         [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
427         [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
428         [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
429         [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
430         [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
431         [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
432         [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
433         [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
434         [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
435         [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
436         [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
437         [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
438         [SP_FENCE0_CNT_OFFSET] =
439                 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
440         [SP_FENCE0_RDATA_OFFSET] =
441                 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
442         [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
443         [SP_NUM_CORES] = 1,
444 };
445
446 /* The order here is opposite to the order of the indexing in the h/w.
447  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
448  */
449 static const char * const gaudi_sync_manager_names[] = {
450         "SYNC_MGR_E_N",
451         "SYNC_MGR_W_N",
452         "SYNC_MGR_E_S",
453         "SYNC_MGR_W_S",
454         NULL
455 };
456
457 struct ecc_info_extract_params {
458         u64 block_address;
459         u32 num_memories;
460         bool derr;
461         bool disable_clock_gating;
462 };
463
464 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
465                                                                 u64 phys_addr);
466 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
467                                         struct hl_cs_job *job);
468 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
469                                         u32 size, u64 val);
470 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
471                                         u32 num_regs, u32 val);
472 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
473                                 u32 tpc_id);
474 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
475 static int gaudi_cpucp_info_get(struct hl_device *hdev);
476 static void gaudi_disable_clock_gating(struct hl_device *hdev);
477 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
478 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
479                                 u32 size, bool eb);
480 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
481                                 struct hl_gen_wait_properties *prop);
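/*
 * Map a queue ID to its collective mode: the external queues (PCI DMA 0/1)
 * act as collective masters, DMA5, TPC7 and the NIC queues act as collective
 * slaves, and all other queues do not take part in collective operations.
 */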
482 static inline enum hl_collective_mode
483 get_collective_mode(struct hl_device *hdev, u32 queue_id)
484 {
485         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
486                 return HL_COLLECTIVE_MASTER;
487
488         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
489                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
490                 return HL_COLLECTIVE_SLAVE;
491
492         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
493                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
494                 return HL_COLLECTIVE_SLAVE;
495
496         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
497                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
498                 return HL_COLLECTIVE_SLAVE;
499
500         return HL_COLLECTIVE_NOT_SUPPORTED;
501 }
502
503 static inline void set_default_power_values(struct hl_device *hdev)
504 {
505         struct asic_fixed_properties *prop = &hdev->asic_prop;
506
507         if (hdev->card_type == cpucp_card_type_pmc) {
508                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
509
510                 if (prop->fw_security_enabled)
511                         prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
512                 else
513                         prop->dc_power_default = DC_POWER_DEFAULT_PMC;
514         } else {
515                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
516                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
517         }
518 }
519
520 static int gaudi_set_fixed_properties(struct hl_device *hdev)
521 {
522         struct asic_fixed_properties *prop = &hdev->asic_prop;
523         u32 num_sync_stream_queues = 0;
524         int i;
525
526         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
527         prop->hw_queues_props = kcalloc(prop->max_queues,
528                         sizeof(struct hw_queue_properties),
529                         GFP_KERNEL);
530
531         if (!prop->hw_queues_props)
532                 return -ENOMEM;
533
534         for (i = 0 ; i < prop->max_queues ; i++) {
535                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
536                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
537                         prop->hw_queues_props[i].driver_only = 0;
538                         prop->hw_queues_props[i].supports_sync_stream = 1;
539                         prop->hw_queues_props[i].cb_alloc_flags =
540                                 CB_ALLOC_KERNEL;
541                         num_sync_stream_queues++;
542                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
543                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
544                         prop->hw_queues_props[i].driver_only = 1;
545                         prop->hw_queues_props[i].supports_sync_stream = 0;
546                         prop->hw_queues_props[i].cb_alloc_flags =
547                                 CB_ALLOC_KERNEL;
548                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
549                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
550                         prop->hw_queues_props[i].driver_only = 0;
551                         prop->hw_queues_props[i].supports_sync_stream = 0;
552                         prop->hw_queues_props[i].cb_alloc_flags =
553                                 CB_ALLOC_USER;
554
555                 }
556                 prop->hw_queues_props[i].collective_mode =
557                                                 get_collective_mode(hdev, i);
558         }
559
560         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
561         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
562         prop->collective_first_sob = 0;
563         prop->collective_first_mon = 0;
564
565         /* 2 SOBs per internal queue stream are reserved for collective */
566         prop->sync_stream_first_sob =
567                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
568                         * QMAN_STREAMS * HL_RSVD_SOBS;
569
570         /* 1 monitor per internal queue stream is reserved for collective
571          * 2 monitors per external queue stream are reserved for collective
572          */
573         prop->sync_stream_first_mon =
574                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
575                         (NUMBER_OF_EXT_HW_QUEUES * 2);
576
577         prop->dram_base_address = DRAM_PHYS_BASE;
578         prop->dram_size = GAUDI_HBM_SIZE_32GB;
579         prop->dram_end_address = prop->dram_base_address +
580                                         prop->dram_size;
581         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
582
583         prop->sram_base_address = SRAM_BASE_ADDR;
584         prop->sram_size = SRAM_SIZE;
585         prop->sram_end_address = prop->sram_base_address +
586                                         prop->sram_size;
587         prop->sram_user_base_address = prop->sram_base_address +
588                                         SRAM_USER_BASE_OFFSET;
589
590         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
591         if (hdev->pldm)
592                 prop->mmu_pgt_size = 0x800000; /* 8MB */
593         else
594                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
595         prop->mmu_pte_size = HL_PTE_SIZE;
596         prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
597         prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
598         prop->dram_page_size = PAGE_SIZE_2MB;
599         prop->dram_supports_virtual_memory = false;
600
601         prop->pmmu.hop0_shift = MMU_V1_1_HOP0_SHIFT;
602         prop->pmmu.hop1_shift = MMU_V1_1_HOP1_SHIFT;
603         prop->pmmu.hop2_shift = MMU_V1_1_HOP2_SHIFT;
604         prop->pmmu.hop3_shift = MMU_V1_1_HOP3_SHIFT;
605         prop->pmmu.hop4_shift = MMU_V1_1_HOP4_SHIFT;
606         prop->pmmu.hop0_mask = MMU_V1_1_HOP0_MASK;
607         prop->pmmu.hop1_mask = MMU_V1_1_HOP1_MASK;
608         prop->pmmu.hop2_mask = MMU_V1_1_HOP2_MASK;
609         prop->pmmu.hop3_mask = MMU_V1_1_HOP3_MASK;
610         prop->pmmu.hop4_mask = MMU_V1_1_HOP4_MASK;
611         prop->pmmu.start_addr = VA_HOST_SPACE_START;
612         prop->pmmu.end_addr =
613                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
614         prop->pmmu.page_size = PAGE_SIZE_4KB;
615         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
616         prop->pmmu.last_mask = LAST_MASK;
617
618         /* PMMU and HPMMU are the same except for the page size */
619         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
620         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
621
622         /* shifts and masks are the same in PMMU and DMMU */
623         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
624         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
625         prop->dmmu.end_addr = VA_HOST_SPACE_END;
626         prop->dmmu.page_size = PAGE_SIZE_2MB;
627
628         prop->cfg_size = CFG_SIZE;
629         prop->max_asid = MAX_ASID;
630         prop->num_of_events = GAUDI_EVENT_SIZE;
631         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
632
633         set_default_power_values(hdev);
634
635         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
636         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
637
638         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
639         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
640
641         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
642                                         CARD_NAME_MAX_LEN);
643
644         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
645
646         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
647                         prop->sync_stream_first_sob +
648                         (num_sync_stream_queues * HL_RSVD_SOBS);
649         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
650                         prop->sync_stream_first_mon +
651                         (num_sync_stream_queues * HL_RSVD_MONS);
652
653         prop->first_available_user_msix_interrupt = USHRT_MAX;
654
655         for (i = 0 ; i < HL_MAX_DCORES ; i++)
656                 prop->first_available_cq[i] = USHRT_MAX;
657
658         prop->fw_cpu_boot_dev_sts0_valid = false;
659         prop->fw_cpu_boot_dev_sts1_valid = false;
660         prop->hard_reset_done_by_fw = false;
661         prop->gic_interrupts_enable = true;
662
663         prop->server_type = HL_SERVER_TYPE_UNKNOWN;
664
665         prop->clk_pll_index = HL_GAUDI_MME_PLL;
666         prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
667
668         prop->use_get_power_for_reset_history = true;
669
670         return 0;
671 }
672
673 static int gaudi_pci_bars_map(struct hl_device *hdev)
674 {
675         static const char * const name[] = {"SRAM", "CFG", "HBM"};
676         bool is_wc[3] = {false, false, true};
677         int rc;
678
679         rc = hl_pci_bars_map(hdev, name, is_wc);
680         if (rc)
681                 return rc;
682
683         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
684                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
685
686         return 0;
687 }
688
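/*
 * Re-program inbound PCI region 2 (the HBM BAR) to point at @addr.
 * Returns the previous BAR base address, or U64_MAX if the region could not
 * be set or the F/W has already configured the iATU.
 */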
689 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
690 {
691         struct gaudi_device *gaudi = hdev->asic_specific;
692         struct hl_inbound_pci_region pci_region;
693         u64 old_addr = addr;
694         int rc;
695
696         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
697                 return old_addr;
698
699         if (hdev->asic_prop.iatu_done_by_fw)
700                 return U64_MAX;
701
702         /* Inbound Region 2 - Bar 4 - Point to HBM */
703         pci_region.mode = PCI_BAR_MATCH_MODE;
704         pci_region.bar = HBM_BAR_ID;
705         pci_region.addr = addr;
706         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
707         if (rc)
708                 return U64_MAX;
709
710         if (gaudi) {
711                 old_addr = gaudi->hbm_bar_cur_addr;
712                 gaudi->hbm_bar_cur_addr = addr;
713         }
714
715         return old_addr;
716 }
717
718 static int gaudi_init_iatu(struct hl_device *hdev)
719 {
720         struct hl_inbound_pci_region inbound_region;
721         struct hl_outbound_pci_region outbound_region;
722         int rc;
723
724         if (hdev->asic_prop.iatu_done_by_fw)
725                 return 0;
726
727         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
728         inbound_region.mode = PCI_BAR_MATCH_MODE;
729         inbound_region.bar = SRAM_BAR_ID;
730         inbound_region.addr = SRAM_BASE_ADDR;
731         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
732         if (rc)
733                 goto done;
734
735         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
736         inbound_region.mode = PCI_BAR_MATCH_MODE;
737         inbound_region.bar = CFG_BAR_ID;
738         inbound_region.addr = SPI_FLASH_BASE_ADDR;
739         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
740         if (rc)
741                 goto done;
742
743         /* Inbound Region 2 - Bar 4 - Point to HBM */
744         inbound_region.mode = PCI_BAR_MATCH_MODE;
745         inbound_region.bar = HBM_BAR_ID;
746         inbound_region.addr = DRAM_PHYS_BASE;
747         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
748         if (rc)
749                 goto done;
750
751         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
752
753         /* Outbound Region 0 - Point to Host */
754         outbound_region.addr = HOST_PHYS_BASE;
755         outbound_region.size = HOST_PHYS_SIZE;
756         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
757
758 done:
759         return rc;
760 }
761
762 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
763 {
764         return RREG32(mmHW_STATE);
765 }
766
767 static int gaudi_early_init(struct hl_device *hdev)
768 {
769         struct asic_fixed_properties *prop = &hdev->asic_prop;
770         struct pci_dev *pdev = hdev->pdev;
771         u32 fw_boot_status;
772         int rc;
773
774         rc = gaudi_set_fixed_properties(hdev);
775         if (rc) {
776                 dev_err(hdev->dev, "Failed setting fixed properties\n");
777                 return rc;
778         }
779
780         /* Check BAR sizes */
781         if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
782                 dev_err(hdev->dev,
783                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
784                         SRAM_BAR_ID,
785                         (unsigned long long) pci_resource_len(pdev,
786                                                         SRAM_BAR_ID),
787                         SRAM_BAR_SIZE);
788                 rc = -ENODEV;
789                 goto free_queue_props;
790         }
791
792         if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
793                 dev_err(hdev->dev,
794                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
795                         CFG_BAR_ID,
796                         (unsigned long long) pci_resource_len(pdev,
797                                                                 CFG_BAR_ID),
798                         CFG_BAR_SIZE);
799                 rc = -ENODEV;
800                 goto free_queue_props;
801         }
802
803         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
804         hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
805
806         /* If FW security is enabled at this point it means no access to ELBI */
807         if (hdev->asic_prop.fw_security_enabled) {
808                 hdev->asic_prop.iatu_done_by_fw = true;
809
810                 /*
811                  * The GIC security bit can ONLY be set by CPUCP, so at this
812                  * stage the decision can only be taken based on PCI ID security.
813                  */
814                 hdev->asic_prop.gic_interrupts_enable = false;
815                 goto pci_init;
816         }
817
818         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
819                                 &fw_boot_status);
820         if (rc)
821                 goto free_queue_props;
822
823         /* Check whether FW is configuring iATU */
824         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
825                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
826                 hdev->asic_prop.iatu_done_by_fw = true;
827
828 pci_init:
829         rc = hl_pci_init(hdev);
830         if (rc)
831                 goto free_queue_props;
832
833         /* Before continuing in the initialization, we need to read the preboot
834          * version to determine whether we are running with security-enabled firmware
835          */
836         rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
837                                         mmCPU_BOOT_DEV_STS0,
838                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
839                                         mmCPU_BOOT_ERR1,
840                                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
841         if (rc) {
842                 if (hdev->reset_on_preboot_fail)
843                         hdev->asic_funcs->hw_fini(hdev, true, false);
844                 goto pci_fini;
845         }
846
847         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
848                 dev_info(hdev->dev,
849                         "H/W state is dirty, must reset before initializing\n");
850                 hdev->asic_funcs->hw_fini(hdev, true, false);
851         }
852
853         return 0;
854
855 pci_fini:
856         hl_pci_fini(hdev);
857 free_queue_props:
858         kfree(hdev->asic_prop.hw_queues_props);
859         return rc;
860 }
861
862 static int gaudi_early_fini(struct hl_device *hdev)
863 {
864         kfree(hdev->asic_prop.hw_queues_props);
865         hl_pci_fini(hdev);
866
867         return 0;
868 }
869
870 /**
871  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
872  *
873  * @hdev: pointer to hl_device structure
874  *
875  * Return: 0 for success, negative value for error.
876 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
877 {
878         struct asic_fixed_properties *prop = &hdev->asic_prop;
879         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
880         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
881         int rc;
882
883         if (hdev->asic_prop.fw_security_enabled) {
884                 struct gaudi_device *gaudi = hdev->asic_specific;
885
886                 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
887                         return 0;
888
889                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
890
891                 if (rc)
892                         return rc;
893
894                 freq = pll_freq_arr[2];
895         } else {
896                 /* Backward compatibility */
897                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
898                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
899                 nr = RREG32(mmPSOC_CPU_PLL_NR);
900                 nf = RREG32(mmPSOC_CPU_PLL_NF);
901                 od = RREG32(mmPSOC_CPU_PLL_OD);
902
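                /*
                 * Derive the frequency from the CPU PLL registers: the raw
                 * PLL output is PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1)).
                 * Depending on the divider select, the source is either the
                 * reference clock or that PLL output, optionally divided by
                 * (div_fctr + 1).
                 */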
903                 if (div_sel == DIV_SEL_REF_CLK ||
904                                 div_sel == DIV_SEL_DIVIDED_REF) {
905                         if (div_sel == DIV_SEL_REF_CLK)
906                                 freq = PLL_REF_CLK;
907                         else
908                                 freq = PLL_REF_CLK / (div_fctr + 1);
909                 } else if (div_sel == DIV_SEL_PLL_CLK ||
910                         div_sel == DIV_SEL_DIVIDED_PLL) {
911                         pll_clk = PLL_REF_CLK * (nf + 1) /
912                                         ((nr + 1) * (od + 1));
913                         if (div_sel == DIV_SEL_PLL_CLK)
914                                 freq = pll_clk;
915                         else
916                                 freq = pll_clk / (div_fctr + 1);
917                 } else {
918                         dev_warn(hdev->dev,
919                                 "Received invalid div select value: %d",
920                                 div_sel);
921                         freq = 0;
922                 }
923         }
924
925         prop->psoc_timestamp_frequency = freq;
926         prop->psoc_pci_pll_nr = nr;
927         prop->psoc_pci_pll_nf = nf;
928         prop->psoc_pci_pll_od = od;
929         prop->psoc_pci_pll_div_factor = div_fctr;
930
931         return 0;
932 }
933
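/*
 * Build a LIN_DMA packet that copies the TPC kernel binary from host memory
 * to the user SRAM area, submit it on QMAN0 and then run the kernel on every
 * TPC engine.
 */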
934 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
935                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
936 {
937         struct asic_fixed_properties *prop = &hdev->asic_prop;
938         struct packet_lin_dma *init_tpc_mem_pkt;
939         struct hl_cs_job *job;
940         struct hl_cb *cb;
941         u64 dst_addr;
942         u32 cb_size, ctl;
943         u8 tpc_id;
944         int rc;
945
946         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
947         if (!cb)
948                 return -EFAULT;
949
950         init_tpc_mem_pkt = cb->kernel_address;
951         cb_size = sizeof(*init_tpc_mem_pkt);
952         memset(init_tpc_mem_pkt, 0, cb_size);
953
954         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
955
956         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
957         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
958         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
959         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
960
961         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
962
963         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
964         dst_addr = (prop->sram_user_base_address &
965                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
966                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
967         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
968
969         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
970         if (!job) {
971                 dev_err(hdev->dev, "Failed to allocate a new job\n");
972                 rc = -ENOMEM;
973                 goto release_cb;
974         }
975
976         job->id = 0;
977         job->user_cb = cb;
978         atomic_inc(&job->user_cb->cs_cnt);
979         job->user_cb_size = cb_size;
980         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
981         job->patched_cb = job->user_cb;
982         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
983
984         hl_debugfs_add_job(hdev, job);
985
986         rc = gaudi_send_job_on_qman0(hdev, job);
987
988         if (rc)
989                 goto free_job;
990
991         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
992                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
993                 if (rc)
994                         break;
995         }
996
997 free_job:
998         hl_userptr_delete_list(hdev, &job->userptr_list);
999         hl_debugfs_remove_job(hdev, job);
1000         kfree(job);
1001         atomic_dec(&cb->cs_cnt);
1002
1003 release_cb:
1004         hl_cb_put(cb);
1005         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1006
1007         return rc;
1008 }
1009
1010 /*
1011  * gaudi_init_tpc_mem() - Initialize TPC memories.
1012  * @hdev: Pointer to hl_device structure.
1013  *
1014  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1015  *
1016  * Return: 0 for success, negative value for error.
1017  */
1018 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1019 {
1020         const struct firmware *fw;
1021         size_t fw_size;
1022         void *cpu_addr;
1023         dma_addr_t dma_handle;
1024         int rc, count = 5;
1025
1026 again:
1027         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1028         if (rc == -EINTR && count-- > 0) {
1029                 msleep(50);
1030                 goto again;
1031         }
1032
1033         if (rc) {
1034                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1035                                 GAUDI_TPC_FW_FILE);
1036                 goto out;
1037         }
1038
1039         fw_size = fw->size;
1040         cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1041                         &dma_handle, GFP_KERNEL | __GFP_ZERO);
1042         if (!cpu_addr) {
1043                 dev_err(hdev->dev,
1044                         "Failed to allocate %zu of dma memory for TPC kernel\n",
1045                         fw_size);
1046                 rc = -ENOMEM;
1047                 goto out;
1048         }
1049
1050         memcpy(cpu_addr, fw->data, fw_size);
1051
1052         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1053
1054         hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1055                         dma_handle);
1056
1057 out:
1058         release_firmware(fw);
1059         return rc;
1060 }
1061
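/*
 * Assign the SOBs of the stream's current SOB group: one SOB per NIC engine
 * slave queue, plus a shared SOB for the DMA5/TPC7 reduction engines.
 */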
1062 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1063 {
1064         struct gaudi_device *gaudi = hdev->asic_specific;
1065         struct gaudi_collective_properties *prop = &gaudi->collective_props;
1066         struct hl_hw_queue *q;
1067         u32 i, sob_id, sob_group_id, queue_id;
1068
1069         /* Iterate through SOB groups and assign a SOB for each slave queue */
1070         sob_group_id =
1071                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1072         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1073
1074         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1075         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1076                 q = &hdev->kernel_queues[queue_id + (4 * i)];
1077                 q->sync_stream_prop.collective_sob_id = sob_id + i;
1078         }
1079
1080         /* Both DMA5 and TPC7 use the same resources since only a single
1081          * engine needs to participate in the reduction process
1082          */
1083         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1084         q = &hdev->kernel_queues[queue_id];
1085         q->sync_stream_prop.collective_sob_id =
1086                         sob_id + NIC_NUMBER_OF_ENGINES;
1087
1088         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1089         q = &hdev->kernel_queues[queue_id];
1090         q->sync_stream_prop.collective_sob_id =
1091                         sob_id + NIC_NUMBER_OF_ENGINES;
1092 }
1093
1094 static void gaudi_sob_group_hw_reset(struct kref *ref)
1095 {
1096         struct gaudi_hw_sob_group *hw_sob_group =
1097                 container_of(ref, struct gaudi_hw_sob_group, kref);
1098         struct hl_device *hdev = hw_sob_group->hdev;
1099         int i;
1100
1101         for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1102                 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1103                         (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1104
1105         kref_init(&hw_sob_group->kref);
1106 }
1107
1108 static void gaudi_sob_group_reset_error(struct kref *ref)
1109 {
1110         struct gaudi_hw_sob_group *hw_sob_group =
1111                 container_of(ref, struct gaudi_hw_sob_group, kref);
1112         struct hl_device *hdev = hw_sob_group->hdev;
1113
1114         dev_crit(hdev->dev,
1115                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1116                 hw_sob_group->base_sob_id);
1117 }
1118
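/*
 * Build the master SOB mask used by the collective master monitors: one bit
 * per enabled NIC engine, plus one bit for the collective (reduction) engine.
 */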
1119 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1120 {
1121         struct gaudi_collective_properties *prop;
1122         int i;
1123
1124         prop = &gaudi->collective_props;
1125
1126         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1127
1128         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1129                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1130                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1131                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1132         /* Set collective engine bit */
1133         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1134                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1135 }
1136
1137 static int gaudi_collective_init(struct hl_device *hdev)
1138 {
1139         u32 i, sob_id, reserved_sobs_per_group;
1140         struct gaudi_collective_properties *prop;
1141         struct gaudi_device *gaudi;
1142
1143         gaudi = hdev->asic_specific;
1144         prop = &gaudi->collective_props;
1145         sob_id = hdev->asic_prop.collective_first_sob;
1146
1147         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1148         reserved_sobs_per_group =
1149                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1150
1151         /* Init SOB groups */
1152         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1153                 prop->hw_sob_group[i].hdev = hdev;
1154                 prop->hw_sob_group[i].base_sob_id = sob_id;
1155                 sob_id += reserved_sobs_per_group;
1156                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1157         }
1158
1159         for (i = 0 ; i < QMAN_STREAMS; i++) {
1160                 prop->next_sob_group_val[i] = 1;
1161                 prop->curr_sob_group_idx[i] = 0;
1162                 gaudi_collective_map_sobs(hdev, i);
1163         }
1164
1165         gaudi_collective_mstr_sob_mask_set(gaudi);
1166
1167         return 0;
1168 }
1169
1170 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1171 {
1172         struct gaudi_device *gaudi = hdev->asic_specific;
1173         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1174
1175         kref_put(&cprop->hw_sob_group[sob_group].kref,
1176                                         gaudi_sob_group_hw_reset);
1177 }
1178
1179 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1180                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1181 {
1182         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1183         struct gaudi_collective_properties *cprop;
1184         struct hl_gen_wait_properties wait_prop;
1185         struct hl_sync_stream_properties *prop;
1186         struct gaudi_device *gaudi;
1187
1188         gaudi = hdev->asic_specific;
1189         cprop = &gaudi->collective_props;
1190         queue_id = job->hw_queue_id;
1191         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1192
1193         master_sob_base =
1194                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1195         master_monitor = prop->collective_mstr_mon_id[0];
1196
1197         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1198
1199         dev_dbg(hdev->dev,
1200                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1201                 master_sob_base, cprop->mstr_sob_mask[0],
1202                 cprop->next_sob_group_val[stream],
1203                 master_monitor, queue_id);
1204
1205         wait_prop.data = (void *) job->patched_cb;
1206         wait_prop.sob_base = master_sob_base;
1207         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1208         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1209         wait_prop.mon_id = master_monitor;
1210         wait_prop.q_idx = queue_id;
1211         wait_prop.size = cb_size;
1212         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1213
1214         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1215         master_monitor = prop->collective_mstr_mon_id[1];
1216
1217         dev_dbg(hdev->dev,
1218                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1219                 master_sob_base, cprop->mstr_sob_mask[1],
1220                 cprop->next_sob_group_val[stream],
1221                 master_monitor, queue_id);
1222
1223         wait_prop.sob_base = master_sob_base;
1224         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1225         wait_prop.mon_id = master_monitor;
1226         wait_prop.size = cb_size;
1227         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1228 }
1229
1230 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1231                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1232 {
1233         struct hl_gen_wait_properties wait_prop;
1234         struct hl_sync_stream_properties *prop;
1235         u32 queue_id, cb_size = 0;
1236
1237         queue_id = job->hw_queue_id;
1238         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1239
1240         if (job->cs->encaps_signals) {
1241                 /* use the encaps signal handle stored earlier in the flow
1242                  * and set the SOB information from the encaps
1243                  * signals handle
1244                  */
1245                 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1246                                                 cs_cmpl);
1247
1248                 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1249                                 job->cs->sequence,
1250                                 cs_cmpl->hw_sob->sob_id,
1251                                 cs_cmpl->sob_val);
1252         }
1253
1254         /* Add to wait CBs using slave monitor */
1255         wait_prop.data = (void *) job->user_cb;
1256         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1257         wait_prop.sob_mask = 0x1;
1258         wait_prop.sob_val = cs_cmpl->sob_val;
1259         wait_prop.mon_id = prop->collective_slave_mon_id;
1260         wait_prop.q_idx = queue_id;
1261         wait_prop.size = cb_size;
1262
1263         dev_dbg(hdev->dev,
1264                 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1265                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1266                 prop->collective_slave_mon_id, queue_id);
1267
1268         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1269
1270         dev_dbg(hdev->dev,
1271                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1272                 prop->collective_sob_id, queue_id);
1273
1274         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1275                         prop->collective_sob_id, cb_size, false);
1276 }
1277
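/*
 * Prepare a collective wait CS: latch the signal SOB id/value (or take them
 * from the encapsulated signals handle), generate the master and slave wait
 * CBs for every job in the CS, and advance the per-stream SOB group
 * bookkeeping, including wraparound of the SOB group value.
 */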
1278 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1279 {
1280         struct hl_cs_compl *signal_cs_cmpl =
1281                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1282         struct hl_cs_compl *cs_cmpl =
1283                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1284         struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1285         struct gaudi_collective_properties *cprop;
1286         u32 stream, queue_id, sob_group_offset;
1287         struct gaudi_device *gaudi;
1288         struct hl_device *hdev;
1289         struct hl_cs_job *job;
1290         struct hl_ctx *ctx;
1291
1292         ctx = cs->ctx;
1293         hdev = ctx->hdev;
1294         gaudi = hdev->asic_specific;
1295         cprop = &gaudi->collective_props;
1296
1297         if (cs->encaps_signals) {
1298                 cs_cmpl->hw_sob = handle->hw_sob;
1299                 /* At this checkpoint we only need the hw_sob pointer
1300                  * for the completion check before starting to go over the
1301                  * jobs of the master/slaves; the sob_value will be taken
1302                  * later on in gaudi_collective_slave_init_job, depending
1303                  * on each job's wait offset value.
1304                  */
1305                 cs_cmpl->sob_val = 0;
1306         } else {
1307                 /* copy the SOB id and value of the signal CS */
1308                 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1309                 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1310         }
1311
1312         /* Check again whether the signal CS has already completed.
1313          * If it has, don't send any wait CS since the hw_sob
1314          * could already be in reset. If the signal is not completed,
1315          * take a refcount on the hw_sob to prevent resetting the SOB
1316          * while the wait CS is not yet submitted.
1317          * Note that this check is protected by two locks,
1318          * the hw queue lock and the completion object lock,
1319          * and the same completion object lock also protects
1320          * the hw_sob reset handler function.
1321          * The hw_queue lock prevents the hw_sob refcount value,
1322          * which is changed by the signal/wait flows, from going out of sync.
1323          */
1324         spin_lock(&signal_cs_cmpl->lock);
1325
1326         if (completion_done(&cs->signal_fence->completion)) {
1327                 spin_unlock(&signal_cs_cmpl->lock);
1328                 return -EINVAL;
1329         }
1330         /* Increment kref since all slave queues are now waiting on it */
1331         kref_get(&cs_cmpl->hw_sob->kref);
1332
1333         spin_unlock(&signal_cs_cmpl->lock);
1334
1335         /* Calculate the stream from collective master queue (1st job) */
1336         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1337         stream = job->hw_queue_id % 4;
1338         sob_group_offset =
1339                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1340
1341         list_for_each_entry(job, &cs->job_list, cs_node) {
1342                 queue_id = job->hw_queue_id;
1343
1344                 if (hdev->kernel_queues[queue_id].collective_mode ==
1345                                 HL_COLLECTIVE_MASTER)
1346                         gaudi_collective_master_init_job(hdev, job, stream,
1347                                                 sob_group_offset);
1348                 else
1349                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1350         }
1351
1352         cs_cmpl->sob_group = sob_group_offset;
1353
1354         /* Handle sob group kref and wraparound */
1355         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1356         cprop->next_sob_group_val[stream]++;
1357
1358         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1359                 /*
1360                  * Decrement as we reached the max value.
1361                  * The release function won't be called here as we've
1362                  * just incremented the refcount.
1363                  */
1364                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1365                                 gaudi_sob_group_reset_error);
1366                 cprop->next_sob_group_val[stream] = 1;
1367                 /* only two SOBs are currently in use */
1368                 cprop->curr_sob_group_idx[stream] =
1369                         (cprop->curr_sob_group_idx[stream] + 1) &
1370                                                         (HL_RSVD_SOBS - 1);
1371
1372                 gaudi_collective_map_sobs(hdev, stream);
1373
1374                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1375                                 cprop->curr_sob_group_idx[stream], stream);
1376         }
1377
1378         mb();
1379         hl_fence_put(cs->signal_fence);
1380         cs->signal_fence = NULL;
1381
1382         return 0;
1383 }
1384
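/*
 * gaudi_collective_wait_create_job() - allocate and queue a single job of a
 * collective wait CS.
 *
 * The CB size depends on the queue type: the collective master CB holds two
 * monitor configurations, two fences and the completion/MSI-X packets, while
 * a slave CB holds one monitor configuration, a fence and a SOB signal
 * packet. The CB is allocated from the kernel CB manager and, for the master
 * queue, is used directly as the patched CB.
 */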
1385 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1386                 struct hl_ctx *ctx, struct hl_cs *cs,
1387                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1388                 u32 encaps_signal_offset)
1389 {
1390         struct hw_queue_properties *hw_queue_prop;
1391         struct hl_cs_counters_atomic *cntr;
1392         struct hl_cs_job *job;
1393         struct hl_cb *cb;
1394         u32 cb_size;
1395         bool patched_cb;
1396
1397         cntr = &hdev->aggregated_cs_counters;
1398
1399         if (mode == HL_COLLECTIVE_MASTER) {
1400                 /* CB size of collective master queue contains
1401                  * 4 msg short packets for monitor 1 configuration
1402                  * 1 fence packet
1403                  * 4 msg short packets for monitor 2 configuration
1404                  * 1 fence packet
1405                  * 2 msg prot packets for completion and MSI-X
1406                  */
1407                 cb_size = sizeof(struct packet_msg_short) * 8 +
1408                                 sizeof(struct packet_fence) * 2 +
1409                                 sizeof(struct packet_msg_prot) * 2;
1410                 patched_cb = true;
1411         } else {
1412                 /* CB size of collective slave queues contains
1413                  * 4 msg short packets for monitor configuration
1414                  * 1 fence packet
1415                  * 1 additional msg short packet for sob signal
1416                  */
1417                 cb_size = sizeof(struct packet_msg_short) * 5 +
1418                                 sizeof(struct packet_fence);
1419                 patched_cb = false;
1420         }
1421
1422         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1423         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1424         if (!job) {
1425                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1426                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1427                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1428                 return -ENOMEM;
1429         }
1430
1431         /* Allocate internal mapped CB for non patched CBs */
1432         cb = hl_cb_kernel_create(hdev, cb_size,
1433                         hdev->mmu_enable && !patched_cb);
1434         if (!cb) {
1435                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1436                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1437                 kfree(job);
1438                 return -EFAULT;
1439         }
1440
1441         job->id = 0;
1442         job->cs = cs;
1443         job->user_cb = cb;
1444         atomic_inc(&job->user_cb->cs_cnt);
1445         job->user_cb_size = cb_size;
1446         job->hw_queue_id = queue_id;
1447
1448         /* Since it's guaranteed that the collective wait CS has only one
1449          * chunk, we can use this chunk to set the encapsulated signal offset
1450          * in the jobs.
1451          */
1452         if (cs->encaps_signals)
1453                 job->encaps_sig_wait_offset = encaps_signal_offset;
1454
1455         /*
1456          * No need for parsing, the user CB is already the patched CB.
1457          * We call hl_cb_destroy() for two reasons - we don't need
1458          * the CB in the CB idr anymore, and to decrement its refcount,
1459          * which was incremented inside hl_cb_kernel_create().
1460          */
1461         if (patched_cb)
1462                 job->patched_cb = job->user_cb;
1463         else
1464                 job->patched_cb = NULL;
1465
1466         job->job_cb_size = job->user_cb_size;
1467         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1468
1469         /* Increment refcount since we get a completion for external queues */
1470         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1471                 cs_get(cs);
1472
1473         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1474
1475         list_add_tail(&job->cs_node, &cs->job_list);
1476
1477         hl_debugfs_add_job(hdev, job);
1478
1479         return 0;
1480 }
1481
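/*
 * gaudi_collective_wait_create_jobs() - create all jobs of a collective wait
 * CS: one master job on the wait queue, one slave job per enabled NIC engine
 * on the same stream, and one slave job on the reduction engine queue
 * (DMA5 or TPC7).
 */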
1482 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1483                 struct hl_ctx *ctx, struct hl_cs *cs,
1484                 u32 wait_queue_id, u32 collective_engine_id,
1485                 u32 encaps_signal_offset)
1486 {
1487         struct gaudi_device *gaudi = hdev->asic_specific;
1488         struct hw_queue_properties *hw_queue_prop;
1489         u32 queue_id, collective_queue, num_jobs;
1490         u32 stream, nic_queue, nic_idx = 0;
1491         bool skip;
1492         int i, rc = 0;
1493
1494         /* Verify wait queue id is configured as master */
1495         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1496         if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1497                 dev_err(hdev->dev,
1498                         "Queue %d is not configured as collective master\n",
1499                         wait_queue_id);
1500                 return -EINVAL;
1501         }
1502
1503         /* Verify engine id is supported */
1504         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1505                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1506                 dev_err(hdev->dev,
1507                         "Collective wait does not support engine %u\n",
1508                         collective_engine_id);
1509                 return -EINVAL;
1510         }
1511
1512         stream = wait_queue_id % 4;
1513
1514         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1515                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1516         else
1517                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1518
1519         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1520         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1521
1522         /* The first job goes to the collective master queue; it will wait for
1523          * the collective slave queues to finish execution.
1524          * The synchronization is done using two monitors:
1525          * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1526          * reduction engine (DMA5/TPC7).
1527          *
1528          * The rest of the jobs go to the collective slave queues, which will
1529          * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1530          */
1531         for (i = 0 ; i < num_jobs ; i++) {
1532                 if (i == 0) {
1533                         queue_id = wait_queue_id;
1534                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1535                                 HL_COLLECTIVE_MASTER, queue_id,
1536                                 wait_queue_id, encaps_signal_offset);
1537                 } else {
1538                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1539                                 if (gaudi->hw_cap_initialized &
1540                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1541                                         skip = false;
1542                                 else
1543                                         skip = true;
1544
1545                                 queue_id = nic_queue;
1546                                 nic_queue += 4;
1547                                 nic_idx++;
1548
1549                                 if (skip)
1550                                         continue;
1551                         } else {
1552                                 queue_id = collective_queue;
1553                         }
1554
1555                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1556                                 HL_COLLECTIVE_SLAVE, queue_id,
1557                                 wait_queue_id, encaps_signal_offset);
1558                 }
1559
1560                 if (rc)
1561                         return rc;
1562         }
1563
1564         return rc;
1565 }
1566
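/*
 * gaudi_late_init() - late device initialization: read the cpucp info,
 * disable the unused NIC QMANs on PCI cards, enable PCI access from the
 * device CPU, scrub SRAM and DRAM, fetch the PSOC frequency, clear the MMU
 * page tables range, initialize the TPC memories, set up the collective
 * infrastructure and prepare the MMU for the single user ASID.
 */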
1567 static int gaudi_late_init(struct hl_device *hdev)
1568 {
1569         struct gaudi_device *gaudi = hdev->asic_specific;
1570         int rc;
1571
1572         rc = gaudi->cpucp_info_get(hdev);
1573         if (rc) {
1574                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1575                 return rc;
1576         }
1577
1578         if ((hdev->card_type == cpucp_card_type_pci) &&
1579                         (hdev->nic_ports_mask & 0x3)) {
1580                 dev_info(hdev->dev,
1581                         "PCI card detected, only 8 ports are enabled\n");
1582                 hdev->nic_ports_mask &= ~0x3;
1583
1584                 /* Stop and disable unused NIC QMANs */
1585                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1586                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1587                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1588
1589                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1590                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1591                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1592
1593                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1594                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1595
1596                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1597         }
1598
1599         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1600         if (rc) {
1601                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1602                 return rc;
1603         }
1604
1605         /* Scrub both SRAM and DRAM */
1606         rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
1607         if (rc)
1608                 goto disable_pci_access;
1609
1610         rc = gaudi_fetch_psoc_frequency(hdev);
1611         if (rc) {
1612                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1613                 goto disable_pci_access;
1614         }
1615
1616         rc = gaudi_mmu_clear_pgt_range(hdev);
1617         if (rc) {
1618                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1619                 goto disable_pci_access;
1620         }
1621
1622         rc = gaudi_init_tpc_mem(hdev);
1623         if (rc) {
1624                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1625                 goto disable_pci_access;
1626         }
1627
1628         rc = gaudi_collective_init(hdev);
1629         if (rc) {
1630                 dev_err(hdev->dev, "Failed to init collective\n");
1631                 goto disable_pci_access;
1632         }
1633
1634         /* We only support a single ASID for the user, so for the sake of optimization, just
1635          * initialize the ASID one time during device initialization with the fixed value of 1
1636          */
1637         gaudi_mmu_prepare(hdev, 1);
1638
1639         hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
1640
1641         return 0;
1642
1643 disable_pci_access:
1644         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1645
1646         return rc;
1647 }
1648
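/*
 * gaudi_late_fini() - free the hwmon channel info array (and each entry's
 * config) that was allocated for this device.
 */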
1649 static void gaudi_late_fini(struct hl_device *hdev)
1650 {
1651         const struct hwmon_channel_info **channel_info_arr;
1652         int i = 0;
1653
1654         if (!hdev->hl_chip_info->info)
1655                 return;
1656
1657         channel_info_arr = hdev->hl_chip_info->info;
1658
1659         while (channel_info_arr[i]) {
1660                 kfree(channel_info_arr[i]->config);
1661                 kfree(channel_info_arr[i]);
1662                 i++;
1663         }
1664
1665         kfree(channel_info_arr);
1666
1667         hdev->hl_chip_info->info = NULL;
1668 }
1669
1670 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1671 {
1672         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1673         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1674         int i, j, rc = 0;
1675
1676         /*
1677          * The device CPU works with 40-bit addresses, and bit 39 must be set
1678          * to '1' when accessing the host.
1679          * Bits 49:39 of the full host address are saved for a later
1680          * configuration of the HW to perform the extension to 50 bits.
1681          * Because there is a single HW register that holds the extension bits,
1682          * these bits must be identical across the entire allocated range.
1683          */
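        /* Effectively (assuming the allocation size is much smaller than
         * 2^39), a retry is only needed when the allocation happens to cross
         * a 2^39 (512GB) boundary, where bits 49:39 change.
         */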
1684
1685         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1686                 virt_addr_arr[i] =
1687                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1688                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1689                                                 &dma_addr_arr[i],
1690                                                 GFP_KERNEL | __GFP_ZERO);
1691                 if (!virt_addr_arr[i]) {
1692                         rc = -ENOMEM;
1693                         goto free_dma_mem_arr;
1694                 }
1695
1696                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1697                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1698                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1699                         break;
1700         }
1701
1702         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1703                 dev_err(hdev->dev,
1704                         "MSBs of CPU accessible DMA memory are not identical across the allocated range\n");
1705                 rc = -EFAULT;
1706                 goto free_dma_mem_arr;
1707         }
1708
1709         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1710         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1711         hdev->cpu_pci_msb_addr =
1712                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1713
1714         if (!hdev->asic_prop.fw_security_enabled)
1715                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1716
1717 free_dma_mem_arr:
1718         for (j = 0 ; j < i ; j++)
1719                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1720                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1721                                                 virt_addr_arr[j],
1722                                                 dma_addr_arr[j]);
1723
1724         return rc;
1725 }
1726
1727 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1728 {
1729         struct gaudi_device *gaudi = hdev->asic_specific;
1730         struct gaudi_internal_qman_info *q;
1731         u32 i;
1732
1733         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1734                 q = &gaudi->internal_qmans[i];
1735                 if (!q->pq_kernel_addr)
1736                         continue;
1737                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1738                                                         q->pq_kernel_addr,
1739                                                         q->pq_dma_addr);
1740         }
1741 }
1742
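/*
 * gaudi_alloc_internal_qmans_pq_mem() - allocate coherent DMA memory for the
 * PQs of all internal queues. The PQ size is chosen per engine type
 * (HBM DMA, MME, TPC or NIC).
 */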
1743 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1744 {
1745         struct gaudi_device *gaudi = hdev->asic_specific;
1746         struct gaudi_internal_qman_info *q;
1747         int rc, i;
1748
1749         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1750                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1751                         continue;
1752
1753                 q = &gaudi->internal_qmans[i];
1754
1755                 switch (i) {
1756                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1757                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1758                         break;
1759                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1760                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1761                         break;
1762                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1763                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1764                         break;
1765                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1766                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1767                         break;
1768                 default:
1769                         dev_err(hdev->dev, "Bad internal queue index %d", i);
1770                         rc = -EINVAL;
1771                         goto free_internal_qmans_pq_mem;
1772                 }
1773
1774                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1775                                                 hdev, q->pq_size,
1776                                                 &q->pq_dma_addr,
1777                                                 GFP_KERNEL | __GFP_ZERO);
1778                 if (!q->pq_kernel_addr) {
1779                         rc = -ENOMEM;
1780                         goto free_internal_qmans_pq_mem;
1781                 }
1782         }
1783
1784         return 0;
1785
1786 free_internal_qmans_pq_mem:
1787         gaudi_free_internal_qmans_pq_mem(hdev);
1788         return rc;
1789 }
1790
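/*
 * gaudi_set_pci_memory_regions() - describe the memory regions reachable
 * through the PCI BARs: CFG and SP SRAM behind the CFG BAR, SRAM behind the
 * SRAM BAR and DRAM (HBM) behind the HBM BAR.
 */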
1791 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1792 {
1793         struct asic_fixed_properties *prop = &hdev->asic_prop;
1794         struct pci_mem_region *region;
1795
1796         /* CFG */
1797         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1798         region->region_base = CFG_BASE;
1799         region->region_size = CFG_SIZE;
1800         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1801         region->bar_size = CFG_BAR_SIZE;
1802         region->bar_id = CFG_BAR_ID;
1803         region->used = 1;
1804
1805         /* SRAM */
1806         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1807         region->region_base = SRAM_BASE_ADDR;
1808         region->region_size = SRAM_SIZE;
1809         region->offset_in_bar = 0;
1810         region->bar_size = SRAM_BAR_SIZE;
1811         region->bar_id = SRAM_BAR_ID;
1812         region->used = 1;
1813
1814         /* DRAM */
1815         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1816         region->region_base = DRAM_PHYS_BASE;
1817         region->region_size = hdev->asic_prop.dram_size;
1818         region->offset_in_bar = 0;
1819         region->bar_size = prop->dram_pci_bar_size;
1820         region->bar_id = HBM_BAR_ID;
1821         region->used = 1;
1822
1823         /* SP SRAM */
1824         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1825         region->region_base = PSOC_SCRATCHPAD_ADDR;
1826         region->region_size = PSOC_SCRATCHPAD_SIZE;
1827         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1828         region->bar_size = CFG_BAR_SIZE;
1829         region->bar_id = CFG_BAR_ID;
1830         region->used = 1;
1831 }
1832
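/*
 * gaudi_sw_init() - software-only initialization: build the event ID mapping
 * from the IRQ map table, create the DMA pool and the CPU accessible DMA
 * pool, allocate the internal QMAN PQs and advertise the features supported
 * by this ASIC.
 */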
1833 static int gaudi_sw_init(struct hl_device *hdev)
1834 {
1835         struct gaudi_device *gaudi;
1836         u32 i, event_id = 0;
1837         int rc;
1838
1839         /* Allocate device structure */
1840         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1841         if (!gaudi)
1842                 return -ENOMEM;
1843
1844         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1845                 if (gaudi_irq_map_table[i].valid) {
1846                         if (event_id == GAUDI_EVENT_SIZE) {
1847                                 dev_err(hdev->dev,
1848                                         "Event array exceeds the limit of %u events\n",
1849                                         GAUDI_EVENT_SIZE);
1850                                 rc = -EINVAL;
1851                                 goto free_gaudi_device;
1852                         }
1853
1854                         gaudi->events[event_id++] =
1855                                         gaudi_irq_map_table[i].fc_id;
1856                 }
1857         }
1858
1859         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1860
1861         hdev->asic_specific = gaudi;
1862
1863         /* Create DMA pool for small allocations */
1864         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1865                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1866         if (!hdev->dma_pool) {
1867                 dev_err(hdev->dev, "failed to create DMA pool\n");
1868                 rc = -ENOMEM;
1869                 goto free_gaudi_device;
1870         }
1871
1872         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1873         if (rc)
1874                 goto free_dma_pool;
1875
1876         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1877         if (!hdev->cpu_accessible_dma_pool) {
1878                 dev_err(hdev->dev,
1879                         "Failed to create CPU accessible DMA pool\n");
1880                 rc = -ENOMEM;
1881                 goto free_cpu_dma_mem;
1882         }
1883
1884         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1885                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1886                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1887         if (rc) {
1888                 dev_err(hdev->dev,
1889                         "Failed to add memory to CPU accessible DMA pool\n");
1890                 rc = -EFAULT;
1891                 goto free_cpu_accessible_dma_pool;
1892         }
1893
1894         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1895         if (rc)
1896                 goto free_cpu_accessible_dma_pool;
1897
1898         spin_lock_init(&gaudi->hw_queues_lock);
1899         mutex_init(&gaudi->clk_gate_mutex);
1900
1901         hdev->supports_sync_stream = true;
1902         hdev->supports_coresight = true;
1903         hdev->supports_staged_submission = true;
1904         hdev->supports_wait_for_multi_cs = true;
1905
1906         hdev->asic_funcs->set_pci_memory_regions(hdev);
1907         hdev->stream_master_qid_arr =
1908                                 hdev->asic_funcs->get_stream_master_qid_arr();
1909         hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1910
1911         return 0;
1912
1913 free_cpu_accessible_dma_pool:
1914         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1915 free_cpu_dma_mem:
1916         if (!hdev->asic_prop.fw_security_enabled)
1917                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1918                                         hdev->cpu_pci_msb_addr);
1919         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1920                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1921                         hdev->cpu_accessible_dma_mem,
1922                         hdev->cpu_accessible_dma_address);
1923 free_dma_pool:
1924         dma_pool_destroy(hdev->dma_pool);
1925 free_gaudi_device:
1926         kfree(gaudi);
1927         return rc;
1928 }
1929
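/*
 * gaudi_sw_fini() - release everything allocated in gaudi_sw_init(), in
 * reverse order.
 */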
1930 static int gaudi_sw_fini(struct hl_device *hdev)
1931 {
1932         struct gaudi_device *gaudi = hdev->asic_specific;
1933
1934         gaudi_free_internal_qmans_pq_mem(hdev);
1935
1936         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1937
1938         if (!hdev->asic_prop.fw_security_enabled)
1939                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1940                                         hdev->cpu_pci_msb_addr);
1941
1942         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1943                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1944                         hdev->cpu_accessible_dma_mem,
1945                         hdev->cpu_accessible_dma_address);
1946
1947         dma_pool_destroy(hdev->dma_pool);
1948
1949         mutex_destroy(&gaudi->clk_gate_mutex);
1950
1951         kfree(gaudi);
1952
1953         return 0;
1954 }
1955
1956 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1957 {
1958         struct hl_device *hdev = arg;
1959         int i;
1960
1961         if (hdev->disabled)
1962                 return IRQ_HANDLED;
1963
1964         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1965                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1966
1967         hl_irq_handler_eq(irq, &hdev->event_queue);
1968
1969         return IRQ_HANDLED;
1970 }
1971
1972 /*
1973  * For backward compatibility, new MSI interrupts should be set after the
1974  * existing CPU and NIC interrupts.
1975  */
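/*
 * Mapping example (illustrative): an index below GAUDI_EVENT_QUEUE_MSI_IDX
 * maps 1:1 to an MSI vector, the CPU event queue uses
 * GAUDI_EVENT_QUEUE_MSI_IDX itself, and any higher index is shifted past the
 * NIC vectors, i.e. vector = nr + NIC_NUMBER_OF_ENGINES + 1.
 */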
1976 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1977                                 bool cpu_eq)
1978 {
1979         int msi_vec;
1980
1981         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1982                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1983                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1984
1985         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1986                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1987
1988         return pci_irq_vector(hdev->pdev, msi_vec);
1989 }
1990
1991 static int gaudi_enable_msi_single(struct hl_device *hdev)
1992 {
1993         int rc, irq;
1994
1995         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1996
1997         irq = gaudi_pci_irq_vector(hdev, 0, false);
1998         rc = request_irq(irq, gaudi_irq_handler_single, 0,
1999                         "gaudi single msi", hdev);
2000         if (rc)
2001                 dev_err(hdev->dev,
2002                         "Failed to request single MSI IRQ\n");
2003
2004         return rc;
2005 }
2006
2007 static int gaudi_enable_msi_multi(struct hl_device *hdev)
2008 {
2009         int cq_cnt = hdev->asic_prop.completion_queues_count;
2010         int rc, i, irq_cnt_init, irq;
2011
2012         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2013                 irq = gaudi_pci_irq_vector(hdev, i, false);
2014                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
2015                                 &hdev->completion_queue[i]);
2016                 if (rc) {
2017                         dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2018                         goto free_irqs;
2019                 }
2020         }
2021
2022         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2023         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2024                                 &hdev->event_queue);
2025         if (rc) {
2026                 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2027                 goto free_irqs;
2028         }
2029
2030         return 0;
2031
2032 free_irqs:
2033         for (i = 0 ; i < irq_cnt_init ; i++)
2034                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
2035                                 &hdev->completion_queue[i]);
2036         return rc;
2037 }
2038
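/*
 * gaudi_enable_msi() - allocate MSI vectors and register the IRQ handlers.
 * If fewer than NUMBER_OF_INTERRUPTS vectors are available, fall back to a
 * single handler that serves all completion queues and the event queue.
 */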
2039 static int gaudi_enable_msi(struct hl_device *hdev)
2040 {
2041         struct gaudi_device *gaudi = hdev->asic_specific;
2042         int rc;
2043
2044         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2045                 return 0;
2046
2047         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2048         if (rc < 0) {
2049                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2050                 return rc;
2051         }
2052
2053         if (rc < NUMBER_OF_INTERRUPTS) {
2054                 gaudi->multi_msi_mode = false;
2055                 rc = gaudi_enable_msi_single(hdev);
2056         } else {
2057                 gaudi->multi_msi_mode = true;
2058                 rc = gaudi_enable_msi_multi(hdev);
2059         }
2060
2061         if (rc)
2062                 goto free_pci_irq_vectors;
2063
2064         gaudi->hw_cap_initialized |= HW_CAP_MSI;
2065
2066         return 0;
2067
2068 free_pci_irq_vectors:
2069         pci_free_irq_vectors(hdev->pdev);
2070         return rc;
2071 }
2072
2073 static void gaudi_sync_irqs(struct hl_device *hdev)
2074 {
2075         struct gaudi_device *gaudi = hdev->asic_specific;
2076         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2077
2078         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2079                 return;
2080
2081         /* Wait for all pending IRQs to finish */
2082         if (gaudi->multi_msi_mode) {
2083                 for (i = 0 ; i < cq_cnt ; i++)
2084                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2085
2086                 synchronize_irq(gaudi_pci_irq_vector(hdev,
2087                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
2088                                                 true));
2089         } else {
2090                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2091         }
2092 }
2093
2094 static void gaudi_disable_msi(struct hl_device *hdev)
2095 {
2096         struct gaudi_device *gaudi = hdev->asic_specific;
2097         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2098
2099         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2100                 return;
2101
2102         gaudi_sync_irqs(hdev);
2103
2104         if (gaudi->multi_msi_mode) {
2105                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2106                                                 true);
2107                 free_irq(irq, &hdev->event_queue);
2108
2109                 for (i = 0 ; i < cq_cnt ; i++) {
2110                         irq = gaudi_pci_irq_vector(hdev, i, false);
2111                         free_irq(irq, &hdev->completion_queue[i]);
2112                 }
2113         } else {
2114                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2115         }
2116
2117         pci_free_irq_vectors(hdev->pdev);
2118
2119         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2120 }
2121
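/*
 * gaudi_init_scrambler_sram() - enable the SRAM scrambler in all NIF/SIF
 * routers and DMA interfaces, unless FW security is enabled, the FW has
 * already configured it, it was already enabled, or SRAM scrambling is
 * disabled (hdev->sram_scrambler_enable is clear).
 */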
2122 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2123 {
2124         struct gaudi_device *gaudi = hdev->asic_specific;
2125
2126         if (hdev->asic_prop.fw_security_enabled)
2127                 return;
2128
2129         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2130                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2131                 return;
2132
2133         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2134                 return;
2135
2136         if (!hdev->sram_scrambler_enable)
2137                 return;
2138
2139         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2140                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2141         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2142                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2143         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2144                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2145         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2146                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2147         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2148                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2149         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2150                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2151         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2152                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2153         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2154                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2155
2156         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2157                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2158         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2159                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2160         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2161                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2162         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2163                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2164         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2165                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2166         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2167                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2168         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2169                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2170         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2171                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2172
2173         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2174                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2175         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2176                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2177         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2178                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2179         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2180                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2181         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2182                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2183         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2184                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2185         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2186                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2187         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2188                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2189
2190         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2191 }
2192
2193 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2194 {
2195         struct gaudi_device *gaudi = hdev->asic_specific;
2196
2197         if (hdev->asic_prop.fw_security_enabled)
2198                 return;
2199
2200         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2201                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2202                 return;
2203
2204         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2205                 return;
2206
2207         if (!hdev->dram_scrambler_enable)
2208                 return;
2209
2210         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2211                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2212         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2213                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2214         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2215                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2216         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2217                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2218         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2219                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2220         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2221                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2222         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2223                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2224         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2225                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2226
2227         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2228                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2229         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2230                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2231         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2232                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2233         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2234                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2235         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2236                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2237         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2238                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2239         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2240                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2241         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2242                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2243
2244         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2245                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2246         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2247                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2248         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2249                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2250         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2251                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2252         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2253                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2254         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2255                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2256         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2257                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2258         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2259                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2260
2261         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2262 }
2263
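/*
 * gaudi_init_e2e() - program the end-to-end credits (HBM and PCI read/write
 * sizes) for all routers and DMA interfaces and then enable E2E, unless FW
 * security is enabled or the FW has already configured the credits.
 */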
2264 static void gaudi_init_e2e(struct hl_device *hdev)
2265 {
2266         if (hdev->asic_prop.fw_security_enabled)
2267                 return;
2268
2269         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2270                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2271                 return;
2272
2273         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2274         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2275         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2276         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2277
2278         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2279         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2280         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2281         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2282
2283         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2284         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2285         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2286         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2287
2288         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2289         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2290         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2291         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2292
2293         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2294         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2295         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2296         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2297
2298         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2299         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2300         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2301         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2302
2303         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2304         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2305         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2306         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2307
2308         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2309         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2310         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2311         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2312
2313         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2314         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2315         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2316         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2317
2318         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2319         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2320         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2321         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2322
2323         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2324         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2325         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2326         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2327
2328         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2329         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2330         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2331         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2332
2333         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2334         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2335         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2336         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2337
2338         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2339         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2340         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2341         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2342
2343         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2344         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2345         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2346         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2347
2348         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2349         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2350         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2351         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2352
2353         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2354         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2355         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2356         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2357
2358         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2359         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2360         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2361         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2362
2363         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2364         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2365         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2366         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2367
2368         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2369         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2370         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2371         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2372
2373         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2374         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2375         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2376         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2377
2378         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2379         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2380         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2381         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2382
2383         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2384         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2385         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2386         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2387
2388         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2389         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2390         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2391         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2392
2393         if (!hdev->dram_scrambler_enable) {
2394                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2395                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2396                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2397                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2398
2399                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2400                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2401                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2402                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2403
2404                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2405                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2406                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2407                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2408
2409                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2410                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2411                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2412                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2413
2414                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2415                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2416                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2417                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2418
2419                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2420                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2421                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2422                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2423
2424                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2425                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2426                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2427                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2428
2429                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2430                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2431                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2432                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2433
2434                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2435                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2436                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2437                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2438
2439                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2440                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2441                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2442                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2443
2444                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2445                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2446                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2447                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2448
2449                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2450                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2451                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2452                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2453
2454                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2455                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2456                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2457                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2458
2459                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2460                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2461                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2462                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2463
2464                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2465                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2466                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2467                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2468
2469                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2470                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2471                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2472                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2473
2474                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2475                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2476                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2477                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2478
2479                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2480                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2481                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2482                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2483
2484                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2485                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2486                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2487                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2488
2489                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2490                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2491                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2492                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2493
2494                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2495                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2496                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2497                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2498
2499                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2500                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2501                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2502                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2503
2504                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2505                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2506                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2507                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2508
2509                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2510                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2511                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2512                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2513         }
2514
2515         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2516                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2517         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2518                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2519
2520         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2521                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2522         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2523                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2524
2525         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2526                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2527         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2528                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2529
2530         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2531                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2532         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2533                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2534
2535         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2536                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2537         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2538                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2539
2540         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2541                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2542         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2543                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2544
2545         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2546                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2547         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2548                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2549
2550         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2551                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2552         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2553                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2554
2555         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2556                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2557         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2558                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2559
2560         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2561                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2562         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2563                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2564
2565         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2566                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2567         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2568                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2569
2570         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2571                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2572         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2573                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2574
2575         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2576                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2577         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2578                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2579
2580         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2581                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2582         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2583                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2584
2585         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2586                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2587         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2588                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2589
2590         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2591                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2592         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2593                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2594
2595         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2596                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2597         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2598                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2599
2600         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2601                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2602         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2603                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2604
2605         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2606                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2607         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2608                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2609
2610         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2611                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2612         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2613                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2614
2615         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2616                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2617         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2618                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2619
2620         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2621                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2622         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2623                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2624
2625         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2626                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2627         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2628                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2629
2630         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2631                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2632         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2633                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2634 }
2635
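/*
 * Configure HBM read/write credits in the DMA_IF units (all four corners)
 * and enable credit counting for both HBM channels. Skipped when firmware
 * security is enabled or when the boot-fit firmware reports that it has
 * already configured the HBM credits (CPU_BOOT_DEV_STS0_HBM_CRED_EN).
 */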
2636 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2637 {
2638         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2639
2640         if (hdev->asic_prop.fw_security_enabled)
2641                 return;
2642
2643         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2644                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2645                 return;
2646
2647         hbm0_wr = 0x33333333;
2648         hbm0_rd = 0x77777777;
2649         hbm1_wr = 0x55555555;
2650         hbm1_rd = 0xDDDDDDDD;
2651
2652         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2653         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2654         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2655         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2656
2657         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2658         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2659         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2660         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2661
2662         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2663         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2664         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2665         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2666
2667         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2668         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2669         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2670         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2671
2672         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2673                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2674                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2675         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2676                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2677                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2678         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2679                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2680                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2681         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2682                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2683                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2684
2685         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2686                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2687                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2688         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2689                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2690                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2691         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2692                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2693                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2694         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2695                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2696                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2697 }
2698
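/*
 * Apply the "golden" register configuration: E2E credits, HBM credits,
 * per-TPC interrupt masking and I-cache fetch setup, clearing of the first
 * SRAM bytes used by tensor DMA, and the MME rollup counters.
 */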
2699 static void gaudi_init_golden_registers(struct hl_device *hdev)
2700 {
2701         u32 tpc_offset;
2702         int tpc_id, i;
2703
2704         gaudi_init_e2e(hdev);
2705         gaudi_init_hbm_cred(hdev);
2706
2707         for (tpc_id = 0, tpc_offset = 0;
2708                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2709                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2710                 /* Mask all arithmetic interrupts from TPC */
2711                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2712                 /* Set 16 cache lines */
2713                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2714                                 ICACHE_FETCH_LINE_NUM, 2);
2715         }
2716
2717         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2718         for (i = 0 ; i < 128 ; i += 8)
2719                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2720
2721         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2722         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2723         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2724         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2725 }
2726
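/*
 * Configure a single stream (qman_id) of a PCI DMA QMAN whose PQ resides in
 * host memory at qman_pq_addr. The sync manager monitor/SOB base addresses
 * are programmed into the CP MSG_BASE registers. The per-QMAN error
 * reporting (RAZWI IRQ, ARB error message, watchdog) is configured once,
 * when stream 0 is initialized.
 */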
2727 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2728                                         int qman_id, dma_addr_t qman_pq_addr)
2729 {
2730         struct cpu_dyn_regs *dyn_regs =
2731                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2732         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2733         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2734         u32 q_off, dma_qm_offset;
2735         u32 dma_qm_err_cfg, irq_handler_offset;
2736
2737         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2738
2739         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2740                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2741         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2742                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2743         so_base_en_lo = lower_32_bits(CFG_BASE +
2744                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2745         so_base_en_hi = upper_32_bits(CFG_BASE +
2746                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2747         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2748                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2749         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2750                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2751         so_base_ws_lo = lower_32_bits(CFG_BASE +
2752                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2753         so_base_ws_hi = upper_32_bits(CFG_BASE +
2754                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2755
2756         q_off = dma_qm_offset + qman_id * 4;
2757
2758         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2759         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2760
2761         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2762         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2763         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2764
2765         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2766         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2767                                                         QMAN_LDMA_SRC_OFFSET);
2768         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2769                                                         QMAN_LDMA_DST_OFFSET);
2770
2771         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2772         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2773         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2774         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2775         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2776         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2777         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2778         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2779
2780         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2781
2782         /* The following configuration is needed only once per QMAN */
2783         if (qman_id == 0) {
2784                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2785                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2786                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2787
2788                 /* Configure RAZWI IRQ */
2789                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2790                 if (hdev->stop_on_err)
2791                         dma_qm_err_cfg |=
2792                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2793
2794                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2795
2796                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2797                         lower_32_bits(CFG_BASE + irq_handler_offset));
2798                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2799                         upper_32_bits(CFG_BASE + irq_handler_offset));
2800
2801                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2802                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2803                                                                         dma_id);
2804
2805                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2806                                 QM_ARB_ERR_MSG_EN_MASK);
2807
2808                 /* Increase ARB WDT to support streams architecture */
2809                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2810                                 GAUDI_ARB_WDT_TIMEOUT);
2811
2812                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2813                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2814
2815                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2816         }
2817 }
2818
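/*
 * Configure the DMA core engine itself (as opposed to its QMAN): maximum
 * outstanding reads, error reporting towards the interrupt handler, MMU
 * bypass for the secured channel, and finally enable the engine.
 */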
2819 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2820 {
2821         struct cpu_dyn_regs *dyn_regs =
2822                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2823         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2824         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2825         u32 irq_handler_offset;
2826
2827         /* Set to maximum possible according to physical size */
2828         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2829         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2830
2831         /* WA for H/W bug H3-2116 */
2832         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2833
2834         /* The STOP_ON bit means the operation is not completed in case of RAZWI */
2835         if (hdev->stop_on_err)
2836                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2837
2838         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2839
2840         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2841                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2842                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2843
2844         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2845                 lower_32_bits(CFG_BASE + irq_handler_offset));
2846         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2847                 upper_32_bits(CFG_BASE + irq_handler_offset));
2848
2849         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2850                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2851         WREG32(mmDMA0_CORE_PROT + dma_offset,
2852                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2853         /* If the channel is secured, it should be in MMU bypass mode */
2854         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2855                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2856         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2857 }
2858
2859 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2860                                 u32 enable_mask)
2861 {
2862         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2863
2864         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2865 }
2866
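/*
 * Initialize all PCI DMA QMANs. Each stream is bound to a completion queue
 * and an MSI vector; queues that come after the CPU queue are shifted by
 * the CPU EQ and NIC IRQs. Runs only once, guarded by HW_CAP_PCI_DMA.
 */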
2867 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2868 {
2869         struct gaudi_device *gaudi = hdev->asic_specific;
2870         struct hl_hw_queue *q;
2871         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2872
2873         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2874                 return;
2875
2876         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2877                 dma_id = gaudi_dma_assignment[i];
2878                 /*
2879                  * For queues after the CPU queue, add 1 to get the correct
2880                  * queue index. In addition, the CPU EQ and the NIC IRQs must be
2881                  * added in order to get the correct MSI vector.
2882                  */
2883                 if (dma_id > 1) {
2884                         cpu_skip = 1;
2885                         nic_skip = NIC_NUMBER_OF_ENGINES;
2886                 } else {
2887                         cpu_skip = 0;
2888                         nic_skip = 0;
2889                 }
2890
2891                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2892                         q_idx = 4 * dma_id + j + cpu_skip;
2893                         q = &hdev->kernel_queues[q_idx];
2894                         q->cq_id = cq_id++;
2895                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2896                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2897                                                 q->bus_address);
2898                 }
2899
2900                 gaudi_init_dma_core(hdev, dma_id);
2901
2902                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2903         }
2904
2905         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2906 }
2907
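/*
 * Configure a single stream of an HBM (internal) DMA QMAN. Streams 0-3 get
 * a PQ in device memory at qman_base_addr, while stream 4 is the lower CP,
 * which also carries the error/RAZWI reporting configuration. DMA5 is
 * additionally given the west-south sync manager bases for the sync stream
 * collective.
 */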
2908 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2909                                         int qman_id, u64 qman_base_addr)
2910 {
2911         struct cpu_dyn_regs *dyn_regs =
2912                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2913         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2914         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2915         u32 dma_qm_err_cfg, irq_handler_offset;
2916         u32 q_off, dma_qm_offset;
2917
2918         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2919
2920         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2921                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2922         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2923                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2924         so_base_en_lo = lower_32_bits(CFG_BASE +
2925                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2926         so_base_en_hi = upper_32_bits(CFG_BASE +
2927                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2928         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2929                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2930         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2931                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2932         so_base_ws_lo = lower_32_bits(CFG_BASE +
2933                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2934         so_base_ws_hi = upper_32_bits(CFG_BASE +
2935                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2936
2937         q_off = dma_qm_offset + qman_id * 4;
2938
2939         if (qman_id < 4) {
2940                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2941                                         lower_32_bits(qman_base_addr));
2942                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2943                                         upper_32_bits(qman_base_addr));
2944
2945                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2946                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2947                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2948
2949                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2950                                                         QMAN_CPDMA_SIZE_OFFSET);
2951                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2952                                                         QMAN_CPDMA_SRC_OFFSET);
2953                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2954                                                         QMAN_CPDMA_DST_OFFSET);
2955         } else {
2956                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2957                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2958                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2959
2960                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2961                                                         QMAN_LDMA_SIZE_OFFSET);
2962                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2963                                                         QMAN_LDMA_SRC_OFFSET);
2964                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2965                                                         QMAN_LDMA_DST_OFFSET);
2966
2967                 /* Configure RAZWI IRQ */
2968                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2969                 if (hdev->stop_on_err)
2970                         dma_qm_err_cfg |=
2971                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2972
2973                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2974
2975                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2976                         lower_32_bits(CFG_BASE + irq_handler_offset));
2977                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2978                         upper_32_bits(CFG_BASE + irq_handler_offset));
2979
2980                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2981                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2982                                                                         dma_id);
2983
2984                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2985                                 QM_ARB_ERR_MSG_EN_MASK);
2986
2987                 /* Increase ARB WDT to support streams architecture */
2988                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2989                                 GAUDI_ARB_WDT_TIMEOUT);
2990
2991                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2992                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2993                                 QMAN_INTERNAL_MAKE_TRUSTED);
2994         }
2995
2996         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2997         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2998         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2999         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3000
3001         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
3002         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
3003                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3004                                 mtr_base_ws_lo);
3005                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3006                                 mtr_base_ws_hi);
3007                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3008                                 so_base_ws_lo);
3009                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3010                                 so_base_ws_hi);
3011         }
3012 }
3013
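/*
 * Initialize the HBM DMA QMANs. Internal queue indices are offset by one to
 * account for the CPU queue. Runs only once, guarded by HW_CAP_HBM_DMA.
 */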
3014 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
3015 {
3016         struct gaudi_device *gaudi = hdev->asic_specific;
3017         struct gaudi_internal_qman_info *q;
3018         u64 qman_base_addr;
3019         int i, j, dma_id, internal_q_index;
3020
3021         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
3022                 return;
3023
3024         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
3025                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
3026
3027                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
3028                          /*
3029                           * Add the CPU queue in order to get the correct queue
3030                           * number, as all internal queues are placed after it
3031                           */
3032                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
3033
3034                         q = &gaudi->internal_qmans[internal_q_index];
3035                         qman_base_addr = (u64) q->pq_dma_addr;
3036                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
3037                                                 qman_base_addr);
3038                 }
3039
3040                 /* Initializing lower CP for HBM DMA QMAN */
3041                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
3042
3043                 gaudi_init_dma_core(hdev, dma_id);
3044
3045                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
3046         }
3047
3048         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
3049 }
3050
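/*
 * Configure a single stream of an MME QMAN. As with the DMA QMANs, streams
 * 0-3 get an internal PQ, while stream 4 (the lower CP) also carries the
 * error/RAZWI reporting configuration.
 */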
3051 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
3052                                         int qman_id, u64 qman_base_addr)
3053 {
3054         struct cpu_dyn_regs *dyn_regs =
3055                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3056         u32 mtr_base_lo, mtr_base_hi;
3057         u32 so_base_lo, so_base_hi;
3058         u32 irq_handler_offset;
3059         u32 q_off, mme_id;
3060         u32 mme_qm_err_cfg;
3061
3062         mtr_base_lo = lower_32_bits(CFG_BASE +
3063                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3064         mtr_base_hi = upper_32_bits(CFG_BASE +
3065                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3066         so_base_lo = lower_32_bits(CFG_BASE +
3067                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3068         so_base_hi = upper_32_bits(CFG_BASE +
3069                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3070
3071         q_off = mme_offset + qman_id * 4;
3072
3073         if (qman_id < 4) {
3074                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3075                                         lower_32_bits(qman_base_addr));
3076                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3077                                         upper_32_bits(qman_base_addr));
3078
3079                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3080                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3081                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3082
3083                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3084                                                         QMAN_CPDMA_SIZE_OFFSET);
3085                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3086                                                         QMAN_CPDMA_SRC_OFFSET);
3087                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3088                                                         QMAN_CPDMA_DST_OFFSET);
3089         } else {
3090                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3091                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3092                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3093
3094                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3095                                                         QMAN_LDMA_SIZE_OFFSET);
3096                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3097                                                         QMAN_LDMA_SRC_OFFSET);
3098                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3099                                                         QMAN_LDMA_DST_OFFSET);
3100
3101                 /* Configure RAZWI IRQ */
3102                 mme_id = mme_offset /
3103                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3104
3105                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3106                 if (hdev->stop_on_err)
3107                         mme_qm_err_cfg |=
3108                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3109
3110                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3111
3112                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3113                         lower_32_bits(CFG_BASE + irq_handler_offset));
3114                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3115                         upper_32_bits(CFG_BASE + irq_handler_offset));
3116
3117                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3118                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3119                                                                         mme_id);
3120
3121                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3122                                 QM_ARB_ERR_MSG_EN_MASK);
3123
3124                 /* Increase ARB WDT to support streams architecture */
3125                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3126                                 GAUDI_ARB_WDT_TIMEOUT);
3127
3128                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3129                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3130                                 QMAN_INTERNAL_MAKE_TRUSTED);
3131         }
3132
3133         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3134         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3135         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3136         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3137 }
3138
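/*
 * Initialize the four upper-CP streams and the lower CP of both MME QMANs
 * (MME2 and MME0), then enable them. Runs only once, guarded by HW_CAP_MME.
 */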
3139 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3140 {
3141         struct gaudi_device *gaudi = hdev->asic_specific;
3142         struct gaudi_internal_qman_info *q;
3143         u64 qman_base_addr;
3144         u32 mme_offset;
3145         int i, internal_q_index;
3146
3147         if (gaudi->hw_cap_initialized & HW_CAP_MME)
3148                 return;
3149
3150         /*
3151          * Map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3152          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3153          */
3154
3155         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3156
3157         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3158                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3159                 q = &gaudi->internal_qmans[internal_q_index];
3160                 qman_base_addr = (u64) q->pq_dma_addr;
3161                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3162                                         qman_base_addr);
3163                 if (i == 3)
3164                         mme_offset = 0;
3165         }
3166
3167         /* Initializing lower CP for MME QMANs */
3168         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3169         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3170         gaudi_init_mme_qman(hdev, 0, 4, 0);
3171
3172         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3173         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3174
3175         gaudi->hw_cap_initialized |= HW_CAP_MME;
3176 }
3177
3178 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3179                                 int qman_id, u64 qman_base_addr)
3180 {
3181         struct cpu_dyn_regs *dyn_regs =
3182                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3183         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3184         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3185         u32 tpc_qm_err_cfg, irq_handler_offset;
3186         u32 q_off, tpc_id;
3187
3188         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3189                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3190         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3191                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3192         so_base_en_lo = lower_32_bits(CFG_BASE +
3193                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3194         so_base_en_hi = upper_32_bits(CFG_BASE +
3195                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3196         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3197                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3198         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3199                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3200         so_base_ws_lo = lower_32_bits(CFG_BASE +
3201                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3202         so_base_ws_hi = upper_32_bits(CFG_BASE +
3203                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3204
3205         q_off = tpc_offset + qman_id * 4;
3206
3207         tpc_id = tpc_offset /
3208                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3209
3210         if (qman_id < 4) {
3211                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3212                                         lower_32_bits(qman_base_addr));
3213                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3214                                         upper_32_bits(qman_base_addr));
3215
3216                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3217                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3218                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3219
3220                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3221                                                         QMAN_CPDMA_SIZE_OFFSET);
3222                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3223                                                         QMAN_CPDMA_SRC_OFFSET);
3224                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3225                                                         QMAN_CPDMA_DST_OFFSET);
3226         } else {
3227                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3228                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3229                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3230
3231                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3232                                                         QMAN_LDMA_SIZE_OFFSET);
3233                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3234                                                         QMAN_LDMA_SRC_OFFSET);
3235                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3236                                                         QMAN_LDMA_DST_OFFSET);
3237
3238                 /* Configure RAZWI IRQ */
3239                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3240                 if (hdev->stop_on_err)
3241                         tpc_qm_err_cfg |=
3242                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3243
3244                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3245
3246                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3247                         lower_32_bits(CFG_BASE + irq_handler_offset));
3248                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3249                         upper_32_bits(CFG_BASE + irq_handler_offset));
3250
3251                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3252                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3253                                                                         tpc_id);
3254
3255                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3256                                 QM_ARB_ERR_MSG_EN_MASK);
3257
3258                 /* Increase ARB WDT to support streams architecture */
3259                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3260                                 GAUDI_ARB_WDT_TIMEOUT);
3261
3262                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3263                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3264                                 QMAN_INTERNAL_MAKE_TRUSTED);
3265         }
3266
3267         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3268         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3269         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3270         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3271
3272         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3273         if (tpc_id == 6) {
3274                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3275                                 mtr_base_ws_lo);
3276                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3277                                 mtr_base_ws_hi);
3278                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3279                                 so_base_ws_lo);
3280                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3281                                 so_base_ws_hi);
3282         }
3283 }
3284
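/*
 * Initialize the QMANs of all TPC engines. After the four upper-CP streams
 * of a TPC are configured, its lower CP is initialized and the QMAN and TPC
 * channel are enabled; the sync manager SOB base is also programmed per
 * TPC, and each engine is marked in hw_cap_initialized.
 */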
3285 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3286 {
3287         struct gaudi_device *gaudi = hdev->asic_specific;
3288         struct gaudi_internal_qman_info *q;
3289         u64 qman_base_addr;
3290         u32 so_base_hi, tpc_offset = 0;
3291         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3292                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3293         int i, tpc_id, internal_q_index;
3294
3295         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3296                 return;
3297
3298         so_base_hi = upper_32_bits(CFG_BASE +
3299                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3300
3301         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3302                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3303                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3304                                                 tpc_id * QMAN_STREAMS + i;
3305                         q = &gaudi->internal_qmans[internal_q_index];
3306                         qman_base_addr = (u64) q->pq_dma_addr;
3307                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3308                                                 qman_base_addr);
3309
3310                         if (i == 3) {
3311                                 /* Initializing lower CP for TPC QMAN */
3312                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3313
3314                                 /* Enable the QMAN and TPC channel */
3315                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3316                                                 QMAN_TPC_ENABLE);
3317                         }
3318                 }
3319
3320                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3321                                 so_base_hi);
3322
3323                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3324
3325                 gaudi->hw_cap_initialized |=
3326                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3327         }
3328 }
3329
3330 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3331                                 int qman_id, u64 qman_base_addr, int nic_id)
3332 {
3333         struct cpu_dyn_regs *dyn_regs =
3334                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3335         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3336         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3337         u32 nic_qm_err_cfg, irq_handler_offset;
3338         u32 q_off;
3339
3340         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3341                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3342         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3343                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3344         so_base_en_lo = lower_32_bits(CFG_BASE +
3345                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3346         so_base_en_hi = upper_32_bits(CFG_BASE +
3347                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3348         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3349                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3350         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3351                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3352         so_base_ws_lo = lower_32_bits(CFG_BASE +
3353                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3354         so_base_ws_hi = upper_32_bits(CFG_BASE +
3355                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3356
3357         q_off = nic_offset + qman_id * 4;
3358
3359         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3360         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3361
3362         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3363         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3364         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3365
3366         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3367                                                         QMAN_LDMA_SIZE_OFFSET);
3368         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3369                                                         QMAN_LDMA_SRC_OFFSET);
3370         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3371                                                         QMAN_LDMA_DST_OFFSET);
3372
3373         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3374         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3375         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3376         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3377
3378         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3379         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3380         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3381         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3382         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3383
3384         if (qman_id == 0) {
3385                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3386                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3387                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3388
3389                 /* Configure RAZWI IRQ */
3390                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3391                 if (hdev->stop_on_err)
3392                         nic_qm_err_cfg |=
3393                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3394
3395                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3396
3397                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3398                         lower_32_bits(CFG_BASE + irq_handler_offset));
3399                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3400                         upper_32_bits(CFG_BASE + irq_handler_offset));
3401
3402                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3403                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3404                                                                         nic_id);
3405
3406                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3407                                 QM_ARB_ERR_MSG_EN_MASK);
3408
3409                 /* Increase ARB WDT to support streams architecture */
3410                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3411                                 GAUDI_ARB_WDT_TIMEOUT);
3412
3413                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3414                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3415                                 QMAN_INTERNAL_MAKE_TRUSTED);
3416         }
3417 }
3418
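/*
 * Initialize the QMANs of all NIC engines present in hdev->nic_ports_mask.
 * Each NIC holds two QMANs, so the register offset alternates between the
 * QMAN-to-QMAN and NIC-to-NIC deltas. Skipped entirely if the NIC QMANs
 * were already initialized.
 */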
3419 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3420 {
3421         struct gaudi_device *gaudi = hdev->asic_specific;
3422         struct gaudi_internal_qman_info *q;
3423         u64 qman_base_addr;
3424         u32 nic_offset = 0;
3425         u32 nic_delta_between_qmans =
3426                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3427         u32 nic_delta_between_nics =
3428                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3429         int i, nic_id, internal_q_index;
3430
3431         if (!hdev->nic_ports_mask)
3432                 return;
3433
3434         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3435                 return;
3436
3437         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3438
3439         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3440                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3441                         nic_offset += nic_delta_between_qmans;
3442                         if (nic_id & 1) {
3443                                 nic_offset -= (nic_delta_between_qmans * 2);
3444                                 nic_offset += nic_delta_between_nics;
3445                         }
3446                         continue;
3447                 }
3448
3449                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3450                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3451                                                 nic_id * QMAN_STREAMS + i;
3452                         q = &gaudi->internal_qmans[internal_q_index];
3453                         qman_base_addr = (u64) q->pq_dma_addr;
3454                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3455                                                 qman_base_addr, nic_id);
3456                 }
3457
3458                 /* Enable the QMAN */
3459                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3460
3461                 nic_offset += nic_delta_between_qmans;
3462                 if (nic_id & 1) {
3463                         nic_offset -= (nic_delta_between_qmans * 2);
3464                         nic_offset += nic_delta_between_nics;
3465                 }
3466
3467                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3468         }
3469 }
3470
3471 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3472 {
3473         struct gaudi_device *gaudi = hdev->asic_specific;
3474
3475         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3476                 return;
3477
3478         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3479         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3480         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3481 }
3482
3483 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3484 {
3485         struct gaudi_device *gaudi = hdev->asic_specific;
3486
3487         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3488                 return;
3489
3490         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3491         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3492         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3493         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3494         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3495 }
3496
3497 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3498 {
3499         struct gaudi_device *gaudi = hdev->asic_specific;
3500
3501         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3502                 return;
3503
3504         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3505         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3506 }
3507
3508 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3509 {
3510         struct gaudi_device *gaudi = hdev->asic_specific;
3511         u32 tpc_offset = 0;
3512         int tpc_id;
3513
3514         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3515                 return;
3516
3517         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3518                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3519                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3520         }
3521 }
3522
3523 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3524 {
3525         struct gaudi_device *gaudi = hdev->asic_specific;
3526         u32 nic_mask, nic_offset = 0;
3527         u32 nic_delta_between_qmans =
3528                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3529         u32 nic_delta_between_nics =
3530                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3531         int nic_id;
3532
3533         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3534                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3535
3536                 if (gaudi->hw_cap_initialized & nic_mask)
3537                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3538
3539                 nic_offset += nic_delta_between_qmans;
3540                 if (nic_id & 1) {
3541                         nic_offset -= (nic_delta_between_qmans * 2);
3542                         nic_offset += nic_delta_between_nics;
3543                 }
3544         }
3545 }
3546
3547 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3548 {
3549         struct gaudi_device *gaudi = hdev->asic_specific;
3550
3551         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3552                 return;
3553
3554         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3555         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3556         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3557         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3558 }
3559
3560 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3561 {
3562         struct gaudi_device *gaudi = hdev->asic_specific;
3563
3564         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3565                 return;
3566
3567         /* Stop CPs of HBM DMA QMANs */
3568
3569         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3570         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3571         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3572         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3573         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3574 }
3575
3576 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3577 {
3578         struct gaudi_device *gaudi = hdev->asic_specific;
3579
3580         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3581                 return;
3582
3583         /* Stop CPs of MME QMANs */
3584         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3585         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3586 }
3587
3588 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3589 {
3590         struct gaudi_device *gaudi = hdev->asic_specific;
3591
3592         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3593                 return;
3594
3595         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3596         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3597         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3598         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3599         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3600         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3601         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3602         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3603 }
3604
3605 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3606 {
3607         struct gaudi_device *gaudi = hdev->asic_specific;
3608
3609         /* Stop upper CPs of QMANs */
3610
3611         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3612                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3613                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3614                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3615                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3616
3617         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3618                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3619                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3620                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3621                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3622
3623         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3624                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3625                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3626                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3627                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3628
3629         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3630                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3631                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3632                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3633                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3634
3635         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3636                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3637                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3638                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3639                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3640
3641         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3642                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3643                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3644                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3645                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3646
3647         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3648                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3649                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3650                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3651                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3652
3653         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3654                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3655                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3656                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3657                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3658
3659         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3660                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3661                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3662                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3663                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3664
3665         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3666                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3667                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3668                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3669                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3670 }
3671
3672 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3673 {
3674         struct gaudi_device *gaudi = hdev->asic_specific;
3675
3676         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3677                 return;
3678
3679         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3680         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3681         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3682 }
3683
3684 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3685 {
3686         struct gaudi_device *gaudi = hdev->asic_specific;
3687
3688         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3689                 return;
3690
3691         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3692         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3693         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3694         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3695         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3696 }
3697
3698 static void gaudi_mme_stall(struct hl_device *hdev)
3699 {
3700         struct gaudi_device *gaudi = hdev->asic_specific;
3701
3702         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3703                 return;
3704
3705         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3706         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3707         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3708         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3709         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3710         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3711         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3712         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3713         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3714         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3715         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3716         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3717         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3718         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3719         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3720         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3721         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3722 }
3723
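/* Stall all eight TPC engines by setting the STALL bit in each TPC CFG block */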
3724 static void gaudi_tpc_stall(struct hl_device *hdev)
3725 {
3726         struct gaudi_device *gaudi = hdev->asic_specific;
3727
3728         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3729                 return;
3730
3731         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3732         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3733         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3734         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3735         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3736         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3737         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3738         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3739 }
3740
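/*
 * Enable QMAN clock gating per engine, according to hdev->clock_gating_mask.
 * Each bit in the mask corresponds to an engine ID; gating is enabled only
 * for engines whose bit is set. The whole flow is skipped during a debug
 * session and when F/W security is enabled.
 */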
3741 static void gaudi_set_clock_gating(struct hl_device *hdev)
3742 {
3743         struct gaudi_device *gaudi = hdev->asic_specific;
3744         u32 qman_offset;
3745         bool enable;
3746         int i;
3747
3748         /* Don't enable clock gating while a debug session is in progress,
3749          * as it may interfere
3750          */
3751         if (hdev->in_debug)
3752                 return;
3753
3754         if (hdev->asic_prop.fw_security_enabled)
3755                 return;
3756
3757         for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3758                 enable = !!(hdev->clock_gating_mask &
3759                                 (BIT_ULL(gaudi_dma_assignment[i])));
3760
3761                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3762                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3763                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3764                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3765                                 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3766         }
3767
3768         for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3769                 enable = !!(hdev->clock_gating_mask &
3770                                 (BIT_ULL(gaudi_dma_assignment[i])));
3771
3772                 /* GC sends work to the DMA engine through the Upper CP in DMA5,
3773                  * so clock gating must not be enabled for that DMA
3774                  */
3775                 if (i == GAUDI_HBM_DMA_4)
3776                         enable = 0;
3777
3778                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3779                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3780                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3781                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3782                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3783         }
3784
3785         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3786         WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3787         WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3788
3789         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3790         WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3791         WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3792
3793         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3794                 enable = !!(hdev->clock_gating_mask &
3795                                 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3796
3797                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3798                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3799                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3800                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3801
3802                 qman_offset += TPC_QMAN_OFFSET;
3803         }
3804
3805         gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3806 }
3807
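/*
 * Clear the CGM configuration of every DMA, MME and TPC QMAN, effectively
 * turning clock gating off. Skipped when F/W security is enabled.
 */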
3808 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3809 {
3810         struct gaudi_device *gaudi = hdev->asic_specific;
3811         u32 qman_offset;
3812         int i;
3813
3814         if (hdev->asic_prop.fw_security_enabled)
3815                 return;
3816
3817         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3818                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3819                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3820
3821                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3822         }
3823
3824         WREG32(mmMME0_QM_CGM_CFG, 0);
3825         WREG32(mmMME0_QM_CGM_CFG1, 0);
3826         WREG32(mmMME2_QM_CGM_CFG, 0);
3827         WREG32(mmMME2_QM_CGM_CFG1, 0);
3828
3829         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3830                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3831                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3832
3833                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3834         }
3835
3836         gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3837 }
3838
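/*
 * Restart the PSOC timestamp counter: stop it, zero both halves of the
 * 64-bit value and enable it again so it counts up from zero.
 */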
3839 static void gaudi_enable_timestamp(struct hl_device *hdev)
3840 {
3841         /* Disable the timestamp counter */
3842         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3843
3844         /* Zero the lower/upper parts of the 64-bit counter */
3845         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3846         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3847
3848         /* Enable the counter */
3849         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3850 }
3851
3852 static void gaudi_disable_timestamp(struct hl_device *hdev)
3853 {
3854         /* Disable the timestamp counter */
3855         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3856 }
3857
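/*
 * Ordered engine shutdown: stop all QMANs so no new work is fetched, stall
 * the DMA/TPC/MME cores, then disable the QMANs and the timestamp counter.
 * When the F/W performs the reset, only the MSI disable is done here.
 */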
3858 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3859 {
3860         u32 wait_timeout_ms;
3861
3862         dev_info(hdev->dev,
3863                 "Halting compute engines and disabling interrupts\n");
3864
3865         if (hdev->pldm)
3866                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3867         else
3868                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3869
3870         if (fw_reset)
3871                 goto skip_engines;
3872
3873         gaudi_stop_nic_qmans(hdev);
3874         gaudi_stop_mme_qmans(hdev);
3875         gaudi_stop_tpc_qmans(hdev);
3876         gaudi_stop_hbm_dma_qmans(hdev);
3877         gaudi_stop_pci_dma_qmans(hdev);
3878
3879         hdev->asic_funcs->disable_clock_gating(hdev);
3880
3881         msleep(wait_timeout_ms);
3882
3883         gaudi_pci_dma_stall(hdev);
3884         gaudi_hbm_dma_stall(hdev);
3885         gaudi_tpc_stall(hdev);
3886         gaudi_mme_stall(hdev);
3887
3888         msleep(wait_timeout_ms);
3889
3890         gaudi_disable_nic_qmans(hdev);
3891         gaudi_disable_mme_qmans(hdev);
3892         gaudi_disable_tpc_qmans(hdev);
3893         gaudi_disable_hbm_dma_qmans(hdev);
3894         gaudi_disable_pci_dma_qmans(hdev);
3895
3896         gaudi_disable_timestamp(hdev);
3897
3898 skip_engines:
3899         gaudi_disable_msi(hdev);
3900 }
3901
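/*
 * One-time MMU bring-up: program the hop0 page-table address for every
 * ASID, set the MMU cache management page address, invalidate the MMU
 * cache and enable the MMU. The STLB hop configuration differs when
 * huge-page optimization is requested.
 */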
3902 static int gaudi_mmu_init(struct hl_device *hdev)
3903 {
3904         struct asic_fixed_properties *prop = &hdev->asic_prop;
3905         struct gaudi_device *gaudi = hdev->asic_specific;
3906         u64 hop0_addr;
3907         int rc, i;
3908
3909         if (!hdev->mmu_enable)
3910                 return 0;
3911
3912         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3913                 return 0;
3914
3915         for (i = 0 ; i < prop->max_asid ; i++) {
3916                 hop0_addr = prop->mmu_pgt_addr +
3917                                 (i * prop->mmu_hop_table_size);
3918
3919                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3920                 if (rc) {
3921                         dev_err(hdev->dev,
3922                                 "failed to set hop0 addr for asid %d\n", i);
3923                         goto err;
3924                 }
3925         }
3926
3927         /* Init the MMU cache management page */
3928         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3929         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3930
3931         /* mem cache invalidation */
3932         WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3933
3934         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3935
3936         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3937         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3938
3939         WREG32(mmSTLB_HOP_CONFIGURATION,
3940                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3941
3942         /*
3943          * The H/W expects the first PI after init to be 1. After wraparound
3944          * we'll write 0.
3945          */
3946         gaudi->mmu_cache_inv_pi = 1;
3947
3948         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3949
3950         return 0;
3951
3952 err:
3953         return rc;
3954 }
3955
3956 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3957 {
3958         void __iomem *dst;
3959
3960         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3961
3962         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3963 }
3964
3965 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3966 {
3967         void __iomem *dst;
3968
3969         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3970
3971         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3972 }
3973
3974 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3975 {
3976         struct dynamic_fw_load_mgr *dynamic_loader;
3977         struct cpu_dyn_regs *dyn_regs;
3978
3979         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3980
3981         /*
3982          * Update initial values for a few specific dynamic registers. Before
3983          * the first descriptor is read from the FW, these values have to be
3984          * hard-coded. In later stages of the protocol they are refreshed
3985          * automatically from the FW descriptor, so the data there is always
3986          * up-to-date.
3987          */
3988         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3989         dyn_regs->kmd_msg_to_cpu =
3990                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3991         dyn_regs->cpu_cmd_status_to_host =
3992                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3993
3994         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3995 }
3996
3997 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3998 {
3999         struct static_fw_load_mgr *static_loader;
4000
4001         static_loader = &hdev->fw_loader.static_loader;
4002
4003         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
4004         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
4005         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
4006         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
4007         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
4008         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
4009         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
4010         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
4011         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
4012         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
4013         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
4014         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
4015         static_loader->cpu_reset_wait_msec = hdev->pldm ?
4016                         GAUDI_PLDM_RESET_WAIT_MSEC :
4017                         GAUDI_CPU_RESET_WAIT_MSEC;
4018 }
4019
4020 static void gaudi_init_firmware_loader(struct hl_device *hdev)
4021 {
4022         struct asic_fixed_properties *prop = &hdev->asic_prop;
4023         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4024
4025         /* fill common fields */
4026         fw_loader->fw_comp_loaded = FW_TYPE_NONE;
4027         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
4028         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
4029         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
4030         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
4031         fw_loader->skip_bmc = !hdev->bmc_enable;
4032         fw_loader->sram_bar_id = SRAM_BAR_ID;
4033         fw_loader->dram_bar_id = HBM_BAR_ID;
4034
4035         if (prop->dynamic_fw_load)
4036                 gaudi_init_dynamic_firmware_loader(hdev);
4037         else
4038                 gaudi_init_static_firmware_loader(hdev);
4039 }
4040
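/*
 * Boot the embedded CPU: extend its PCI addressing (only when the driver is
 * allowed to write the register) and run the common F/W init flow. Skipped
 * if preboot loading was not requested or the CPU is already up.
 */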
4041 static int gaudi_init_cpu(struct hl_device *hdev)
4042 {
4043         struct gaudi_device *gaudi = hdev->asic_specific;
4044         int rc;
4045
4046         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4047                 return 0;
4048
4049         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
4050                 return 0;
4051
4052         /*
4053          * The device CPU works with 40-bit addresses.
4054          * This register sets the address extension to 50 bits.
4055          */
4056         if (!hdev->asic_prop.fw_security_enabled)
4057                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
4058
4059         rc = hl_fw_init_cpu(hdev);
4060
4061         if (rc)
4062                 return rc;
4063
4064         gaudi->hw_cap_initialized |= HW_CAP_CPU;
4065
4066         return 0;
4067 }
4068
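/*
 * Pass the CPU the bus addresses and sizes of the PQ, EQ and the CPU
 * accessible memory region, signal it through the PI-update interrupt and
 * wait for it to report PQ_INIT_STATUS_READY_FOR_HOST.
 */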
4069 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4070 {
4071         struct cpu_dyn_regs *dyn_regs =
4072                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4073         struct asic_fixed_properties *prop = &hdev->asic_prop;
4074         struct gaudi_device *gaudi = hdev->asic_specific;
4075         u32 status, irq_handler_offset;
4076         struct hl_eq *eq;
4077         struct hl_hw_queue *cpu_pq =
4078                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4079         int err;
4080
4081         if (!hdev->cpu_queues_enable)
4082                 return 0;
4083
4084         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4085                 return 0;
4086
4087         eq = &hdev->event_queue;
4088
4089         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4090         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4091
4092         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4093         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4094
4095         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4096                         lower_32_bits(hdev->cpu_accessible_dma_address));
4097         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4098                         upper_32_bits(hdev->cpu_accessible_dma_address));
4099
4100         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4101         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4102         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4103
4104         /* Used for EQ CI */
4105         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4106
4107         WREG32(mmCPU_IF_PF_PQ_PI, 0);
4108
4109         if (gaudi->multi_msi_mode)
4110                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4111         else
4112                 WREG32(mmCPU_IF_QUEUE_INIT,
4113                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4114
4115         irq_handler_offset = prop->gic_interrupts_enable ?
4116                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4117                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4118
4119         WREG32(irq_handler_offset,
4120                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4121
4122         err = hl_poll_timeout(
4123                 hdev,
4124                 mmCPU_IF_QUEUE_INIT,
4125                 status,
4126                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4127                 1000,
4128                 cpu_timeout);
4129
4130         if (err) {
4131                 dev_err(hdev->dev,
4132                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4133                 return -EIO;
4134         }
4135
4136         /* update FW application security bits */
4137         if (prop->fw_cpu_boot_dev_sts0_valid)
4138                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4139         if (prop->fw_cpu_boot_dev_sts1_valid)
4140                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4141
4142         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4143         return 0;
4144 }
4145
4146 static void gaudi_pre_hw_init(struct hl_device *hdev)
4147 {
4148         /* Perform read from the device to make sure device is up */
4149         RREG32(mmHW_STATE);
4150
4151         if (!hdev->asic_prop.fw_security_enabled) {
4152                 /* Set the access through PCI bars (Linux driver only) as
4153                  * secured
4154                  */
4155                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4156                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4157                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4158
4159                 /* Perform a read to flush the pending writes and ensure the
4160                  * configuration was applied in the device
4161                  */
4162                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4163         }
4164
4165         /*
4166          * Let's mark in the H/W that we have reached this point. We check
4167          * this value in the reset_before_init function to understand whether
4168          * we need to reset the chip before doing H/W init. This register is
4169          * cleared by the H/W upon H/W reset
4170          */
4171         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4172 }
4173
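/*
 * Main H/W init flow: map the HBM BAR to DRAM, boot the device CPU,
 * disable clock gating, initialize scramblers, golden registers, MMU,
 * security and all QMANs, then re-enable clock gating, the timestamp
 * counter, MSI and the CPU queues.
 */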
4174 static int gaudi_hw_init(struct hl_device *hdev)
4175 {
4176         struct gaudi_device *gaudi = hdev->asic_specific;
4177         int rc;
4178
4179         gaudi_pre_hw_init(hdev);
4180
4181         /* If the iATU is configured by the FW, the HBM BAR ALWAYS points to
4182          * DRAM_PHYS_BASE. So we set it here, and if anyone tries to move it
4183          * later to a different address, there will be an error
4184          */
4185         if (hdev->asic_prop.iatu_done_by_fw)
4186                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4187
4188         /*
4189          * Before pushing u-boot/linux to the device, the HBM BAR must be set
4190          * to the base address of the DRAM
4191          */
4192         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4193                 dev_err(hdev->dev,
4194                         "failed to map HBM bar to DRAM base address\n");
4195                 return -EIO;
4196         }
4197
4198         rc = gaudi_init_cpu(hdev);
4199         if (rc) {
4200                 dev_err(hdev->dev, "failed to initialize CPU\n");
4201                 return rc;
4202         }
4203
4204         /* In case clock gating was enabled by preboot, we need to disable it
4205          * here before touching the MME/TPC registers.
4206          * There is no need to take the clock gating mutex because no other
4207          * relevant code can run while this function runs
4208          */
4209         hdev->asic_funcs->disable_clock_gating(hdev);
4210
4211         /* SRAM scrambler must be initialized after CPU is running from HBM */
4212         gaudi_init_scrambler_sram(hdev);
4213
4214         /* This is here just in case we are working without CPU */
4215         gaudi_init_scrambler_hbm(hdev);
4216
4217         gaudi_init_golden_registers(hdev);
4218
4219         rc = gaudi_mmu_init(hdev);
4220         if (rc)
4221                 return rc;
4222
4223         gaudi_init_security(hdev);
4224
4225         gaudi_init_pci_dma_qmans(hdev);
4226
4227         gaudi_init_hbm_dma_qmans(hdev);
4228
4229         gaudi_init_mme_qmans(hdev);
4230
4231         gaudi_init_tpc_qmans(hdev);
4232
4233         gaudi_init_nic_qmans(hdev);
4234
4235         hdev->asic_funcs->set_clock_gating(hdev);
4236
4237         gaudi_enable_timestamp(hdev);
4238
4239         /* MSI must be enabled before CPU queues and NIC are initialized */
4240         rc = gaudi_enable_msi(hdev);
4241         if (rc)
4242                 goto disable_queues;
4243
4244         /* must be called after MSI was enabled */
4245         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4246         if (rc) {
4247                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4248                         rc);
4249                 goto disable_msi;
4250         }
4251
4252         /* Perform read from the device to flush all configuration */
4253         RREG32(mmHW_STATE);
4254
4255         return 0;
4256
4257 disable_msi:
4258         gaudi_disable_msi(hdev);
4259 disable_queues:
4260         gaudi_disable_mme_qmans(hdev);
4261         gaudi_disable_pci_dma_qmans(hdev);
4262
4263         return rc;
4264 }
4265
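/*
 * Hard-reset flow. Depending on the F/W capabilities, either the driver
 * programs the reset configuration registers and issues SW_ALL_RST itself,
 * or it asks the F/W (through the GIC or the COMMS/MSG_TO_CPU registers)
 * to perform the reset. In both cases we wait for the reset to complete
 * and clear the H/W capability bits that do not survive a hard reset.
 */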
4266 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4267 {
4268         struct cpu_dyn_regs *dyn_regs =
4269                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4270         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4271         struct gaudi_device *gaudi = hdev->asic_specific;
4272         bool driver_performs_reset;
4273
4274         if (!hard_reset) {
4275                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4276                 return;
4277         }
4278
4279         if (hdev->pldm) {
4280                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4281                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4282         } else {
4283                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4284                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4285         }
4286
4287         if (fw_reset) {
4288                 dev_info(hdev->dev,
4289                         "Firmware performs HARD reset, going to wait %dms\n",
4290                         reset_timeout_ms);
4291
4292                 goto skip_reset;
4293         }
4294
4295         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4296                                         !hdev->asic_prop.hard_reset_done_by_fw);
4297
4298         /* Set the device to handle FLR by H/W, as we are going to put the
4299          * device CPU into halt mode
4300          */
4301         if (driver_performs_reset)
4302                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4303                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4304
4305         /* If linux is loaded in the device CPU we need to communicate with it
4306          * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4307          * registers in case of old F/Ws
4308          */
4309         if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4310                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4311                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4312                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4313
4314                 WREG32(irq_handler_offset,
4315                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4316
4317                 /* This is a hail-mary attempt to revive the card in the small chance that the
4318                  * f/w has experienced a watchdog event, which caused it to return to preboot.
4319                  * In that case, triggering reset through GIC won't help. We need to trigger the
4320                  * reset as if Linux wasn't loaded.
4321                  *
4322                  * We do it only if the reset cause was HB, because that would be the indication
4323                  * of such an event.
4324                  *
4325                  * In case watchdog hasn't expired but we still got HB, then this won't do any
4326                  * damage.
4327                  */
4328                 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4329                         if (hdev->asic_prop.hard_reset_done_by_fw)
4330                                 hl_fw_ask_hard_reset_without_linux(hdev);
4331                         else
4332                                 hl_fw_ask_halt_machine_without_linux(hdev);
4333                 }
4334         } else {
4335                 if (hdev->asic_prop.hard_reset_done_by_fw)
4336                         hl_fw_ask_hard_reset_without_linux(hdev);
4337                 else
4338                         hl_fw_ask_halt_machine_without_linux(hdev);
4339         }
4340
4341         if (driver_performs_reset) {
4342
4343                 /* Configure the reset registers. Must be done as early as
4344                  * possible in case we fail during H/W initialization
4345                  */
4346                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4347                                                 (CFG_RST_H_DMA_MASK |
4348                                                 CFG_RST_H_MME_MASK |
4349                                                 CFG_RST_H_SM_MASK |
4350                                                 CFG_RST_H_TPC_7_MASK));
4351
4352                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4353
4354                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4355                                                 (CFG_RST_H_HBM_MASK |
4356                                                 CFG_RST_H_TPC_7_MASK |
4357                                                 CFG_RST_H_NIC_MASK |
4358                                                 CFG_RST_H_SM_MASK |
4359                                                 CFG_RST_H_DMA_MASK |
4360                                                 CFG_RST_H_MME_MASK |
4361                                                 CFG_RST_H_CPU_MASK |
4362                                                 CFG_RST_H_MMU_MASK));
4363
4364                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4365                                                 (CFG_RST_L_IF_MASK |
4366                                                 CFG_RST_L_PSOC_MASK |
4367                                                 CFG_RST_L_TPC_MASK));
4368
4369                 msleep(cpu_timeout_ms);
4370
4371                 /* Tell ASIC not to re-initialize PCIe */
4372                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4373
4374                 /* Restart BTL/BLR upon hard-reset */
4375                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4376
4377                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4378                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4379
4380                 dev_info(hdev->dev,
4381                         "Issued HARD reset command, going to wait %dms\n",
4382                         reset_timeout_ms);
4383         } else {
4384                 dev_info(hdev->dev,
4385                         "Firmware performs HARD reset, going to wait %dms\n",
4386                         reset_timeout_ms);
4387         }
4388
4389 skip_reset:
4390         /*
4391          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4392          * itself is in reset. Need to wait until the reset is deasserted
4393          */
4394         msleep(reset_timeout_ms);
4395
4396         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4397         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4398                 dev_err(hdev->dev,
4399                         "Timeout while waiting for device to reset 0x%x\n",
4400                         status);
4401
4402         if (gaudi) {
4403                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4404                                 HW_CAP_HBM | HW_CAP_PCI_DMA |
4405                                 HW_CAP_MME | HW_CAP_TPC_MASK |
4406                                 HW_CAP_HBM_DMA | HW_CAP_PLL |
4407                                 HW_CAP_NIC_MASK | HW_CAP_MMU |
4408                                 HW_CAP_SRAM_SCRAMBLER |
4409                                 HW_CAP_HBM_SCRAMBLER |
4410                                 HW_CAP_CLK_GATE);
4411
4412                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4413
4414                 hdev->device_cpu_is_halted = false;
4415         }
4416 }
4417
4418 static int gaudi_suspend(struct hl_device *hdev)
4419 {
4420         int rc;
4421
4422         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4423         if (rc)
4424                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4425
4426         return rc;
4427 }
4428
4429 static int gaudi_resume(struct hl_device *hdev)
4430 {
4431         return gaudi_init_iatu(hdev);
4432 }
4433
4434 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4435                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4436 {
4437         int rc;
4438
4439         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4440                         VM_DONTCOPY | VM_NORESERVE;
4441
4442         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4443                                 (dma_addr - HOST_PHYS_BASE), size);
4444         if (rc)
4445                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4446
4447         return rc;
4448 }
4449
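/*
 * Translate the driver's H/W queue ID to the PQ_PI doorbell register of the
 * matching QMAN and write the new producer index to it. For the CPU queue,
 * also raise the PI-update interrupt so the device CPU samples the new PI.
 */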
4450 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4451 {
4452         struct cpu_dyn_regs *dyn_regs =
4453                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4454         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4455         struct gaudi_device *gaudi = hdev->asic_specific;
4456         bool invalid_queue = false;
4457         int dma_id;
4458
4459         switch (hw_queue_id) {
4460         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4461                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4462                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4463                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4464                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4465                 break;
4466
4467         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4468                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4469                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4470                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4471                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4472                 break;
4473
4474         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4475                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4476                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4477                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4478                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4479                 break;
4480
4481         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4482                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4483                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4484                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4485                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4486                 break;
4487
4488         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4489                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4490                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4491                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4492                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4493                 break;
4494
4495         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4496                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4497                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4498                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4499                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4500                 break;
4501
4502         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4503                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4504                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4505                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4506                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4507                 break;
4508
4509         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4510                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4511                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4512                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4513                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4514                 break;
4515
4516         case GAUDI_QUEUE_ID_CPU_PQ:
4517                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4518                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4519                 else
4520                         invalid_queue = true;
4521                 break;
4522
4523         case GAUDI_QUEUE_ID_MME_0_0:
4524                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4525                 break;
4526
4527         case GAUDI_QUEUE_ID_MME_0_1:
4528                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4529                 break;
4530
4531         case GAUDI_QUEUE_ID_MME_0_2:
4532                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4533                 break;
4534
4535         case GAUDI_QUEUE_ID_MME_0_3:
4536                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4537                 break;
4538
4539         case GAUDI_QUEUE_ID_MME_1_0:
4540                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4541                 break;
4542
4543         case GAUDI_QUEUE_ID_MME_1_1:
4544                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4545                 break;
4546
4547         case GAUDI_QUEUE_ID_MME_1_2:
4548                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4549                 break;
4550
4551         case GAUDI_QUEUE_ID_MME_1_3:
4552                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4553                 break;
4554
4555         case GAUDI_QUEUE_ID_TPC_0_0:
4556                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4557                 break;
4558
4559         case GAUDI_QUEUE_ID_TPC_0_1:
4560                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4561                 break;
4562
4563         case GAUDI_QUEUE_ID_TPC_0_2:
4564                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4565                 break;
4566
4567         case GAUDI_QUEUE_ID_TPC_0_3:
4568                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4569                 break;
4570
4571         case GAUDI_QUEUE_ID_TPC_1_0:
4572                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4573                 break;
4574
4575         case GAUDI_QUEUE_ID_TPC_1_1:
4576                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4577                 break;
4578
4579         case GAUDI_QUEUE_ID_TPC_1_2:
4580                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4581                 break;
4582
4583         case GAUDI_QUEUE_ID_TPC_1_3:
4584                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4585                 break;
4586
4587         case GAUDI_QUEUE_ID_TPC_2_0:
4588                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4589                 break;
4590
4591         case GAUDI_QUEUE_ID_TPC_2_1:
4592                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4593                 break;
4594
4595         case GAUDI_QUEUE_ID_TPC_2_2:
4596                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4597                 break;
4598
4599         case GAUDI_QUEUE_ID_TPC_2_3:
4600                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4601                 break;
4602
4603         case GAUDI_QUEUE_ID_TPC_3_0:
4604                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4605                 break;
4606
4607         case GAUDI_QUEUE_ID_TPC_3_1:
4608                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4609                 break;
4610
4611         case GAUDI_QUEUE_ID_TPC_3_2:
4612                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4613                 break;
4614
4615         case GAUDI_QUEUE_ID_TPC_3_3:
4616                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4617                 break;
4618
4619         case GAUDI_QUEUE_ID_TPC_4_0:
4620                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4621                 break;
4622
4623         case GAUDI_QUEUE_ID_TPC_4_1:
4624                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4625                 break;
4626
4627         case GAUDI_QUEUE_ID_TPC_4_2:
4628                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4629                 break;
4630
4631         case GAUDI_QUEUE_ID_TPC_4_3:
4632                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4633                 break;
4634
4635         case GAUDI_QUEUE_ID_TPC_5_0:
4636                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4637                 break;
4638
4639         case GAUDI_QUEUE_ID_TPC_5_1:
4640                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4641                 break;
4642
4643         case GAUDI_QUEUE_ID_TPC_5_2:
4644                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4645                 break;
4646
4647         case GAUDI_QUEUE_ID_TPC_5_3:
4648                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4649                 break;
4650
4651         case GAUDI_QUEUE_ID_TPC_6_0:
4652                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4653                 break;
4654
4655         case GAUDI_QUEUE_ID_TPC_6_1:
4656                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4657                 break;
4658
4659         case GAUDI_QUEUE_ID_TPC_6_2:
4660                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4661                 break;
4662
4663         case GAUDI_QUEUE_ID_TPC_6_3:
4664                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4665                 break;
4666
4667         case GAUDI_QUEUE_ID_TPC_7_0:
4668                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4669                 break;
4670
4671         case GAUDI_QUEUE_ID_TPC_7_1:
4672                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4673                 break;
4674
4675         case GAUDI_QUEUE_ID_TPC_7_2:
4676                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4677                 break;
4678
4679         case GAUDI_QUEUE_ID_TPC_7_3:
4680                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4681                 break;
4682
4683         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4684                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4685                         invalid_queue = true;
4686
4687                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4688                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4689                 break;
4690
4691         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4692                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4693                         invalid_queue = true;
4694
4695                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4696                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4697                 break;
4698
4699         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4700                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4701                         invalid_queue = true;
4702
4703                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4704                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4705                 break;
4706
4707         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4708                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4709                         invalid_queue = true;
4710
4711                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4712                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4713                 break;
4714
4715         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4716                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4717                         invalid_queue = true;
4718
4719                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4720                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4721                 break;
4722
4723         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4724                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4725                         invalid_queue = true;
4726
4727                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4728                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4729                 break;
4730
4731         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4732                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4733                         invalid_queue = true;
4734
4735                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4736                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4737                 break;
4738
4739         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4740                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4741                         invalid_queue = true;
4742
4743                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4744                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4745                 break;
4746
4747         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4748                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4749                         invalid_queue = true;
4750
4751                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4752                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4753                 break;
4754
4755         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4756                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4757                         invalid_queue = true;
4758
4759                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4760                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4761                 break;
4762
4763         default:
4764                 invalid_queue = true;
4765         }
4766
4767         if (invalid_queue) {
4768                 /* Should never get here */
4769                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4770                         hw_queue_id);
4771                 return;
4772         }
4773
4774         db_value = pi;
4775
4776         /* ring the doorbell */
4777         WREG32(db_reg_offset, db_value);
4778
4779         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4780                 /* make sure device CPU will read latest data from host */
4781                 mb();
4782
4783                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4784                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4785                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4786
4787                 WREG32(irq_handler_offset,
4788                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4789         }
4790 }
4791
4792 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4793                                 struct hl_bd *bd)
4794 {
4795         __le64 *pbd = (__le64 *) bd;
4796
4797         /* The QMANs are on host memory, so a simple copy suffices */
4798         pqe[0] = pbd[0];
4799         pqe[1] = pbd[1];
4800 }
4801
4802 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4803                                         dma_addr_t *dma_handle, gfp_t flags)
4804 {
4805         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4806                                                 dma_handle, flags);
4807
4808         /* Shift to the device's base physical address of host memory */
4809         if (kernel_addr)
4810                 *dma_handle += HOST_PHYS_BASE;
4811
4812         return kernel_addr;
4813 }
4814
4815 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4816                 void *cpu_addr, dma_addr_t dma_handle)
4817 {
4818         /* Cancel the device's base physical address of host memory */
4819         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4820
4821         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4822 }
4823
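/*
 * Scrub the user HBM range by running all DMA cores in parallel in memset
 * mode, each core clearing a chunk of up to 2GB, then polling until every
 * core is idle before issuing the next set of chunks.
 */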
4824 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4825 {
4826         struct asic_fixed_properties *prop = &hdev->asic_prop;
4827         u64  cur_addr = DRAM_BASE_ADDR_USER;
4828         u32 val;
4829         u32 chunk_size;
4830         int rc, dma_id;
4831
4832         while (cur_addr < prop->dram_end_address) {
4833                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4834                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4835
4836                         chunk_size =
4837                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4838
4839                         dev_dbg(hdev->dev,
4840                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4841                                 cur_addr, cur_addr + chunk_size);
4842
4843                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
4844                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
4845                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4846                                                 lower_32_bits(cur_addr));
4847                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4848                                                 upper_32_bits(cur_addr));
4849                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4850                                         chunk_size);
4851                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4852                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4853                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4854
4855                         cur_addr += chunk_size;
4856
4857                         if (cur_addr == prop->dram_end_address)
4858                                 break;
4859                 }
4860
4861                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4862                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4863
4864                         rc = hl_poll_timeout(
4865                                 hdev,
4866                                 mmDMA0_CORE_STS0 + dma_offset,
4867                                 val,
4868                                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4869                                 1000,
4870                                 HBM_SCRUBBING_TIMEOUT_US);
4871
4872                         if (rc) {
4873                                 dev_err(hdev->dev,
4874                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4875                                         dma_id);
4876                                 return -EIO;
4877                         }
4878                 }
4879         }
4880
4881         return 0;
4882 }
4883
4884 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4885 {
4886         struct asic_fixed_properties *prop = &hdev->asic_prop;
4887         struct gaudi_device *gaudi = hdev->asic_specific;
4888         int rc = 0;
4889         u64 val = 0;
4890
4891         if (!hdev->memory_scrub)
4892                 return 0;
4893
4894         if (!addr && !size) {
4895                 /* Wait till device is idle */
4896                 rc = hl_poll_timeout(
4897                                 hdev,
4898                                 mmDMA0_CORE_STS0/* dummy */,
4899                                 val/* dummy */,
4900                                 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4901                                                 0, NULL)),
4902                                                 1000,
4903                                                 HBM_SCRUBBING_TIMEOUT_US);
4904                 if (rc) {
4905                         dev_err(hdev->dev, "waiting for idle timeout\n");
4906                         return -EIO;
4907                 }
4908
4909                 /* Scrub SRAM */
4910                 addr = prop->sram_user_base_address;
4911                 size = hdev->pldm ? 0x10000 :
4912                                 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4913                 val = 0x7777777777777777ull;
4914
4915                 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4916                 if (rc) {
4917                         dev_err(hdev->dev,
4918                                 "Failed to clear SRAM in mem scrub all\n");
4919                         return rc;
4920                 }
4921
4922                 mutex_lock(&gaudi->clk_gate_mutex);
4923                 hdev->asic_funcs->disable_clock_gating(hdev);
4924
4925                 /* Scrub HBM using all DMA channels in parallel */
4926                 rc = gaudi_hbm_scrubbing(hdev);
4927                 if (rc)
4928                         dev_err(hdev->dev,
4929                                 "Failed to clear HBM in mem scrub all\n");
4930
4931                 hdev->asic_funcs->set_clock_gating(hdev);
4932                 mutex_unlock(&gaudi->clk_gate_mutex);
4933         }
4934
4935         return rc;
4936 }
4937
4938 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4939                                 u32 queue_id, dma_addr_t *dma_handle,
4940                                 u16 *queue_len)
4941 {
4942         struct gaudi_device *gaudi = hdev->asic_specific;
4943         struct gaudi_internal_qman_info *q;
4944
4945         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4946                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4947                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4948                 return NULL;
4949         }
4950
4951         q = &gaudi->internal_qmans[queue_id];
4952         *dma_handle = q->pq_dma_addr;
4953         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4954
4955         return q->pq_kernel_addr;
4956 }
4957
4958 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4959                                 u16 len, u32 timeout, u64 *result)
4960 {
4961         struct gaudi_device *gaudi = hdev->asic_specific;
4962
4963         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4964                 if (result)
4965                         *result = 0;
4966                 return 0;
4967         }
4968
4969         if (!timeout)
4970                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4971
4972         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4973                                                 timeout, result);
4974 }
4975
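/*
 * Sanity-test a single external queue: submit a MSG_PROT packet that writes
 * a known fence value to a scratch buffer on the host and poll that buffer
 * until the value shows up or the test times out.
 */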
4976 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4977 {
4978         struct packet_msg_prot *fence_pkt;
4979         dma_addr_t pkt_dma_addr;
4980         u32 fence_val, tmp, timeout_usec;
4981         dma_addr_t fence_dma_addr;
4982         u32 *fence_ptr;
4983         int rc;
4984
4985         if (hdev->pldm)
4986                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4987         else
4988                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4989
4990         fence_val = GAUDI_QMAN0_FENCE_VAL;
4991
4992         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4993                                                         &fence_dma_addr);
4994         if (!fence_ptr) {
4995                 dev_err(hdev->dev,
4996                         "Failed to allocate memory for H/W queue %d testing\n",
4997                         hw_queue_id);
4998                 return -ENOMEM;
4999         }
5000
5001         *fence_ptr = 0;
5002
5003         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
5004                                         sizeof(struct packet_msg_prot),
5005                                         GFP_KERNEL, &pkt_dma_addr);
5006         if (!fence_pkt) {
5007                 dev_err(hdev->dev,
5008                         "Failed to allocate packet for H/W queue %d testing\n",
5009                         hw_queue_id);
5010                 rc = -ENOMEM;
5011                 goto free_fence_ptr;
5012         }
5013
5014         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5015         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5016         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5017
5018         fence_pkt->ctl = cpu_to_le32(tmp);
5019         fence_pkt->value = cpu_to_le32(fence_val);
5020         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
5021
5022         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
5023                                         sizeof(struct packet_msg_prot),
5024                                         pkt_dma_addr);
5025         if (rc) {
5026                 dev_err(hdev->dev,
5027                         "Failed to send fence packet to H/W queue %d\n",
5028                         hw_queue_id);
5029                 goto free_pkt;
5030         }
5031
5032         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
5033                                         1000, timeout_usec, true);
5034
5035         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
5036
5037         if (rc == -ETIMEDOUT) {
5038                 dev_err(hdev->dev,
5039                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
5040                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
5041                 rc = -EIO;
5042         }
5043
5044 free_pkt:
5045         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
5046                                         pkt_dma_addr);
5047 free_fence_ptr:
5048         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
5049                                         fence_dma_addr);
5050         return rc;
5051 }
5052
5053 static int gaudi_test_cpu_queue(struct hl_device *hdev)
5054 {
5055         struct gaudi_device *gaudi = hdev->asic_specific;
5056
5057         /*
5058          * Check the capability here, as send_cpu_message() won't update the
5059          * result value when the CPU queue capability is missing
5060          */
5061         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
5062                 return 0;
5063
5064         return hl_fw_test_cpu_queue(hdev);
5065 }
5066
5067 static int gaudi_test_queues(struct hl_device *hdev)
5068 {
5069         int i, rc, ret_val = 0;
5070
5071         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
5072                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
5073                         rc = gaudi_test_queue(hdev, i);
5074                         if (rc)
5075                                 ret_val = -EINVAL;
5076                 }
5077         }
5078
5079         rc = gaudi_test_cpu_queue(hdev);
5080         if (rc)
5081                 ret_val = -EINVAL;
5082
5083         return ret_val;
5084 }
5085
5086 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5087                 gfp_t mem_flags, dma_addr_t *dma_handle)
5088 {
5089         void *kernel_addr;
5090
5091         if (size > GAUDI_DMA_POOL_BLK_SIZE)
5092                 return NULL;
5093
5094         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5095
5096         /* Shift to the device's base physical address of host memory */
5097         if (kernel_addr)
5098                 *dma_handle += HOST_PHYS_BASE;
5099
5100         return kernel_addr;
5101 }
5102
5103 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
5104                         dma_addr_t dma_addr)
5105 {
5106         /* Cancel the device's base physical address of host memory */
5107         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5108
5109         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5110 }
5111
5112 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
5113                                         size_t size, dma_addr_t *dma_handle)
5114 {
5115         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5116 }
5117
5118 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
5119                                                 size_t size, void *vaddr)
5120 {
5121         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5122 }
5123
5124 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5125                         int nents, enum dma_data_direction dir)
5126 {
5127         struct scatterlist *sg;
5128         int i;
5129
5130         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5131                 return -ENOMEM;
5132
5133         /* Shift to the device's base physical address of host memory */
5134         for_each_sg(sgl, sg, nents, i)
5135                 sg->dma_address += HOST_PHYS_BASE;
5136
5137         return 0;
5138 }
5139
5140 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5141                         int nents, enum dma_data_direction dir)
5142 {
5143         struct scatterlist *sg;
5144         int i;
5145
5146         /* Cancel the device's base physical address of host memory */
5147         for_each_sg(sgl, sg, nents, i)
5148                 sg->dma_address -= HOST_PHYS_BASE;
5149
5150         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5151 }
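
/*
 * Illustrative sketch (not part of the driver; "gaudi_example_map_userptr"
 * is a hypothetical name): mapping a pinned userptr's scatterlist with the
 * wrappers above. They add/remove HOST_PHYS_BASE so that the addresses in
 * the SG list are the ones the DMA engines expect to see.
 */
static int __maybe_unused gaudi_example_map_userptr(struct hl_device *hdev,
                                                struct hl_userptr *userptr)
{
        int rc;

        rc = gaudi_dma_map_sg(hdev, userptr->sgt->sgl, userptr->sgt->nents,
                                DMA_TO_DEVICE);
        if (rc)
                return rc;

        /* ... LIN_DMA packets would be built from the mapped addresses ... */

        gaudi_dma_unmap_sg(hdev, userptr->sgt->sgl, userptr->sgt->nents,
                                DMA_TO_DEVICE);

        return 0;
}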
5152
5153 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5154                                         struct sg_table *sgt)
5155 {
5156         struct scatterlist *sg, *sg_next_iter;
5157         u32 count, dma_desc_cnt;
5158         u64 len, len_next;
5159         dma_addr_t addr, addr_next;
5160
5161         dma_desc_cnt = 0;
5162
5163         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5164
5165                 len = sg_dma_len(sg);
5166                 addr = sg_dma_address(sg);
5167
5168                 if (len == 0)
5169                         break;
5170
5171                 while ((count + 1) < sgt->nents) {
5172                         sg_next_iter = sg_next(sg);
5173                         len_next = sg_dma_len(sg_next_iter);
5174                         addr_next = sg_dma_address(sg_next_iter);
5175
5176                         if (len_next == 0)
5177                                 break;
5178
5179                         if ((addr + len == addr_next) &&
5180                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5181                                 len += len_next;
5182                                 count++;
5183                                 sg = sg_next_iter;
5184                         } else {
5185                                 break;
5186                         }
5187                 }
5188
5189                 dma_desc_cnt++;
5190         }
5191
5192         return dma_desc_cnt * sizeof(struct packet_lin_dma);
5193 }
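
/*
 * Worked example (illustrative): with DMA_MAX_TRANSFER_SIZE large enough,
 * three mapped chunks
 *     { addr 0x1000, len 0x1000 }, { addr 0x2000, len 0x1000 },
 *     { addr 0x5000, len 0x1000 }
 * need only two LIN_DMA descriptors - the first two are physically
 * contiguous (0x1000 + 0x1000 == 0x2000) and are merged, the third is
 * not - so the helper above returns 2 * sizeof(struct packet_lin_dma).
 */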
5194
5195 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5196                                 struct hl_cs_parser *parser,
5197                                 struct packet_lin_dma *user_dma_pkt,
5198                                 u64 addr, enum dma_data_direction dir)
5199 {
5200         struct hl_userptr *userptr;
5201         int rc;
5202
5203         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5204                         parser->job_userptr_list, &userptr))
5205                 goto already_pinned;
5206
5207         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5208         if (!userptr)
5209                 return -ENOMEM;
5210
5211         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5212                                 userptr);
5213         if (rc)
5214                 goto free_userptr;
5215
5216         list_add_tail(&userptr->job_node, parser->job_userptr_list);
5217
5218         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5219                                         userptr->sgt->nents, dir);
5220         if (rc) {
5221                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5222                 goto unpin_memory;
5223         }
5224
5225         userptr->dma_mapped = true;
5226         userptr->dir = dir;
5227
5228 already_pinned:
5229         parser->patched_cb_size +=
5230                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5231
5232         return 0;
5233
5234 unpin_memory:
5235         list_del(&userptr->job_node);
5236         hl_unpin_host_memory(hdev, userptr);
5237 free_userptr:
5238         kfree(userptr);
5239         return rc;
5240 }
5241
5242 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5243                                 struct hl_cs_parser *parser,
5244                                 struct packet_lin_dma *user_dma_pkt,
5245                                 bool src_in_host)
5246 {
5247         enum dma_data_direction dir;
5248         bool skip_host_mem_pin = false, user_memset;
5249         u64 addr;
5250         int rc = 0;
5251
5252         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5253                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5254                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5255
5256         if (src_in_host) {
5257                 if (user_memset)
5258                         skip_host_mem_pin = true;
5259
5260                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5261                 dir = DMA_TO_DEVICE;
5262                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5263         } else {
5264                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5265                 dir = DMA_FROM_DEVICE;
5266                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5267                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5268                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5269         }
5270
5271         if (skip_host_mem_pin)
5272                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5273         else
5274                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5275                                                 addr, dir);
5276
5277         return rc;
5278 }
5279
5280 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5281                                 struct hl_cs_parser *parser,
5282                                 struct packet_lin_dma *user_dma_pkt)
5283 {
5284         bool src_in_host = false;
5285         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5286                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5287                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5288
5289         dev_dbg(hdev->dev, "DMA packet details:\n");
5290         dev_dbg(hdev->dev, "source == 0x%llx\n",
5291                                 le64_to_cpu(user_dma_pkt->src_addr));
5292         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5293         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5294
5295         /*
5296          * Special handling for DMA with size 0. Bypass all validations
5297          * because no transactions will be done except for WR_COMP, which
5298          * is not a security issue
5299          */
5300         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5301                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5302                 return 0;
5303         }
5304
5305         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5306                 src_in_host = true;
5307
5308         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5309                                                 src_in_host);
5310 }
5311
5312 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5313                                         struct hl_cs_parser *parser,
5314                                         struct packet_load_and_exe *user_pkt)
5315 {
5316         u32 cfg;
5317
5318         cfg = le32_to_cpu(user_pkt->cfg);
5319
5320         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5321                 dev_err(hdev->dev,
5322                         "User not allowed to use Load and Execute\n");
5323                 return -EPERM;
5324         }
5325
5326         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5327
5328         return 0;
5329 }
5330
5331 static int gaudi_validate_cb(struct hl_device *hdev,
5332                         struct hl_cs_parser *parser, bool is_mmu)
5333 {
5334         u32 cb_parsed_length = 0;
5335         int rc = 0;
5336
5337         parser->patched_cb_size = 0;
5338
5339         /* user_cb_size is more than 0 so the loop will always be executed */
5340         while (cb_parsed_length < parser->user_cb_size) {
5341                 enum packet_id pkt_id;
5342                 u16 pkt_size;
5343                 struct gaudi_packet *user_pkt;
5344
5345                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5346
5347                 pkt_id = (enum packet_id) (
5348                                 (le64_to_cpu(user_pkt->header) &
5349                                 PACKET_HEADER_PACKET_ID_MASK) >>
5350                                         PACKET_HEADER_PACKET_ID_SHIFT);
5351
5352                 if (!validate_packet_id(pkt_id)) {
5353                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5354                         rc = -EINVAL;
5355                         break;
5356                 }
5357
5358                 pkt_size = gaudi_packet_sizes[pkt_id];
5359                 cb_parsed_length += pkt_size;
5360                 if (cb_parsed_length > parser->user_cb_size) {
5361                         dev_err(hdev->dev,
5362                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5363                         rc = -EINVAL;
5364                         break;
5365                 }
5366
5367                 switch (pkt_id) {
5368                 case PACKET_MSG_PROT:
5369                         dev_err(hdev->dev,
5370                                 "User not allowed to use MSG_PROT\n");
5371                         rc = -EPERM;
5372                         break;
5373
5374                 case PACKET_CP_DMA:
5375                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5376                         rc = -EPERM;
5377                         break;
5378
5379                 case PACKET_STOP:
5380                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5381                         rc = -EPERM;
5382                         break;
5383
5384                 case PACKET_WREG_BULK:
5385                         dev_err(hdev->dev,
5386                                 "User not allowed to use WREG_BULK\n");
5387                         rc = -EPERM;
5388                         break;
5389
5390                 case PACKET_LOAD_AND_EXE:
5391                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5392                                 (struct packet_load_and_exe *) user_pkt);
5393                         break;
5394
5395                 case PACKET_LIN_DMA:
5396                         parser->contains_dma_pkt = true;
5397                         if (is_mmu)
5398                                 parser->patched_cb_size += pkt_size;
5399                         else
5400                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5401                                         (struct packet_lin_dma *) user_pkt);
5402                         break;
5403
5404                 case PACKET_WREG_32:
5405                 case PACKET_MSG_LONG:
5406                 case PACKET_MSG_SHORT:
5407                 case PACKET_REPEAT:
5408                 case PACKET_FENCE:
5409                 case PACKET_NOP:
5410                 case PACKET_ARB_POINT:
5411                         parser->patched_cb_size += pkt_size;
5412                         break;
5413
5414                 default:
5415                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5416                                 pkt_id);
5417                         rc = -EINVAL;
5418                         break;
5419                 }
5420
5421                 if (rc)
5422                         break;
5423         }
5424
5425         /*
5426          * The new CB should have space at the end for two MSG_PROT packets:
5427          * 1. A packet that will act as a completion packet
5428          * 2. A packet that will generate an MSI interrupt
5429          */
5430         if (parser->completion)
5431                 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5432
5433         return rc;
5434 }
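
/*
 * Worked example (illustrative): a user CB holding one WREG_32 and one
 * LIN_DMA packet, validated with is_mmu == true and parser->completion set,
 * ends up with patched_cb_size equal to sizeof(struct packet_wreg32) +
 * sizeof(struct packet_lin_dma) + 2 * sizeof(struct packet_msg_prot) - the
 * user packets are counted as-is and only the two trailing MSG_PROT slots
 * are added.
 */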
5435
5436 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5437                                 struct hl_cs_parser *parser,
5438                                 struct packet_lin_dma *user_dma_pkt,
5439                                 struct packet_lin_dma *new_dma_pkt,
5440                                 u32 *new_dma_pkt_size)
5441 {
5442         struct hl_userptr *userptr;
5443         struct scatterlist *sg, *sg_next_iter;
5444         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5445         u64 len, len_next;
5446         dma_addr_t dma_addr, dma_addr_next;
5447         u64 device_memory_addr, addr;
5448         enum dma_data_direction dir;
5449         struct sg_table *sgt;
5450         bool src_in_host = false;
5451         bool skip_host_mem_pin = false;
5452         bool user_memset;
5453
5454         ctl = le32_to_cpu(user_dma_pkt->ctl);
5455
5456         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5457                 src_in_host = true;
5458
5459         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5460                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5461
5462         if (src_in_host) {
5463                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5464                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5465                 dir = DMA_TO_DEVICE;
5466                 if (user_memset)
5467                         skip_host_mem_pin = true;
5468         } else {
5469                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5470                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5471                 dir = DMA_FROM_DEVICE;
5472         }
5473
5474         if ((!skip_host_mem_pin) &&
5475                 (!hl_userptr_is_pinned(hdev, addr,
5476                                         le32_to_cpu(user_dma_pkt->tsize),
5477                                         parser->job_userptr_list, &userptr))) {
5478                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5479                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5480                 return -EFAULT;
5481         }
5482
5483         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5484                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5485                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5486                 return 0;
5487         }
5488
5489         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5490
5491         sgt = userptr->sgt;
5492         dma_desc_cnt = 0;
5493
5494         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5495                 len = sg_dma_len(sg);
5496                 dma_addr = sg_dma_address(sg);
5497
5498                 if (len == 0)
5499                         break;
5500
5501                 while ((count + 1) < sgt->nents) {
5502                         sg_next_iter = sg_next(sg);
5503                         len_next = sg_dma_len(sg_next_iter);
5504                         dma_addr_next = sg_dma_address(sg_next_iter);
5505
5506                         if (len_next == 0)
5507                                 break;
5508
5509                         if ((dma_addr + len == dma_addr_next) &&
5510                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5511                                 len += len_next;
5512                                 count++;
5513                                 sg = sg_next_iter;
5514                         } else {
5515                                 break;
5516                         }
5517                 }
5518
5519                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5520                 if (likely(dma_desc_cnt))
5521                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5522                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5523                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5524                 new_dma_pkt->tsize = cpu_to_le32(len);
5525
5526                 if (dir == DMA_TO_DEVICE) {
5527                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5528                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5529                 } else {
5530                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5531                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5532                 }
5533
5534                 if (!user_memset)
5535                         device_memory_addr += len;
5536                 dma_desc_cnt++;
5537                 new_dma_pkt++;
5538         }
5539
5540         if (!dma_desc_cnt) {
5541                 dev_err(hdev->dev,
5542                         "No SG entries found when patching DMA packet\n");
5543                 return -EFAULT;
5544         }
5545
5546         /* Fix the last DMA packet - WR_COMP must be as the user set it */
5547         new_dma_pkt--;
5548         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5549
5550         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5551
5552         return 0;
5553 }
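
/*
 * Worked example (illustrative): a host-to-device LIN_DMA whose pinned
 * pages map to two non-contiguous chunks is expanded by the function above
 * into two packets. The first keeps the user's EB bit, the second has EB
 * cleared, and only the last packet gets the user's WR_COMP enable back,
 * so write-completion is signalled once, after the final chunk.
 */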
5554
5555 static int gaudi_patch_cb(struct hl_device *hdev,
5556                                 struct hl_cs_parser *parser)
5557 {
5558         u32 cb_parsed_length = 0;
5559         u32 cb_patched_cur_length = 0;
5560         int rc = 0;
5561
5562         /* user_cb_size is more than 0 so the loop will always be executed */
5563         while (cb_parsed_length < parser->user_cb_size) {
5564                 enum packet_id pkt_id;
5565                 u16 pkt_size;
5566                 u32 new_pkt_size = 0;
5567                 struct gaudi_packet *user_pkt, *kernel_pkt;
5568
5569                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5570                 kernel_pkt = parser->patched_cb->kernel_address +
5571                                         cb_patched_cur_length;
5572
5573                 pkt_id = (enum packet_id) (
5574                                 (le64_to_cpu(user_pkt->header) &
5575                                 PACKET_HEADER_PACKET_ID_MASK) >>
5576                                         PACKET_HEADER_PACKET_ID_SHIFT);
5577
5578                 if (!validate_packet_id(pkt_id)) {
5579                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5580                         rc = -EINVAL;
5581                         break;
5582                 }
5583
5584                 pkt_size = gaudi_packet_sizes[pkt_id];
5585                 cb_parsed_length += pkt_size;
5586                 if (cb_parsed_length > parser->user_cb_size) {
5587                         dev_err(hdev->dev,
5588                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5589                         rc = -EINVAL;
5590                         break;
5591                 }
5592
5593                 switch (pkt_id) {
5594                 case PACKET_LIN_DMA:
5595                         rc = gaudi_patch_dma_packet(hdev, parser,
5596                                         (struct packet_lin_dma *) user_pkt,
5597                                         (struct packet_lin_dma *) kernel_pkt,
5598                                         &new_pkt_size);
5599                         cb_patched_cur_length += new_pkt_size;
5600                         break;
5601
5602                 case PACKET_MSG_PROT:
5603                         dev_err(hdev->dev,
5604                                 "User not allowed to use MSG_PROT\n");
5605                         rc = -EPERM;
5606                         break;
5607
5608                 case PACKET_CP_DMA:
5609                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5610                         rc = -EPERM;
5611                         break;
5612
5613                 case PACKET_STOP:
5614                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5615                         rc = -EPERM;
5616                         break;
5617
5618                 case PACKET_WREG_32:
5619                 case PACKET_WREG_BULK:
5620                 case PACKET_MSG_LONG:
5621                 case PACKET_MSG_SHORT:
5622                 case PACKET_REPEAT:
5623                 case PACKET_FENCE:
5624                 case PACKET_NOP:
5625                 case PACKET_ARB_POINT:
5626                 case PACKET_LOAD_AND_EXE:
5627                         memcpy(kernel_pkt, user_pkt, pkt_size);
5628                         cb_patched_cur_length += pkt_size;
5629                         break;
5630
5631                 default:
5632                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5633                                 pkt_id);
5634                         rc = -EINVAL;
5635                         break;
5636                 }
5637
5638                 if (rc)
5639                         break;
5640         }
5641
5642         return rc;
5643 }
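
/*
 * Illustrative sketch of the two-pass scheme used by the non-MMU parsing
 * path (not part of the driver; "gaudi_example_two_pass" is a hypothetical
 * name): pass 1 sizes the patched CB, pass 2 fills it. The real flow,
 * including the allocation of the patched CB in between, is in
 * gaudi_parse_cb_no_mmu() below.
 */
static int __maybe_unused gaudi_example_two_pass(struct hl_device *hdev,
                                                struct hl_cs_parser *parser)
{
        int rc;

        /* Pass 1: validate packets and accumulate parser->patched_cb_size */
        rc = gaudi_validate_cb(hdev, parser, false);
        if (rc)
                return rc;

        /*
         * A kernel CB of parser->patched_cb_size bytes is assumed to have
         * been created and stored in parser->patched_cb at this point.
         */

        /* Pass 2: copy/expand the user packets into the patched CB */
        return gaudi_patch_cb(hdev, parser);
}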
5644
5645 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5646                 struct hl_cs_parser *parser)
5647 {
5648         u64 patched_cb_handle;
5649         u32 patched_cb_size;
5650         struct hl_cb *user_cb;
5651         int rc;
5652
5653         /*
5654          * The new CB should have space at the end for two MSG_PROT packets:
5655          * 1. A packet that will act as a completion packet
5656          * 2. A packet that will generate an MSI interrupt
5657          */
5658         if (parser->completion)
5659                 parser->patched_cb_size = parser->user_cb_size +
5660                                 sizeof(struct packet_msg_prot) * 2;
5661         else
5662                 parser->patched_cb_size = parser->user_cb_size;
5663
5664         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5665                                 parser->patched_cb_size, false, false,
5666                                 &patched_cb_handle);
5667
5668         if (rc) {
5669                 dev_err(hdev->dev,
5670                         "Failed to allocate patched CB for DMA CS %d\n",
5671                         rc);
5672                 return rc;
5673         }
5674
5675         patched_cb_handle >>= PAGE_SHIFT;
5676         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5677                                 (u32) patched_cb_handle);
5678         /* hl_cb_get should never fail */
5679         if (!parser->patched_cb) {
5680                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5681                         (u32) patched_cb_handle);
5682                 rc = -EFAULT;
5683                 goto out;
5684         }
5685
5686         /*
5687          * The check that parser->user_cb_size <= parser->user_cb->size was done
5688          * in validate_queue_index().
5689          */
5690         memcpy(parser->patched_cb->kernel_address,
5691                 parser->user_cb->kernel_address,
5692                 parser->user_cb_size);
5693
5694         patched_cb_size = parser->patched_cb_size;
5695
5696         /* Validate patched CB instead of user CB */
5697         user_cb = parser->user_cb;
5698         parser->user_cb = parser->patched_cb;
5699         rc = gaudi_validate_cb(hdev, parser, true);
5700         parser->user_cb = user_cb;
5701
5702         if (rc) {
5703                 hl_cb_put(parser->patched_cb);
5704                 goto out;
5705         }
5706
5707         if (patched_cb_size != parser->patched_cb_size) {
5708                 dev_err(hdev->dev, "user CB size mismatch\n");
5709                 hl_cb_put(parser->patched_cb);
5710                 rc = -EINVAL;
5711                 goto out;
5712         }
5713
5714 out:
5715         /*
5716          * Always call cb destroy here because we still hold one reference
5717          * to it from the earlier cb_get. Once the job completes, cb_put
5718          * will release it, but here we want to remove it from the
5719          * idr
5720          */
5721         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5722                                         patched_cb_handle << PAGE_SHIFT);
5723
5724         return rc;
5725 }
5726
5727 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5728                 struct hl_cs_parser *parser)
5729 {
5730         u64 patched_cb_handle;
5731         int rc;
5732
5733         rc = gaudi_validate_cb(hdev, parser, false);
5734
5735         if (rc)
5736                 goto free_userptr;
5737
5738         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5739                                 parser->patched_cb_size, false, false,
5740                                 &patched_cb_handle);
5741         if (rc) {
5742                 dev_err(hdev->dev,
5743                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5744                 goto free_userptr;
5745         }
5746
5747         patched_cb_handle >>= PAGE_SHIFT;
5748         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5749                                 (u32) patched_cb_handle);
5750         /* hl_cb_get should never fail here */
5751         if (!parser->patched_cb) {
5752                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5753                                 (u32) patched_cb_handle);
5754                 rc = -EFAULT;
5755                 goto out;
5756         }
5757
5758         rc = gaudi_patch_cb(hdev, parser);
5759
5760         if (rc)
5761                 hl_cb_put(parser->patched_cb);
5762
5763 out:
5764         /*
5765          * Always call cb destroy here because we still hold one reference
5766          * to it from the earlier cb_get. Once the job completes, cb_put
5767          * will release it, but here we want to remove it from the
5768          * idr
5769          */
5770         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5771                                 patched_cb_handle << PAGE_SHIFT);
5772
5773 free_userptr:
5774         if (rc)
5775                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5776         return rc;
5777 }
5778
5779 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5780                                         struct hl_cs_parser *parser)
5781 {
5782         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5783         struct gaudi_device *gaudi = hdev->asic_specific;
5784         u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5785                 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5786
5787         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5788                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5789                         (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5790                 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5791                                 parser->hw_queue_id);
5792                 return -EINVAL;
5793         }
5794
5795         /* For internal queue jobs just check if CB address is valid */
5796         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5797                                         parser->user_cb_size,
5798                                         asic_prop->sram_user_base_address,
5799                                         asic_prop->sram_end_address))
5800                 return 0;
5801
5802         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5803                                         parser->user_cb_size,
5804                                         asic_prop->dram_user_base_address,
5805                                         asic_prop->dram_end_address))
5806                 return 0;
5807
5808         /* PMMU and HPMMU addresses are equal, check only one of them */
5809         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5810                                         parser->user_cb_size,
5811                                         asic_prop->pmmu.start_addr,
5812                                         asic_prop->pmmu.end_addr))
5813                 return 0;
5814
5815         dev_err(hdev->dev,
5816                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5817                 parser->user_cb, parser->user_cb_size);
5818
5819         return -EFAULT;
5820 }
5821
5822 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5823 {
5824         struct gaudi_device *gaudi = hdev->asic_specific;
5825
5826         if (parser->queue_type == QUEUE_TYPE_INT)
5827                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5828
5829         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5830                 return gaudi_parse_cb_mmu(hdev, parser);
5831         else
5832                 return gaudi_parse_cb_no_mmu(hdev, parser);
5833 }
5834
5835 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5836                                         void *kernel_address, u32 len,
5837                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
5838                                         bool eb)
5839 {
5840         struct gaudi_device *gaudi = hdev->asic_specific;
5841         struct packet_msg_prot *cq_pkt;
5842         u64 msi_addr;
5843         u32 tmp;
5844
5845         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5846
5847         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5848         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5849
5850         if (eb)
5851                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5852
5853         cq_pkt->ctl = cpu_to_le32(tmp);
5854         cq_pkt->value = cpu_to_le32(cq_val);
5855         cq_pkt->addr = cpu_to_le64(cq_addr);
5856
5857         cq_pkt++;
5858
5859         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5860         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5861         cq_pkt->ctl = cpu_to_le32(tmp);
5862         cq_pkt->value = cpu_to_le32(1);
5863
5864         if (gaudi->multi_msi_mode)
5865                 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5866         else
5867                 msi_addr = mmPCIE_CORE_MSI_REQ;
5868
5869         cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5870 }
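
/*
 * Illustrative note: for a patched CB of length "len", the two MSG_PROT
 * packets written above occupy its last 2 * sizeof(struct packet_msg_prot)
 * bytes, i.e. exactly the space the parser reserved earlier. The first
 * packet updates the completion queue, the second writes 1 to the MSI
 * registers (mmPCIE_MSI_INTR_0 + 4 * msi_vec in multi-MSI mode) to raise
 * the interrupt.
 */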
5871
5872 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5873 {
5874         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5875 }
5876
5877 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5878                                         u32 size, u64 val)
5879 {
5880         struct packet_lin_dma *lin_dma_pkt;
5881         struct hl_cs_job *job;
5882         u32 cb_size, ctl, err_cause;
5883         struct hl_cb *cb;
5884         u64 id;
5885         int rc;
5886
5887         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5888         if (!cb)
5889                 return -EFAULT;
5890
5891         lin_dma_pkt = cb->kernel_address;
5892         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5893         cb_size = sizeof(*lin_dma_pkt);
5894
5895         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5896         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5897         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5898         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5899         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5900
5901         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5902         lin_dma_pkt->src_addr = cpu_to_le64(val);
5903         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5904         lin_dma_pkt->tsize = cpu_to_le32(size);
5905
5906         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5907         if (!job) {
5908                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5909                 rc = -ENOMEM;
5910                 goto release_cb;
5911         }
5912
5913         /* Verify DMA is OK */
5914         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5915         if (err_cause && !hdev->init_done) {
5916                 dev_dbg(hdev->dev,
5917                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5918                         err_cause);
5919                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5920         }
5921
5922         job->id = 0;
5923         job->user_cb = cb;
5924         atomic_inc(&job->user_cb->cs_cnt);
5925         job->user_cb_size = cb_size;
5926         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5927         job->patched_cb = job->user_cb;
5928         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5929
5930         hl_debugfs_add_job(hdev, job);
5931
5932         rc = gaudi_send_job_on_qman0(hdev, job);
5933         hl_debugfs_remove_job(hdev, job);
5934         kfree(job);
5935         atomic_dec(&cb->cs_cnt);
5936
5937         /* Verify DMA is OK */
5938         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5939         if (err_cause) {
5940                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5941                 rc = -EIO;
5942                 if (!hdev->init_done) {
5943                         dev_dbg(hdev->dev,
5944                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5945                                 err_cause);
5946                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5947                 }
5948         }
5949
5950 release_cb:
5951         id = cb->id;
5952         hl_cb_put(cb);
5953         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5954
5955         return rc;
5956 }
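
/*
 * Illustrative usage sketch (not part of the driver; the helper name and
 * the address parameter are hypothetical): clearing a 1 MB region of
 * device memory to zero. One real caller of the function above is
 * gaudi_mmu_clear_pgt_range() below.
 */
static int __maybe_unused gaudi_example_clear_region(struct hl_device *hdev,
                                                        u64 region_addr)
{
        return gaudi_memset_device_memory(hdev, region_addr, SZ_1M, 0);
}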
5957
5958 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5959                                         u32 num_regs, u32 val)
5960 {
5961         struct packet_msg_long *pkt;
5962         struct hl_cs_job *job;
5963         u32 cb_size, ctl;
5964         struct hl_cb *cb;
5965         int i, rc;
5966
5967         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5968
5969         if (cb_size > SZ_2M) {
5970                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M >> 20);
5971                 return -ENOMEM;
5972         }
5973
5974         cb = hl_cb_kernel_create(hdev, cb_size, false);
5975         if (!cb)
5976                 return -EFAULT;
5977
5978         pkt = cb->kernel_address;
5979
5980         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5981         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5982         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5983         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5984         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5985
5986         for (i = 0; i < num_regs ; i++, pkt++) {
5987                 pkt->ctl = cpu_to_le32(ctl);
5988                 pkt->value = cpu_to_le32(val);
5989                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5990         }
5991
5992         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5993         if (!job) {
5994                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5995                 rc = -ENOMEM;
5996                 goto release_cb;
5997         }
5998
5999         job->id = 0;
6000         job->user_cb = cb;
6001         atomic_inc(&job->user_cb->cs_cnt);
6002         job->user_cb_size = cb_size;
6003         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
6004         job->patched_cb = job->user_cb;
6005         job->job_cb_size = cb_size;
6006
6007         hl_debugfs_add_job(hdev, job);
6008
6009         rc = gaudi_send_job_on_qman0(hdev, job);
6010         hl_debugfs_remove_job(hdev, job);
6011         kfree(job);
6012         atomic_dec(&cb->cs_cnt);
6013
6014 release_cb:
6015         hl_cb_put(cb);
6016         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
6017
6018         return rc;
6019 }
6020
6021 static int gaudi_restore_sm_registers(struct hl_device *hdev)
6022 {
6023         u64 base_addr;
6024         u32 num_regs;
6025         int rc;
6026
6027         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6028         num_regs = NUM_OF_SOB_IN_BLOCK;
6029         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6030         if (rc) {
6031                 dev_err(hdev->dev, "failed resetting SM registers");
6032                 return -ENOMEM;
6033         }
6034
6035         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
6036         num_regs = NUM_OF_SOB_IN_BLOCK;
6037         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6038         if (rc) {
6039                 dev_err(hdev->dev, "failed resetting SM registers");
6040                 return -ENOMEM;
6041         }
6042
6043         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6044         num_regs = NUM_OF_SOB_IN_BLOCK;
6045         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6046         if (rc) {
6047                 dev_err(hdev->dev, "failed resetting SM registers");
6048                 return -ENOMEM;
6049         }
6050
6051         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6052         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6053         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6054         if (rc) {
6055                 dev_err(hdev->dev, "failed resetting SM registers");
6056                 return -ENOMEM;
6057         }
6058
6059         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
6060         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6061         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6062         if (rc) {
6063                 dev_err(hdev->dev, "failed resetting SM registers");
6064                 return -ENOMEM;
6065         }
6066
6067         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6068         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6069         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6070         if (rc) {
6071                 dev_err(hdev->dev, "failed resetting SM registers");
6072                 return -ENOMEM;
6073         }
6074
6075         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6076                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6077         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6078         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6079         if (rc) {
6080                 dev_err(hdev->dev, "failed resetting SM registers");
6081                 return -ENOMEM;
6082         }
6083
6084         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6085                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6086         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6087         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6088         if (rc) {
6089                 dev_err(hdev->dev, "failed resetting SM registers");
6090                 return -ENOMEM;
6091         }
6092
6093         return 0;
6094 }
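
/*
 * Illustrative alternative (not part of the driver; names are
 * hypothetical): the repeated blocks above can also be expressed as a
 * table walk over the full SOB/monitor ranges. Returning "rc" instead of a
 * hard-coded -ENOMEM additionally preserves the underlying error code. The
 * two partial W_S ranges that skip the firmware-reserved objects would be
 * handled the same way with an added start offset.
 */
static int __maybe_unused gaudi_example_restore_sm(struct hl_device *hdev)
{
        static const struct {
                u64 base;
                u32 num_regs;
        } ranges[] = {
                { mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0, NUM_OF_SOB_IN_BLOCK },
                { mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0, NUM_OF_SOB_IN_BLOCK },
                { mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0, NUM_OF_SOB_IN_BLOCK },
                { mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0, NUM_OF_MONITORS_IN_BLOCK },
                { mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0, NUM_OF_MONITORS_IN_BLOCK },
                { mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0, NUM_OF_MONITORS_IN_BLOCK },
        };
        int i, rc;

        for (i = 0 ; i < ARRAY_SIZE(ranges) ; i++) {
                rc = gaudi_memset_registers(hdev, CFG_BASE + ranges[i].base,
                                                ranges[i].num_regs, 0);
                if (rc) {
                        dev_err(hdev->dev, "failed resetting SM registers\n");
                        return rc;
                }
        }

        return 0;
}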
6095
6096 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6097 {
6098         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6099                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6100         int i;
6101
6102         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6103                 u64 sob_addr = CFG_BASE +
6104                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6105                                 (i * sob_delta);
6106                 u32 dma_offset = i * DMA_CORE_OFFSET;
6107
6108                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6109                                 lower_32_bits(sob_addr));
6110                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6111                                 upper_32_bits(sob_addr));
6112                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6113
6114                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6115                  * modified by the user for SRAM reduction
6116                  */
6117                 if (i > 1)
6118                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6119                                                                 0x00000001);
6120         }
6121 }
6122
6123 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6124 {
6125         u32 qman_offset;
6126         int i;
6127
6128         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6129                 qman_offset = i * DMA_QMAN_OFFSET;
6130                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6131         }
6132
6133         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6134                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6135                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6136         }
6137
6138         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6139                 qman_offset = i * TPC_QMAN_OFFSET;
6140                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6141         }
6142
6143         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6144                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6145                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6146                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6147         }
6148 }
6149
6150 static int gaudi_restore_user_registers(struct hl_device *hdev)
6151 {
6152         int rc;
6153
6154         rc = gaudi_restore_sm_registers(hdev);
6155         if (rc)
6156                 return rc;
6157
6158         gaudi_restore_dma_registers(hdev);
6159         gaudi_restore_qm_registers(hdev);
6160
6161         return 0;
6162 }
6163
6164 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6165 {
6166         return 0;
6167 }
6168
6169 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6170 {
6171         struct asic_fixed_properties *prop = &hdev->asic_prop;
6172         struct gaudi_device *gaudi = hdev->asic_specific;
6173         u64 addr = prop->mmu_pgt_addr;
6174         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6175
6176         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6177                 return 0;
6178
6179         return gaudi_memset_device_memory(hdev, addr, size, 0);
6180 }
6181
6182 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6183 {
6184
6185 }
6186
6187 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6188                         bool user_address, u32 *val)
6189 {
6190         struct asic_fixed_properties *prop = &hdev->asic_prop;
6191         struct gaudi_device *gaudi = hdev->asic_specific;
6192         u64 hbm_bar_addr, host_phys_end;
6193         int rc = 0;
6194
6195         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6196
6197         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6198
6199                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6200                                 (hdev->clock_gating_mask &
6201                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6202
6203                         dev_err_ratelimited(hdev->dev,
6204                                 "Can't read register - clock gating is enabled!\n");
6205                         rc = -EFAULT;
6206                 } else {
6207                         *val = RREG32(addr - CFG_BASE);
6208                 }
6209
6210         } else if ((addr >= SRAM_BASE_ADDR) &&
6211                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6212                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6213                                 (addr - SRAM_BASE_ADDR));
6214         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6215                 u64 bar_base_addr = DRAM_PHYS_BASE +
6216                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6217
6218                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6219                 if (hbm_bar_addr != U64_MAX) {
6220                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6221                                                 (addr - bar_base_addr));
6222
6223                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6224                                                 hbm_bar_addr);
6225                 }
6226                 if (hbm_bar_addr == U64_MAX)
6227                         rc = -EIO;
6228         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6229                         user_address && !iommu_present(&pci_bus_type)) {
6230                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6231         } else {
6232                 rc = -EFAULT;
6233         }
6234
6235         return rc;
6236 }
6237
6238 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6239                         bool user_address, u32 val)
6240 {
6241         struct asic_fixed_properties *prop = &hdev->asic_prop;
6242         struct gaudi_device *gaudi = hdev->asic_specific;
6243         u64 hbm_bar_addr, host_phys_end;
6244         int rc = 0;
6245
6246         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6247
6248         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6249
6250                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6251                                 (hdev->clock_gating_mask &
6252                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6253
6254                         dev_err_ratelimited(hdev->dev,
6255                                 "Can't write register - clock gating is enabled!\n");
6256                         rc = -EFAULT;
6257                 } else {
6258                         WREG32(addr - CFG_BASE, val);
6259                 }
6260
6261         } else if ((addr >= SRAM_BASE_ADDR) &&
6262                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6263                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6264                                         (addr - SRAM_BASE_ADDR));
6265         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6266                 u64 bar_base_addr = DRAM_PHYS_BASE +
6267                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6268
6269                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6270                 if (hbm_bar_addr != U64_MAX) {
6271                         writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6272                                                 (addr - bar_base_addr));
6273
6274                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6275                                                 hbm_bar_addr);
6276                 }
6277                 if (hbm_bar_addr == U64_MAX)
6278                         rc = -EIO;
6279         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6280                         user_address && !iommu_present(&pci_bus_type)) {
6281                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6282         } else {
6283                 rc = -EFAULT;
6284         }
6285
6286         return rc;
6287 }
6288
6289 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6290                                 bool user_address, u64 *val)
6291 {
6292         struct asic_fixed_properties *prop = &hdev->asic_prop;
6293         struct gaudi_device *gaudi = hdev->asic_specific;
6294         u64 hbm_bar_addr, host_phys_end;
6295         int rc = 0;
6296
6297         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6298
6299         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6300
6301                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6302                                 (hdev->clock_gating_mask &
6303                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6304
6305                         dev_err_ratelimited(hdev->dev,
6306                                 "Can't read register - clock gating is enabled!\n");
6307                         rc = -EFAULT;
6308                 } else {
6309                         u32 val_l = RREG32(addr - CFG_BASE);
6310                         u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6311
6312                         *val = (((u64) val_h) << 32) | val_l;
6313                 }
6314
6315         } else if ((addr >= SRAM_BASE_ADDR) &&
6316                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6317                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6318                                 (addr - SRAM_BASE_ADDR));
6319         } else if (addr <=
6320                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6321                 u64 bar_base_addr = DRAM_PHYS_BASE +
6322                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6323
6324                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6325                 if (hbm_bar_addr != U64_MAX) {
6326                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6327                                                 (addr - bar_base_addr));
6328
6329                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6330                                                 hbm_bar_addr);
6331                 }
6332                 if (hbm_bar_addr == U64_MAX)
6333                         rc = -EIO;
6334         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6335                         user_address && !iommu_present(&pci_bus_type)) {
6336                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6337         } else {
6338                 rc = -EFAULT;
6339         }
6340
6341         return rc;
6342 }
6343
6344 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6345                                 bool user_address, u64 val)
6346 {
6347         struct asic_fixed_properties *prop = &hdev->asic_prop;
6348         struct gaudi_device *gaudi = hdev->asic_specific;
6349         u64 hbm_bar_addr, host_phys_end;
6350         int rc = 0;
6351
6352         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6353
6354         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6355
6356                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6357                                 (hdev->clock_gating_mask &
6358                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6359
6360                         dev_err_ratelimited(hdev->dev,
6361                                 "Can't write register - clock gating is enabled!\n");
6362                         rc = -EFAULT;
6363                 } else {
6364                         WREG32(addr - CFG_BASE, lower_32_bits(val));
6365                         WREG32(addr + sizeof(u32) - CFG_BASE,
6366                                 upper_32_bits(val));
6367                 }
6368
6369         } else if ((addr >= SRAM_BASE_ADDR) &&
6370                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6371                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6372                                         (addr - SRAM_BASE_ADDR));
6373         } else if (addr <=
6374                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6375                 u64 bar_base_addr = DRAM_PHYS_BASE +
6376                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6377
6378                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6379                 if (hbm_bar_addr != U64_MAX) {
6380                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6381                                                 (addr - bar_base_addr));
6382
6383                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6384                                                 hbm_bar_addr);
6385                 }
6386                 if (hbm_bar_addr == U64_MAX)
6387                         rc = -EIO;
6388         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6389                         user_address && !iommu_present(&pci_bus_type)) {
6390                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6391         } else {
6392                 rc = -EFAULT;
6393         }
6394
6395         return rc;
6396 }
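
/*
 * Summary of the address decode shared by the four debugfs accessors above
 * (illustrative): configuration space (CFG_BASE..CFG_BASE + CFG_SIZE) goes
 * through RREG32/WREG32 unless blocked by clock gating, SRAM is accessed
 * through its PCI BAR, DRAM/HBM is accessed through the HBM BAR after
 * re-pointing its base with gaudi_set_hbm_bar_base(), and host physical
 * addresses are only dereferenced for user requests when no IOMMU is
 * present.
 */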
6397
6398 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6399                                         u32 size_to_dma, dma_addr_t dma_addr)
6400 {
6401         u32 err_cause, val;
6402         u64 dma_offset;
6403         int rc;
6404
6405         dma_offset = dma_id * DMA_CORE_OFFSET;
6406
6407         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6408         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6409         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6410         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6411         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6412         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6413                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6414
6415         rc = hl_poll_timeout(
6416                 hdev,
6417                 mmDMA0_CORE_STS0 + dma_offset,
6418                 val,
6419                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6420                 0,
6421                 1000000);
6422
6423         if (rc) {
6424                 dev_err(hdev->dev,
6425                         "DMA %d timed-out during reading of 0x%llx\n",
6426                         dma_id, addr);
6427                 return -EIO;
6428         }
6429
6430         /* Verify DMA is OK */
6431         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6432         if (err_cause) {
6433                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6434                 dev_dbg(hdev->dev,
6435                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6436                         err_cause);
6437                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6438
6439                 return -EIO;
6440         }
6441
6442         return 0;
6443 }
6444
6445 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6446                                 void *blob_addr)
6447 {
6448         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6449         struct gaudi_device *gaudi = hdev->asic_specific;
6450         u32 qm_glbl_sts0, qm_cgm_sts;
6451         u64 dma_offset, qm_offset;
6452         dma_addr_t dma_addr;
6453         void *kernel_addr;
6454         bool is_eng_idle;
6455         int rc = 0, dma_id;
6456
6457         kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6458                                                 hdev, SZ_2M,
6459                                                 &dma_addr,
6460                                                 GFP_KERNEL | __GFP_ZERO);
6461
6462         if (!kernel_addr)
6463                 return -ENOMEM;
6464
6465         mutex_lock(&gaudi->clk_gate_mutex);
6466
6467         hdev->asic_funcs->disable_clock_gating(hdev);
6468
6469         hdev->asic_funcs->hw_queues_lock(hdev);
6470
6471         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6472         dma_offset = dma_id * DMA_CORE_OFFSET;
6473         qm_offset = dma_id * DMA_QMAN_OFFSET;
6474         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6475         qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6476         qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6477         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6478                       IS_DMA_IDLE(dma_core_sts0);
6479
6480         if (!is_eng_idle) {
6481                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6482                 dma_offset = dma_id * DMA_CORE_OFFSET;
6483                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6484                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6485                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6486                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6487                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6488                               IS_DMA_IDLE(dma_core_sts0);
6489
6490                 if (!is_eng_idle) {
6491                         dev_err_ratelimited(hdev->dev,
6492                                 "Can't read via DMA because it is BUSY\n");
6493                         rc = -EAGAIN;
6494                         goto out;
6495                 }
6496         }
6497
6498         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6499         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6500                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6501
6502         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6503          * using the compute ctx ASID, if one exists. If not, use the kernel ctx
6504          * ASID
6505          */
6506         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6507
6508         /* Verify DMA is OK */
6509         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6510         if (err_cause) {
6511                 dev_dbg(hdev->dev,
6512                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6513                         err_cause);
6514                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6515         }
6516
6517         pos = 0;
6518         size_left = size;
6519         size_to_dma = SZ_2M;
6520
6521         while (size_left > 0) {
6522
6523                 if (size_left < SZ_2M)
6524                         size_to_dma = size_left;
6525
6526                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6527                                                 dma_addr);
6528                 if (rc)
6529                         break;
6530
6531                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6532
6533                 if (size_left <= SZ_2M)
6534                         break;
6535
6536                 pos += SZ_2M;
6537                 addr += SZ_2M;
6538                 size_left -= SZ_2M;
6539         }
6540
6541         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6542          * using the compute ctx ASID, if one exists. If not, use the kernel ctx
6543          * ASID
6544          */
6545         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6546                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6547
6548         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6549
6550 out:
6551         hdev->asic_funcs->hw_queues_unlock(hdev);
6552
6553         hdev->asic_funcs->set_clock_gating(hdev);
6554
6555         mutex_unlock(&gaudi->clk_gate_mutex);
6556
6557         hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6558                                                 dma_addr);
6559
6560         return rc;
6561 }
6562
6563 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6564 {
6565         struct gaudi_device *gaudi = hdev->asic_specific;
6566
6567         if (hdev->reset_info.hard_reset_pending)
6568                 return U64_MAX;
6569
6570         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6571                         (addr - gaudi->hbm_bar_cur_addr));
6572 }
6573
6574 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6575 {
6576         struct gaudi_device *gaudi = hdev->asic_specific;
6577
6578         if (hdev->reset_info.hard_reset_pending)
6579                 return;
6580
6581         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6582                         (addr - gaudi->hbm_bar_cur_addr));
6583 }
6584
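/*
 * Worked example (an assumption based on the 0x7FF mask used below, i.e.
 * ASID in the low bits with the MMU-bypass (MMBP) bit just above them):
 * preparing a register for asid = 3 first clears bits [10:0] and then ORs
 * in 0x3, leaving MMBP cleared and ASID = 3.
 */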
6585 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6586 {
6587         /* mask to zero the MMBP and ASID bits */
6588         WREG32_AND(reg, ~0x7FF);
6589         WREG32_OR(reg, asid);
6590 }
6591
6592 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6593 {
6594         struct gaudi_device *gaudi = hdev->asic_specific;
6595
6596         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6597                 return;
6598
6599         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6600                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6601                 return;
6602         }
6603
6604         mutex_lock(&gaudi->clk_gate_mutex);
6605
6606         hdev->asic_funcs->disable_clock_gating(hdev);
6607
6608         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6609         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6610         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6611         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6612         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6613
6614         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6615         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6616         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6617         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6618         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6619
6620         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6621         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6622         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6623         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6624         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6625
6626         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6627         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6628         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6629         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6630         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6631
6632         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6633         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6634         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6635         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6636         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6637
6638         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6639         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6640         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6641         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6642         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6643
6644         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6645         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6646         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6647         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6648         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6649
6650         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6651         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6652         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6653         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6654         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6655
6656         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6657         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6658         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6659         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6660         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6661         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6662         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6663         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6664
6665         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6666         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6667         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6668         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6669         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6670         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6671         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6672
6673         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6674         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6675         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6676         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6677         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6678         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6679         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6680
6681         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6682         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6683         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6684         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6685         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6686         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6687         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6688
6689         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6690         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6691         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6692         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6693         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6694         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6695         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6696
6697         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6698         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6699         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6700         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6701         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6702         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6703         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6704
6705         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6706         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6707         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6708         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6709         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6710         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6711         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6712
6713         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6714         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6715         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6716         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6717         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6718         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6719         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6720
6721         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6722         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6723         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6724         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6725         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6726         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6727         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6728
6729         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6730         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6731         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6732         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6733         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6734         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6735         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6736         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6737         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6738         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6739
6740         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6741         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6742         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6743         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6744         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6745         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6746         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6747         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6748         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6749         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6750         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6751         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6752
6753         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6754                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6755                                 asid);
6756                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6757                                 asid);
6758                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6759                                 asid);
6760                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6761                                 asid);
6762                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6763                                 asid);
6764         }
6765
6766         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6767                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6768                                 asid);
6769                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6770                                 asid);
6771                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6772                                 asid);
6773                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6774                                 asid);
6775                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6776                                 asid);
6777         }
6778
6779         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6780                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6781                                 asid);
6782                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6783                                 asid);
6784                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6785                                 asid);
6786                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6787                                 asid);
6788                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6789                                 asid);
6790         }
6791
6792         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6793                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6794                                 asid);
6795                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6796                                 asid);
6797                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6798                                 asid);
6799                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6800                                 asid);
6801                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6802                                 asid);
6803         }
6804
6805         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6806                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6807                                 asid);
6808                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6809                                 asid);
6810                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6811                                 asid);
6812                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6813                                 asid);
6814                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6815                                 asid);
6816         }
6817
6818         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6819                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6820                                 asid);
6821                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6822                                 asid);
6823                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6824                                 asid);
6825                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6826                                 asid);
6827                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6828                                 asid);
6829         }
6830
6831         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6832                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6833                                 asid);
6834                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6835                                 asid);
6836                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6837                                 asid);
6838                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6839                                 asid);
6840                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6841                                 asid);
6842         }
6843
6844         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6845                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6846                                 asid);
6847                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6848                                 asid);
6849                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6850                                 asid);
6851                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6852                                 asid);
6853                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6854                                 asid);
6855         }
6856
6857         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6858                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6859                                 asid);
6860                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6861                                 asid);
6862                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6863                                 asid);
6864                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6865                                 asid);
6866                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6867                                 asid);
6868         }
6869
6870         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6871                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6872                                 asid);
6873                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6874                                 asid);
6875                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6876                                 asid);
6877                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6878                                 asid);
6879                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6880                                 asid);
6881         }
6882
6883         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6884         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6885
6886         hdev->asic_funcs->set_clock_gating(hdev);
6887
6888         mutex_unlock(&gaudi->clk_gate_mutex);
6889 }
6890
6891 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6892                 struct hl_cs_job *job)
6893 {
6894         struct packet_msg_prot *fence_pkt;
6895         u32 *fence_ptr;
6896         dma_addr_t fence_dma_addr;
6897         struct hl_cb *cb;
6898         u32 tmp, timeout, dma_offset;
6899         int rc;
6900
6901         if (hdev->pldm)
6902                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6903         else
6904                 timeout = HL_DEVICE_TIMEOUT_USEC;
6905
6906         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6907                 dev_err_ratelimited(hdev->dev,
6908                         "Can't send driver job on QMAN0 because the device is not idle\n");
6909                 return -EBUSY;
6910         }
6911
6912         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6913                                                         &fence_dma_addr);
6914         if (!fence_ptr) {
6915                 dev_err(hdev->dev,
6916                         "Failed to allocate fence memory for QMAN0\n");
6917                 return -ENOMEM;
6918         }
6919
6920         cb = job->patched_cb;
6921
6922         fence_pkt = cb->kernel_address +
6923                         job->job_cb_size - sizeof(struct packet_msg_prot);
6924
6925         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6926         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6927         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6928
6929         fence_pkt->ctl = cpu_to_le32(tmp);
6930         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6931         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6932
6933         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6934
6935         WREG32(mmDMA0_CORE_PROT + dma_offset,
6936                         BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6937
6938         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6939                                         job->job_cb_size, cb->bus_address);
6940         if (rc) {
6941                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6942                 goto free_fence_ptr;
6943         }
6944
6945         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6946                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6947                                 timeout, true);
6948
6949         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6950
6951         if (rc == -ETIMEDOUT) {
6952                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6953                 goto free_fence_ptr;
6954         }
6955
6956 free_fence_ptr:
6957         WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6958
6959         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6960                                         fence_dma_addr);
6961         return rc;
6962 }
6963
6964 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6965 {
6966         if (event_type >= GAUDI_EVENT_SIZE)
6967                 goto event_not_supported;
6968
6969         if (!gaudi_irq_map_table[event_type].valid)
6970                 goto event_not_supported;
6971
6972         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6973
6974         return;
6975
6976 event_not_supported:
6977         snprintf(desc, size, "N/A");
6978 }
6979
6980 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6981                                                         bool is_write, s32 *engine_id_1,
6982                                                         s32 *engine_id_2)
6983 {
6984         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6985
6986         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6987                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6988
6989         switch (x_y) {
6990         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6991         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6992                 dma_id[0] = 0;
6993                 dma_id[1] = 2;
6994                 break;
6995         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6996         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6997                 dma_id[0] = 1;
6998                 dma_id[1] = 3;
6999                 break;
7000         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7001         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7002                 dma_id[0] = 4;
7003                 dma_id[1] = 6;
7004                 break;
7005         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7006         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7007                 dma_id[0] = 5;
7008                 dma_id[1] = 7;
7009                 break;
7010         default:
7011                 goto unknown_initiator;
7012         }
7013
7014         for (i = 0 ; i < 2 ; i++) {
7015                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
7016                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
7017         }
7018
7019         switch (x_y) {
7020         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7021         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7022                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
7023                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
7024                         return "DMA0";
7025                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
7026                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
7027                         return "DMA2";
7028                 } else {
7029                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
7030                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
7031                         return "DMA0 or DMA2";
7032                 }
7033         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7034         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7035                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
7036                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
7037                         return "DMA1";
7038                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
7039                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
7040                         return "DMA3";
7041                 } else {
7042                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
7043                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
7044                         return "DMA1 or DMA3";
7045                 }
7046         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7047         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7048                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
7049                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
7050                         return "DMA4";
7051                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
7052                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
7053                         return "DMA6";
7054                 } else {
7055                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
7056                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
7057                         return "DMA4 or DMA6";
7058                 }
7059         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7060         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7061                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
7062                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
7063                         return "DMA5";
7064                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
7065                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
7066                         return "DMA7";
7067                 } else {
7068                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
7069                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
7070                         return "DMA5 or DMA7";
7071                 }
7072         }
7073
7074 unknown_initiator:
7075         return "unknown initiator";
7076 }
7077
7078 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
7079                                                         u32 *engine_id_1, u32 *engine_id_2)
7080 {
7081         u32 val, x_y, axi_id;
7082
7083         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
7084                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
7085         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
7086                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
7087         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
7088                         RAZWI_INITIATOR_AXI_ID_SHIFT);
7089
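	/*
	 * x_y keeps the initiator's router coordinates and axi_id the AXI
	 * initiator class within that router; the switch below maps the pair
	 * to an engine name (and engine id, where one exists).
	 */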
7090         switch (x_y) {
7091         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
7092                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
7093                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
7094                         return "TPC0";
7095                 }
7096                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
7097                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
7098                         return "NIC0";
7099                 }
7100                 break;
7101         case RAZWI_INITIATOR_ID_X_Y_TPC1:
7102                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
7103                 return "TPC1";
7104         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
7105         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
7106                 *engine_id_1 = GAUDI_ENGINE_ID_MME_0;
7107                 return "MME0";
7108         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
7109         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
7110                 *engine_id_1 = GAUDI_ENGINE_ID_MME_1;
7111                 return "MME1";
7112         case RAZWI_INITIATOR_ID_X_Y_TPC2:
7113                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
7114                 return "TPC2";
7115         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
7116                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
7117                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
7118                         return "TPC3";
7119                 }
7120                 /* PCI, CPU and PSOC do not have an engine id */
7121                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
7122                         return "PCI";
7123                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
7124                         return "CPU";
7125                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
7126                         return "PSOC";
7127                 break;
7128         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7129         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7130         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7131         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7132         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7133         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7134         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7135         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7136                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
7137                                 engine_id_1, engine_id_2);
7138         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7139                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
7140                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
7141                         return "TPC4";
7142                 }
7143                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
7144                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
7145                         return "NIC1";
7146                 }
7147                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
7148                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
7149                         return "NIC2";
7150                 }
7151                 break;
7152         case RAZWI_INITIATOR_ID_X_Y_TPC5:
7153                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
7154                 return "TPC5";
7155         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7156         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7157                 *engine_id_1 = GAUDI_ENGINE_ID_MME_2;
7158                 return "MME2";
7159         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7160         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7161                 *engine_id_1 = GAUDI_ENGINE_ID_MME_3;
7162                 return "MME3";
7163         case RAZWI_INITIATOR_ID_X_Y_TPC6:
7164                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
7165                 return "TPC6";
7166         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7167                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
7168                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
7169                         return "TPC7";
7170                 }
7171                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
7172                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
7173                         return "NIC4";
7174                 }
7175                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
7176                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
7177                         return "NIC5";
7178                 }
7179                 break;
7180         default:
7181                 break;
7182         }
7183
7184         dev_err(hdev->dev,
7185                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7186                 val,
7187                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7188                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7189                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7190                         RAZWI_INITIATOR_AXI_ID_MASK);
7191
7192         return "unknown initiator";
7193 }
7194
7195 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_id_1,
7196                                                 u32 *engine_id_2)
7197 {
7198
7199         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7200                 dev_err_ratelimited(hdev->dev,
7201                         "RAZWI event caused by illegal write of %s\n",
7202                         gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
7203                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7204         }
7205
7206         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7207                 dev_err_ratelimited(hdev->dev,
7208                         "RAZWI event caused by illegal read of %s\n",
7209                         gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
7210                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7211         }
7212 }
7213
7214 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u8 *type)
7215 {
7216         struct gaudi_device *gaudi = hdev->asic_specific;
7217         u32 val;
7218
7219         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7220                 return;
7221
7222         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7223         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
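		/*
		 * The fault VA is 50 bits wide: bits [49:32] come from the
		 * capture register, bits [31:0] from the dedicated VA register.
		 */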
7224                 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7225                 *addr <<= 32;
7226                 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7227
7228                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
7229                 *type = HL_RAZWI_PAGE_FAULT;
7230
7231                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7232         }
7233
7234         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7235         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7236                 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7237                 *addr <<= 32;
7238                 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7239
7240                 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
7241                 *type = HL_RAZWI_MMU_ACCESS_ERROR;
7242
7243                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7244         }
7245 }
7246
7247 /*
7248  *  +-------------------+------------------------------------------------------+
7249  *  | Configuration Reg |                     Description                      |
7250  *  |      Address      |                                                      |
7251  *  +-------------------+------------------------------------------------------+
7252  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7253  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7254  *  |                   |0xF34 memory wrappers 63:32                           |
7255  *  |                   |0xF38 memory wrappers 95:64                           |
7256  *  |                   |0xF3C memory wrappers 127:96                          |
7257  *  +-------------------+------------------------------------------------------+
7258  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7259  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7260  *  |                   |0xF44 memory wrappers 63:32                           |
7261  *  |                   |0xF48 memory wrappers 95:64                           |
7262  *  |                   |0xF4C memory wrappers 127:96                          |
7263  *  +-------------------+------------------------------------------------------+
7264  */
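/*
 * Worked example (hypothetical wrapper index): a single-bit error in memory
 * wrapper 70 sets bit (70 - 64) = 6 of the register at offset 0xF38, while a
 * double-bit error in the same wrapper sets bit 6 of the register at 0xF48.
 */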
7265 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7266                 struct ecc_info_extract_params *params, u64 *ecc_address,
7267                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7268 {
7269         struct gaudi_device *gaudi = hdev->asic_specific;
7270         u32 i, num_mem_regs, reg, err_bit;
7271         u64 err_addr, err_word = 0;
7272         int rc = 0;
7273
7274         num_mem_regs = params->num_memories / 32 +
7275                         ((params->num_memories % 32) ? 1 : 0);
7276
7277         if (params->block_address >= CFG_BASE)
7278                 params->block_address -= CFG_BASE;
7279
7280         if (params->derr)
7281                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7282         else
7283                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7284
7285         if (params->disable_clock_gating) {
7286                 mutex_lock(&gaudi->clk_gate_mutex);
7287                 hdev->asic_funcs->disable_clock_gating(hdev);
7288         }
7289
7290         /* Set invalid wrapper index */
7291         *memory_wrapper_idx = 0xFF;
7292
7293         /* Iterate through memory wrappers, a single bit must be set */
7294         for (i = 0 ; i < num_mem_regs ; i++) {
7295                 /* each 32-bit indication register covers 32 wrappers */
7296                 err_word = RREG32(err_addr + i * 4);
7297                 if (err_word) {
7298                         err_bit = __ffs(err_word);
7299                         *memory_wrapper_idx = err_bit + (32 * i);
7300                         break;
7301                 }
7302         }
7303
7304         if (*memory_wrapper_idx == 0xFF) {
7305                 dev_err(hdev->dev, "ECC error information cannot be found\n");
7306                 rc = -EINVAL;
7307                 goto enable_clk_gate;
7308         }
7309
7310         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7311                         *memory_wrapper_idx);
7312
7313         *ecc_address =
7314                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7315         *ecc_syndrom =
7316                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7317
7318         /* Clear error indication */
7319         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7320         if (params->derr)
7321                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7322         else
7323                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7324
7325         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7326
7327 enable_clk_gate:
7328         if (params->disable_clock_gating) {
7329                 hdev->asic_funcs->set_clock_gating(hdev);
7330
7331                 mutex_unlock(&gaudi->clk_gate_mutex);
7332         }
7333
7334         return rc;
7335 }
7336
7337 /*
7338  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7339  *
7340  * @idx: the current pi/ci value
7341  * @q_len: the queue length (power of 2)
7342  *
7343  * @return the cyclically decremented index
7344  */
7345 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7346 {
7347         u32 mask = q_len - 1;
7348
7349         /*
7350          * modular decrement is equivalent to adding (q_len - 1);
7351          * we then mask with (q_len - 1) to keep the value in the
7352          * range [0, q_len - 1]
7353          */
7354         return (idx + q_len - 1) & mask;
7355 }
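/*
 * For example, with q_len = 8 (mask = 7): gaudi_queue_idx_dec(0, 8) returns
 * (0 + 7) & 7 = 7, and gaudi_queue_idx_dec(5, 8) returns (5 + 7) & 7 = 4.
 */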
7356
7357 /**
7358  * gaudi_print_sw_config_stream_data - print SW config stream data
7359  *
7360  * @hdev: pointer to the habanalabs device structure
7361  * @stream: the QMAN's stream
7362  * @qman_base: base address of QMAN registers block
7363  */
7364 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7365                                                 u64 qman_base)
7366 {
7367         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7368         u32 cq_ptr_lo_off, size;
7369
7370         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7371
7372         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7373                                                 stream * cq_ptr_lo_off;
7374         cq_ptr_hi = cq_ptr_lo +
7375                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7376         cq_tsize = cq_ptr_lo +
7377                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7378
7379         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7380         size = RREG32(cq_tsize);
7381         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
7382                                                         stream, cq_ptr, size);
7383 }
7384
7385 /**
7386  * gaudi_print_last_pqes_on_err - print last PQEs on error
7387  *
7388  * @hdev: pointer to the habanalabs device structure
7389  * @qid_base: first QID of the QMAN (out of 4 streams)
7390  * @stream: the QMAN's stream
7391  * @qman_base: base address of QMAN registers block
7392  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7393  */
7394 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7395                                                 u32 stream, u64 qman_base,
7396                                                 bool pr_sw_conf)
7397 {
7398         u32 ci, qm_ci_stream_off, queue_len;
7399         struct hl_hw_queue *q;
7400         u64 pq_ci;
7401         int i;
7402
7403         q = &hdev->kernel_queues[qid_base + stream];
7404
7405         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7406         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7407                                                 stream * qm_ci_stream_off;
7408
7409         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7410                                         q->int_queue_len : HL_QUEUE_LENGTH;
7411
7412         hdev->asic_funcs->hw_queues_lock(hdev);
7413
7414         if (pr_sw_conf)
7415                 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7416
7417         ci = RREG32(pq_ci);
7418
7419         /* we should start printing from ci - 1 */
7420         ci = gaudi_queue_idx_dec(ci, queue_len);
7421
7422         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7423                 struct hl_bd *bd;
7424                 u64 addr;
7425                 u32 len;
7426
7427                 bd = q->kernel_address;
7428                 bd += ci;
7429
7430                 len = le32_to_cpu(bd->len);
7431                 /* len 0 means an uninitialized entry - break */
7432                 if (!len)
7433                         break;
7434
7435                 addr = le64_to_cpu(bd->ptr);
7436
7437                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
7438                                                         stream, ci, addr, len);
7439
7440                 /* get previous ci, wrap if needed */
7441                 ci = gaudi_queue_idx_dec(ci, queue_len);
7442         }
7443
7444         hdev->asic_funcs->hw_queues_unlock(hdev);
7445 }
7446
7447 /**
7448  * print_qman_data_on_err - extract QMAN data on error
7449  *
7450  * @hdev: pointer to the habanalabs device structure
7451  * @qid_base: first QID of the QMAN (out of 4 streams)
7452  * @stream: the QMAN's stream
7453  * @qman_base: base address of QMAN registers block
7454  *
7455  * This function attempts to extract as much data as possible on a QMAN error.
7456  * For an upper CP, print the SW config stream data and the last 8 PQEs.
7457  * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7458  */
7459 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7460                                                 u32 stream, u64 qman_base)
7461 {
7462         u32 i;
7463
7464         if (stream != QMAN_STREAMS) {
7465                 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7466                                                                         true);
7467                 return;
7468         }
7469
7470         gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7471
7472         for (i = 0; i < QMAN_STREAMS; i++)
7473                 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7474                                                                         false);
7475 }
7476
7477 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7478                                           const char *qm_name,
7479                                           u64 qman_base,
7480                                           u32 qid_base)
7481 {
7482         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7483         u64 glbl_sts_addr, arb_err_addr;
7484         char reg_desc[32];
7485
7486         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7487         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7488
7489         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7490         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7491                 glbl_sts_clr_val = 0;
7492                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7493
7494                 if (!glbl_sts_val)
7495                         continue;
7496
7497                 if (i == QMAN_STREAMS)
7498                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7499                 else
7500                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7501
7502                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7503                         if (glbl_sts_val & BIT(j)) {
7504                                 dev_err_ratelimited(hdev->dev,
7505                                                 "%s %s. err cause: %s\n",
7506                                                 qm_name, reg_desc,
7507                                                 gaudi_qman_error_cause[j]);
7508                                 glbl_sts_clr_val |= BIT(j);
7509                         }
7510                 }
7511
7512                 /* Write 1 to clear errors (W1C) */
7513                 if (!hdev->stop_on_err)
7514                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7515                 else
7516                         print_qman_data_on_err(hdev, qid_base, i, qman_base);
7517         }
7518
7519         arb_err_val = RREG32(arb_err_addr);
7520
7521         if (!arb_err_val)
7522                 return;
7523
7524         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7525                 if (arb_err_val & BIT(j)) {
7526                         dev_err_ratelimited(hdev->dev,
7527                                         "%s ARB_ERR. err cause: %s\n",
7528                                         qm_name,
7529                                         gaudi_qman_arb_error_cause[j]);
7530                 }
7531         }
7532 }
7533
7534 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7535                 struct hl_eq_sm_sei_data *sei_data)
7536 {
7537         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7538
7539         /* Flip the bits as the enum is ordered in the opposite way */
7540         index = (index ^ 0x3) & 0x3;
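	/* i.e., event index 0 maps to name entry 3, 1 to 2, 2 to 1, 3 to 0 */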
7541
7542         switch (sei_data->sei_cause) {
7543         case SM_SEI_SO_OVERFLOW:
7544                 dev_err_ratelimited(hdev->dev,
7545                         "%s SEI Error: SOB Group %u overflow/underflow",
7546                         gaudi_sync_manager_names[index],
7547                         le32_to_cpu(sei_data->sei_log));
7548                 break;
7549         case SM_SEI_LBW_4B_UNALIGNED:
7550                 dev_err_ratelimited(hdev->dev,
7551                         "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7552                         gaudi_sync_manager_names[index],
7553                         le32_to_cpu(sei_data->sei_log));
7554                 break;
7555         case SM_SEI_AXI_RESPONSE_ERR:
7556                 dev_err_ratelimited(hdev->dev,
7557                         "%s SEI Error: AXI ID %u response error",
7558                         gaudi_sync_manager_names[index],
7559                         le32_to_cpu(sei_data->sei_log));
7560                 break;
7561         default:
7562                 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7563                                 le32_to_cpu(sei_data->sei_log));
7564                 break;
7565         }
7566 }
7567
7568 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7569                 struct hl_eq_ecc_data *ecc_data)
7570 {
7571         struct ecc_info_extract_params params;
7572         u64 ecc_address = 0, ecc_syndrom = 0;
7573         u8 index, memory_wrapper_idx = 0;
7574         bool extract_info_from_fw;
7575         int rc;
7576
7577         if (hdev->asic_prop.fw_security_enabled) {
7578                 extract_info_from_fw = true;
7579                 goto extract_ecc_info;
7580         }
7581
7582         switch (event_type) {
7583         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7584         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7585                 extract_info_from_fw = true;
7586                 break;
7587         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7588                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7589                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7590                 params.num_memories = 90;
7591                 params.derr = false;
7592                 params.disable_clock_gating = true;
7593                 extract_info_from_fw = false;
7594                 break;
7595         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7596                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7597                 params.block_address =
7598                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7599                 params.num_memories = 90;
7600                 params.derr = true;
7601                 params.disable_clock_gating = true;
7602                 extract_info_from_fw = false;
7603                 break;
7604         case GAUDI_EVENT_MME0_ACC_SERR:
7605         case GAUDI_EVENT_MME1_ACC_SERR:
7606         case GAUDI_EVENT_MME2_ACC_SERR:
7607         case GAUDI_EVENT_MME3_ACC_SERR:
7608                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7609                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7610                 params.num_memories = 128;
7611                 params.derr = false;
7612                 params.disable_clock_gating = true;
7613                 extract_info_from_fw = false;
7614                 break;
7615         case GAUDI_EVENT_MME0_ACC_DERR:
7616         case GAUDI_EVENT_MME1_ACC_DERR:
7617         case GAUDI_EVENT_MME2_ACC_DERR:
7618         case GAUDI_EVENT_MME3_ACC_DERR:
7619                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7620                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7621                 params.num_memories = 128;
7622                 params.derr = true;
7623                 params.disable_clock_gating = true;
7624                 extract_info_from_fw = false;
7625                 break;
7626         case GAUDI_EVENT_MME0_SBAB_SERR:
7627         case GAUDI_EVENT_MME1_SBAB_SERR:
7628         case GAUDI_EVENT_MME2_SBAB_SERR:
7629         case GAUDI_EVENT_MME3_SBAB_SERR:
7630                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7631                 params.block_address =
7632                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7633                 params.num_memories = 33;
7634                 params.derr = false;
7635                 params.disable_clock_gating = true;
7636                 extract_info_from_fw = false;
7637                 break;
7638         case GAUDI_EVENT_MME0_SBAB_DERR:
7639         case GAUDI_EVENT_MME1_SBAB_DERR:
7640         case GAUDI_EVENT_MME2_SBAB_DERR:
7641         case GAUDI_EVENT_MME3_SBAB_DERR:
7642                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7643                 params.block_address =
7644                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7645                 params.num_memories = 33;
7646                 params.derr = true;
7647                 params.disable_clock_gating = true;
7648                 extract_info_from_fw = false;
7649                 break;
7650         default:
7651                 return;
7652         }
7653
7654 extract_ecc_info:
7655         if (extract_info_from_fw) {
7656                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7657                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7658                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7659         } else {
7660                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7661                                 &ecc_syndrom, &memory_wrapper_idx);
7662                 if (rc)
7663                         return;
7664         }
7665
7666         dev_err(hdev->dev,
7667                 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7668                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7669 }
7670
7671 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7672 {
7673         u64 qman_base;
7674         char desc[32];
7675         u32 qid_base;
7676         u8 index;
7677
7678         switch (event_type) {
7679         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7680                 index = event_type - GAUDI_EVENT_TPC0_QM;
7681                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7682                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7683                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7684                 break;
7685         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7686                 index = event_type - GAUDI_EVENT_MME0_QM;
7687                 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7688                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7689                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7690                 break;
7691         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7692                 index = event_type - GAUDI_EVENT_DMA0_QM;
7693                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7694                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7695                 if (index > 1)
7696                         qid_base++;
7697                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7698                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7699                 break;
7700         case GAUDI_EVENT_NIC0_QM0:
7701                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7702                 qman_base = mmNIC0_QM0_BASE;
7703                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7704                 break;
7705         case GAUDI_EVENT_NIC0_QM1:
7706                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7707                 qman_base = mmNIC0_QM1_BASE;
7708                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7709                 break;
7710         case GAUDI_EVENT_NIC1_QM0:
7711                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7712                 qman_base = mmNIC1_QM0_BASE;
7713                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7714                 break;
7715         case GAUDI_EVENT_NIC1_QM1:
7716                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7717                 qman_base = mmNIC1_QM1_BASE;
7718                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7719                 break;
7720         case GAUDI_EVENT_NIC2_QM0:
7721                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7722                 qman_base = mmNIC2_QM0_BASE;
7723                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7724                 break;
7725         case GAUDI_EVENT_NIC2_QM1:
7726                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7727                 qman_base = mmNIC2_QM1_BASE;
7728                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7729                 break;
7730         case GAUDI_EVENT_NIC3_QM0:
7731                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7732                 qman_base = mmNIC3_QM0_BASE;
7733                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7734                 break;
7735         case GAUDI_EVENT_NIC3_QM1:
7736                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7737                 qman_base = mmNIC3_QM1_BASE;
7738                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7739                 break;
7740         case GAUDI_EVENT_NIC4_QM0:
7741                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7742                 qman_base = mmNIC4_QM0_BASE;
7743                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7744                 break;
7745         case GAUDI_EVENT_NIC4_QM1:
7746                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7747                 qman_base = mmNIC4_QM1_BASE;
7748                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7749                 break;
7750         default:
7751                 return;
7752         }
7753
7754         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7755 }
7756
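/*
 * Print the event description and, for RAZWI events, collect the initiator
 * engine IDs and MMU error info. The parameters of the first RAZWI are also
 * recorded in hdev->last_error for later retrieval.
 */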
7757 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7758                                         bool razwi)
7759 {
7760         u32 engine_id_1, engine_id_2;
7761         char desc[64] = "";
7762         u64 razwi_addr = 0;
7763         u8 razwi_type;
7764         int rc;
7765
7766         /*
7767          * Initialize the engine ids as not valid; they get a valid value only
7768          * if the razwi was initiated by an engine that has an engine id.
7769          * Initialize the razwi type to its default; it is changed only if the
7770          * razwi was caused by a page fault or an MMU access error.
7771          */
7772         engine_id_1 = U16_MAX;
7773         engine_id_2 = U16_MAX;
7774         razwi_type = U8_MAX;
7775
7776         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7777         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7778                 event_type, desc);
7779
7780         if (razwi) {
7781                 gaudi_print_and_get_razwi_info(hdev, &engine_id_1, &engine_id_2);
7782                 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
7783
7784                 /* In case it's the first razwi, save its parameters */
7785                 rc = atomic_cmpxchg(&hdev->last_error.razwi_write_disable, 0, 1);
7786                 if (!rc) {
7787                         hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime;
7788                         hdev->last_error.razwi_timestamp = ktime_get();
7789                         hdev->last_error.razwi_addr = razwi_addr;
7790                         hdev->last_error.razwi_engine_id_1 = engine_id_1;
7791                         hdev->last_error.razwi_engine_id_2 = engine_id_2;
7792                         /*
7793                          * If first engine id holds non valid value the razwi initiator
7794                          * does not have engine id
7795                          */
7796                         hdev->last_error.razwi_non_engine_initiator = (engine_id_1 == U16_MAX);
7797                         hdev->last_error.razwi_type = razwi_type;
7798
7799                 }
7800         }
7801 }
7802
7803 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7804                                         struct cpucp_pkt_sync_err *sync_err)
7805 {
7806         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7807
7808         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7809                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7810 }
7811
7812 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7813                                         struct hl_eq_fw_alive *fw_alive)
7814 {
7815         dev_err(hdev->dev,
7816                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7817                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7818                 "Minor" : "Critical", fw_alive->process_id,
7819                 fw_alive->thread_id, fw_alive->uptime_seconds);
7820 }
7821
7822 static int gaudi_non_hard_reset_late_init(struct hl_device *hdev)
7823 {
7824         /* GAUDI doesn't support any reset except hard-reset */
7825         return -EPERM;
7826 }
7827
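/*
 * Report HBM ECC/parity interrupts for the given HBM device, either from the
 * FW-supplied data (when FW ECC reporting is enabled) or by reading the HBM
 * memory controller registers directly, in which case the interrupt sources
 * are also cleared.
 */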
7828 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7829                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7830 {
7831         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7832         int rc = 0;
7833
7834         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7835                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7836                 if (!hbm_ecc_data) {
7837                         dev_err(hdev->dev, "No FW ECC data\n");
7838                         return 0;
7839                 }
7840
7841                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7842                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7843                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7844                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7845                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7846                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7847                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7848                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7849                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7850                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7851                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7852                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7853                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7854                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7855
7856                 dev_err(hdev->dev,
7857                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7858                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7859                 dev_err(hdev->dev,
7860                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7861                         device, ch, hbm_ecc_data->first_addr, type,
7862                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7863                         hbm_ecc_data->dec_cnt);
7864                 return 0;
7865         }
7866
7867         if (hdev->asic_prop.fw_security_enabled) {
7868                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7869                 return 0;
7870         }
7871
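        /*
         * FW ECC reporting is not enabled - read the interrupt and ECC status
         * directly from the HBM memory controller registers; each loop
         * iteration covers one pseudo-channel pair.
         */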
7872         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7873         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7874                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7875                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7876                 if (val) {
7877                         rc = -EIO;
7878                         dev_err(hdev->dev,
7879                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7880                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7881                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7882                                 (val >> 4) & 0x1);
7883
7884                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7885                         dev_err(hdev->dev,
7886                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7887                                 device, ch * 2,
7888                                 RREG32(base + ch * 0x1000 + 0x064),
7889                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7890                                 (val2 & 0xFF0000) >> 16,
7891                                 (val2 & 0xFF000000) >> 24);
7892                 }
7893
7894                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7895                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7896                 if (val) {
7897                         rc = -EIO;
7898                         dev_err(hdev->dev,
7899                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7900                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7901                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7902                                 (val >> 4) & 0x1);
7903
7904                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7905                         dev_err(hdev->dev,
7906                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7907                                 device, ch * 2 + 1,
7908                                 RREG32(base + ch * 0x1000 + 0x074),
7909                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7910                                 (val2 & 0xFF0000) >> 16,
7911                                 (val2 & 0xFF000000) >> 24);
7912                 }
7913
7914                 /* Clear interrupts */
7915                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7916                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7917                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7918                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7919                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7920                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7921         }
7922
7923         val  = RREG32(base + 0x8F30);
7924         val2 = RREG32(base + 0x8F34);
7925         if (val | val2) {
7926                 rc = -EIO;
7927                 dev_err(hdev->dev,
7928                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7929                         device, val, val2);
7930         }
7931         val  = RREG32(base + 0x8F40);
7932         val2 = RREG32(base + 0x8F44);
7933         if (val | val2) {
7934                 rc = -EIO;
7935                 dev_err(hdev->dev,
7936                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7937                         device, val, val2);
7938         }
7939
7940         return rc;
7941 }
7942
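/* Translate an HBM SPI event type to its HBM device index (0-3) */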
7943 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7944 {
7945         switch (hbm_event_type) {
7946         case GAUDI_EVENT_HBM0_SPI_0:
7947         case GAUDI_EVENT_HBM0_SPI_1:
7948                 return 0;
7949         case GAUDI_EVENT_HBM1_SPI_0:
7950         case GAUDI_EVENT_HBM1_SPI_1:
7951                 return 1;
7952         case GAUDI_EVENT_HBM2_SPI_0:
7953         case GAUDI_EVENT_HBM2_SPI_1:
7954                 return 2;
7955         case GAUDI_EVENT_HBM3_SPI_0:
7956         case GAUDI_EVENT_HBM3_SPI_1:
7957                 return 3;
7958         default:
7959                 break;
7960         }
7961
7962         /* Should never happen */
7963         return 0;
7964 }
7965
7966 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7967                                         char *interrupt_name)
7968 {
7969         struct gaudi_device *gaudi = hdev->asic_specific;
7970         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7971         bool soft_reset_required = false;
7972
7973         /* Accessing the TPC_INTR_CAUSE registers requires disabling clock
7974          * gating, which cannot be done by CPU-CP, so the driver does it
7975          * instead.
7976          */
7977
7978         mutex_lock(&gaudi->clk_gate_mutex);
7979
7980         hdev->asic_funcs->disable_clock_gating(hdev);
7981
7982         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7983                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7984
7985         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7986                 if (tpc_interrupts_cause & BIT(i)) {
7987                         dev_err_ratelimited(hdev->dev,
7988                                         "TPC%d_%s interrupt cause: %s\n",
7989                                         tpc_id, interrupt_name,
7990                                         gaudi_tpc_interrupts_cause[i]);
7991                         /* If this is QM error, we need to soft-reset */
7992                         if (i == 15)
7993                                 soft_reset_required = true;
7994                 }
7995
7996         /* Clear interrupts */
7997         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7998
7999         hdev->asic_funcs->set_clock_gating(hdev);
8000
8001         mutex_unlock(&gaudi->clk_gate_mutex);
8002
8003         return soft_reset_required;
8004 }
8005
8006 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
8007 {
8008         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
8009 }
8010
8011 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
8012 {
8013         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
8014 }
8015
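/*
 * Update the clock throttling bookkeeping (reason mask and timestamps) for
 * power/thermal envelope events and log the state change.
 */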
8016 static void gaudi_print_clk_change_info(struct hl_device *hdev,
8017                                         u16 event_type)
8018 {
8019         ktime_t zero_time = ktime_set(0, 0);
8020
8021         mutex_lock(&hdev->clk_throttling.lock);
8022
8023         switch (event_type) {
8024         case GAUDI_EVENT_FIX_POWER_ENV_S:
8025                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
8026                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
8027                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
8028                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
8029                 dev_info_ratelimited(hdev->dev,
8030                         "Clock throttling due to power consumption\n");
8031                 break;
8032
8033         case GAUDI_EVENT_FIX_POWER_ENV_E:
8034                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
8035                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
8036                 dev_info_ratelimited(hdev->dev,
8037                         "Power envelope is safe, back to optimal clock\n");
8038                 break;
8039
8040         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
8041                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
8042                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
8043                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
8044                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
8045                 dev_info_ratelimited(hdev->dev,
8046                         "Clock throttling due to overheating\n");
8047                 break;
8048
8049         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
8050                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
8051                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
8052                 dev_info_ratelimited(hdev->dev,
8053                         "Thermal envelope is safe, back to optimal clock\n");
8054                 break;
8055
8056         default:
8057                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
8058                         event_type);
8059                 break;
8060         }
8061
8062         mutex_unlock(&hdev->clk_throttling.lock);
8063 }
8064
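/*
 * Main event-queue handler: dispatch a single EQ entry coming from the device
 * CPU. Fatal errors lead to a device reset, while recoverable ones are logged
 * and the interrupt is unmasked in FW.
 */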
8065 static void gaudi_handle_eqe(struct hl_device *hdev,
8066                                 struct hl_eq_entry *eq_entry)
8067 {
8068         struct gaudi_device *gaudi = hdev->asic_specific;
8069         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
8070         u32 fw_fatal_err_flag = 0;
8071         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
8072                         >> EQ_CTL_EVENT_TYPE_SHIFT);
8073         bool reset_required;
8074         u8 cause;
8075         int rc;
8076
8077         if (event_type >= GAUDI_EVENT_SIZE) {
8078                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
8079                                 event_type, GAUDI_EVENT_SIZE - 1);
8080                 return;
8081         }
8082
8083         gaudi->events_stat[event_type]++;
8084         gaudi->events_stat_aggregate[event_type]++;
8085
8086         switch (event_type) {
8087         case GAUDI_EVENT_PCIE_CORE_DERR:
8088         case GAUDI_EVENT_PCIE_IF_DERR:
8089         case GAUDI_EVENT_PCIE_PHY_DERR:
8090         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
8091         case GAUDI_EVENT_MME0_ACC_DERR:
8092         case GAUDI_EVENT_MME0_SBAB_DERR:
8093         case GAUDI_EVENT_MME1_ACC_DERR:
8094         case GAUDI_EVENT_MME1_SBAB_DERR:
8095         case GAUDI_EVENT_MME2_ACC_DERR:
8096         case GAUDI_EVENT_MME2_SBAB_DERR:
8097         case GAUDI_EVENT_MME3_ACC_DERR:
8098         case GAUDI_EVENT_MME3_SBAB_DERR:
8099         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
8100                 fallthrough;
8101         case GAUDI_EVENT_CPU_IF_ECC_DERR:
8102         case GAUDI_EVENT_PSOC_MEM_DERR:
8103         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
8104         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
8105         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
8106         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
8107         case GAUDI_EVENT_MMU_DERR:
8108         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
8109                 gaudi_print_irq_info(hdev, event_type, true);
8110                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8111                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
8112                 goto reset_device;
8113
8114         case GAUDI_EVENT_GIC500:
8115         case GAUDI_EVENT_AXI_ECC:
8116         case GAUDI_EVENT_L2_RAM_ECC:
8117         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
8118                 gaudi_print_irq_info(hdev, event_type, false);
8119                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
8120                 goto reset_device;
8121
8122         case GAUDI_EVENT_HBM0_SPI_0:
8123         case GAUDI_EVENT_HBM1_SPI_0:
8124         case GAUDI_EVENT_HBM2_SPI_0:
8125         case GAUDI_EVENT_HBM3_SPI_0:
8126                 gaudi_print_irq_info(hdev, event_type, false);
8127                 gaudi_hbm_read_interrupts(hdev,
8128                                 gaudi_hbm_event_to_dev(event_type),
8129                                 &eq_entry->hbm_ecc_data);
8130                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
8131                 goto reset_device;
8132
8133         case GAUDI_EVENT_HBM0_SPI_1:
8134         case GAUDI_EVENT_HBM1_SPI_1:
8135         case GAUDI_EVENT_HBM2_SPI_1:
8136         case GAUDI_EVENT_HBM3_SPI_1:
8137                 gaudi_print_irq_info(hdev, event_type, false);
8138                 gaudi_hbm_read_interrupts(hdev,
8139                                 gaudi_hbm_event_to_dev(event_type),
8140                                 &eq_entry->hbm_ecc_data);
8141                 hl_fw_unmask_irq(hdev, event_type);
8142                 break;
8143
8144         case GAUDI_EVENT_TPC0_DEC:
8145         case GAUDI_EVENT_TPC1_DEC:
8146         case GAUDI_EVENT_TPC2_DEC:
8147         case GAUDI_EVENT_TPC3_DEC:
8148         case GAUDI_EVENT_TPC4_DEC:
8149         case GAUDI_EVENT_TPC5_DEC:
8150         case GAUDI_EVENT_TPC6_DEC:
8151         case GAUDI_EVENT_TPC7_DEC:
8152                 gaudi_print_irq_info(hdev, event_type, true);
8153                 reset_required = gaudi_tpc_read_interrupts(hdev,
8154                                         tpc_dec_event_to_tpc_id(event_type),
8155                                         "AXI_SLV_DEC_Error");
8156                 if (reset_required) {
8157                         dev_err(hdev->dev, "reset required due to %s\n",
8158                                 gaudi_irq_map_table[event_type].name);
8159
8160                         hl_device_reset(hdev, 0);
8161                 } else {
8162                         hl_fw_unmask_irq(hdev, event_type);
8163                 }
8164                 break;
8165
8166         case GAUDI_EVENT_TPC0_KRN_ERR:
8167         case GAUDI_EVENT_TPC1_KRN_ERR:
8168         case GAUDI_EVENT_TPC2_KRN_ERR:
8169         case GAUDI_EVENT_TPC3_KRN_ERR:
8170         case GAUDI_EVENT_TPC4_KRN_ERR:
8171         case GAUDI_EVENT_TPC5_KRN_ERR:
8172         case GAUDI_EVENT_TPC6_KRN_ERR:
8173         case GAUDI_EVENT_TPC7_KRN_ERR:
8174                 gaudi_print_irq_info(hdev, event_type, true);
8175                 reset_required = gaudi_tpc_read_interrupts(hdev,
8176                                         tpc_krn_event_to_tpc_id(event_type),
8177                                         "KRN_ERR");
8178                 if (reset_required) {
8179                         dev_err(hdev->dev, "reset required due to %s\n",
8180                                 gaudi_irq_map_table[event_type].name);
8181
8182                         hl_device_reset(hdev, 0);
8183                 } else {
8184                         hl_fw_unmask_irq(hdev, event_type);
8185                 }
8186                 break;
8187
8188         case GAUDI_EVENT_PCIE_CORE_SERR:
8189         case GAUDI_EVENT_PCIE_IF_SERR:
8190         case GAUDI_EVENT_PCIE_PHY_SERR:
8191         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8192         case GAUDI_EVENT_MME0_ACC_SERR:
8193         case GAUDI_EVENT_MME0_SBAB_SERR:
8194         case GAUDI_EVENT_MME1_ACC_SERR:
8195         case GAUDI_EVENT_MME1_SBAB_SERR:
8196         case GAUDI_EVENT_MME2_ACC_SERR:
8197         case GAUDI_EVENT_MME2_SBAB_SERR:
8198         case GAUDI_EVENT_MME3_ACC_SERR:
8199         case GAUDI_EVENT_MME3_SBAB_SERR:
8200         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8201         case GAUDI_EVENT_CPU_IF_ECC_SERR:
8202         case GAUDI_EVENT_PSOC_MEM_SERR:
8203         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8204         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8205         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8206         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8207                 fallthrough;
8208         case GAUDI_EVENT_MMU_SERR:
8209                 gaudi_print_irq_info(hdev, event_type, true);
8210                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8211                 hl_fw_unmask_irq(hdev, event_type);
8212                 break;
8213
8214         case GAUDI_EVENT_PCIE_DEC:
8215         case GAUDI_EVENT_MME0_WBC_RSP:
8216         case GAUDI_EVENT_MME0_SBAB0_RSP:
8217         case GAUDI_EVENT_MME1_WBC_RSP:
8218         case GAUDI_EVENT_MME1_SBAB0_RSP:
8219         case GAUDI_EVENT_MME2_WBC_RSP:
8220         case GAUDI_EVENT_MME2_SBAB0_RSP:
8221         case GAUDI_EVENT_MME3_WBC_RSP:
8222         case GAUDI_EVENT_MME3_SBAB0_RSP:
8223         case GAUDI_EVENT_CPU_AXI_SPLITTER:
8224         case GAUDI_EVENT_PSOC_AXI_DEC:
8225         case GAUDI_EVENT_PSOC_PRSTN_FALL:
8226         case GAUDI_EVENT_MMU_PAGE_FAULT:
8227         case GAUDI_EVENT_MMU_WR_PERM:
8228         case GAUDI_EVENT_RAZWI_OR_ADC:
8229         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8230         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8231         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8232                 fallthrough;
8233         case GAUDI_EVENT_NIC0_QM0:
8234         case GAUDI_EVENT_NIC0_QM1:
8235         case GAUDI_EVENT_NIC1_QM0:
8236         case GAUDI_EVENT_NIC1_QM1:
8237         case GAUDI_EVENT_NIC2_QM0:
8238         case GAUDI_EVENT_NIC2_QM1:
8239         case GAUDI_EVENT_NIC3_QM0:
8240         case GAUDI_EVENT_NIC3_QM1:
8241         case GAUDI_EVENT_NIC4_QM0:
8242         case GAUDI_EVENT_NIC4_QM1:
8243         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8244                 gaudi_print_irq_info(hdev, event_type, true);
8245                 gaudi_handle_qman_err(hdev, event_type);
8246                 hl_fw_unmask_irq(hdev, event_type);
8247                 break;
8248
8249         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8250                 gaudi_print_irq_info(hdev, event_type, true);
8251                 goto reset_device;
8252
8253         case GAUDI_EVENT_TPC0_BMON_SPMU:
8254         case GAUDI_EVENT_TPC1_BMON_SPMU:
8255         case GAUDI_EVENT_TPC2_BMON_SPMU:
8256         case GAUDI_EVENT_TPC3_BMON_SPMU:
8257         case GAUDI_EVENT_TPC4_BMON_SPMU:
8258         case GAUDI_EVENT_TPC5_BMON_SPMU:
8259         case GAUDI_EVENT_TPC6_BMON_SPMU:
8260         case GAUDI_EVENT_TPC7_BMON_SPMU:
8261         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8262                 gaudi_print_irq_info(hdev, event_type, false);
8263                 hl_fw_unmask_irq(hdev, event_type);
8264                 break;
8265
8266         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8267                 gaudi_print_irq_info(hdev, event_type, false);
8268                 gaudi_print_sm_sei_info(hdev, event_type,
8269                                         &eq_entry->sm_sei_data);
8270                 rc = hl_state_dump(hdev);
8271                 if (rc)
8272                         dev_err(hdev->dev,
8273                                 "Error during system state dump %d\n", rc);
8274                 hl_fw_unmask_irq(hdev, event_type);
8275                 break;
8276
8277         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8278                 gaudi_print_clk_change_info(hdev, event_type);
8279                 hl_fw_unmask_irq(hdev, event_type);
8280                 break;
8281
8282         case GAUDI_EVENT_PSOC_GPIO_U16_0:
8283                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8284                 dev_err(hdev->dev,
8285                         "Received high temp H/W interrupt %d (cause %d)\n",
8286                         event_type, cause);
8287                 break;
8288
8289         case GAUDI_EVENT_DEV_RESET_REQ:
8290                 gaudi_print_irq_info(hdev, event_type, false);
8291                 goto reset_device;
8292
8293         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8294                 gaudi_print_irq_info(hdev, event_type, false);
8295                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8296                 goto reset_device;
8297
8298         case GAUDI_EVENT_FW_ALIVE_S:
8299                 gaudi_print_irq_info(hdev, event_type, false);
8300                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8301                 goto reset_device;
8302
8303         default:
8304                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8305                                 event_type);
8306                 break;
8307         }
8308
8309         return;
8310
8311 reset_device:
8312         if (hdev->asic_prop.fw_security_enabled)
8313                 hl_device_reset(hdev, HL_DRV_RESET_HARD
8314                                         | HL_DRV_RESET_BYPASS_REQ_TO_FW
8315                                         | fw_fatal_err_flag);
8316         else if (hdev->hard_reset_on_fw_events)
8317                 hl_device_reset(hdev, HL_DRV_RESET_HARD | fw_fatal_err_flag);
8318         else
8319                 hl_fw_unmask_irq(hdev, event_type);
8320 }
8321
8322 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8323                                         u32 *size)
8324 {
8325         struct gaudi_device *gaudi = hdev->asic_specific;
8326
8327         if (aggregate) {
8328                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8329                 return gaudi->events_stat_aggregate;
8330         }
8331
8332         *size = (u32) sizeof(gaudi->events_stat);
8333         return gaudi->events_stat;
8334 }
8335
8336 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8337                                         u32 flags)
8338 {
8339         struct gaudi_device *gaudi = hdev->asic_specific;
8340         u32 status, timeout_usec;
8341         int rc;
8342
8343         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8344                 hdev->reset_info.hard_reset_pending)
8345                 return 0;
8346
8347         if (hdev->pldm)
8348                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8349         else
8350                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8351
8352         /* L0 & L1 invalidation */
8353         WREG32(mmSTLB_INV_PS, 3);
8354         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8355         WREG32(mmSTLB_INV_PS, 2);
8356
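        /* Wait for the STLB to report that the invalidation has completed */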
8357         rc = hl_poll_timeout(
8358                 hdev,
8359                 mmSTLB_INV_PS,
8360                 status,
8361                 !status,
8362                 1000,
8363                 timeout_usec);
8364
8365         WREG32(mmSTLB_INV_SET, 0);
8366
8367         return rc;
8368 }
8369
8370 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8371                                                 bool is_hard, u32 flags,
8372                                                 u32 asid, u64 va, u64 size)
8373 {
8374         /* Treat as invalidate all because there is no range invalidation
8375          * in Gaudi
8376          */
8377         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8378 }
8379
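/*
 * Program the hop0 page-table physical address of the given ASID and wait for
 * the MMU to acknowledge the new configuration.
 */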
8380 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8381                                         u32 asid, u64 phys_addr)
8382 {
8383         u32 status, timeout_usec;
8384         int rc;
8385
8386         if (hdev->pldm)
8387                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8388         else
8389                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8390
8391         WREG32(MMU_ASID, asid);
8392         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8393         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8394         WREG32(MMU_BUSY, 0x80000000);
8395
8396         rc = hl_poll_timeout(
8397                 hdev,
8398                 MMU_BUSY,
8399                 status,
8400                 !(status & 0x80000000),
8401                 1000,
8402                 timeout_usec);
8403
8404         if (rc) {
8405                 dev_err(hdev->dev,
8406                         "Timeout during MMU hop0 config of asid %d\n", asid);
8407                 return rc;
8408         }
8409
8410         return 0;
8411 }
8412
8413 static int gaudi_send_heartbeat(struct hl_device *hdev)
8414 {
8415         struct gaudi_device *gaudi = hdev->asic_specific;
8416
8417         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8418                 return 0;
8419
8420         return hl_fw_send_heartbeat(hdev);
8421 }
8422
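/*
 * Handshake with the device CPU (CpuCP), then cache the card name, card type
 * and default max power.
 */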
8423 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8424 {
8425         struct gaudi_device *gaudi = hdev->asic_specific;
8426         struct asic_fixed_properties *prop = &hdev->asic_prop;
8427         int rc;
8428
8429         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8430                 return 0;
8431
8432         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8433                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8434                                         mmCPU_BOOT_ERR1);
8435         if (rc)
8436                 return rc;
8437
8438         if (!strlen(prop->cpucp_info.card_name))
8439                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8440                                 CARD_NAME_MAX_LEN);
8441
8442         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8443
8444         set_default_power_values(hdev);
8445
8446         hdev->max_power = prop->max_power_default;
8447
8448         return 0;
8449 }
8450
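/*
 * Check the idle status of the DMA, TPC, MME and NIC engines. Per-engine
 * status is optionally reported in the mask array and dumped to the given
 * seq_file (e.g. for debugfs).
 */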
8451 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8452                                         u8 mask_len, struct seq_file *s)
8453 {
8454         struct gaudi_device *gaudi = hdev->asic_specific;
8455         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8456         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8457         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8458         unsigned long *mask = (unsigned long *)mask_arr;
8459         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8460         bool is_idle = true, is_eng_idle, is_slave;
8461         u64 offset;
8462         int i, dma_id, port;
8463
8464         mutex_lock(&gaudi->clk_gate_mutex);
8465
8466         hdev->asic_funcs->disable_clock_gating(hdev);
8467
8468         if (s)
8469                 seq_puts(s,
8470                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8471                         "---  -------  ------------  ----------  -------------\n");
8472
8473         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8474                 dma_id = gaudi_dma_assignment[i];
8475                 offset = dma_id * DMA_QMAN_OFFSET;
8476
8477                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8478                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8479                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8480                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8481                                 IS_DMA_IDLE(dma_core_sts0);
8482                 is_idle &= is_eng_idle;
8483
8484                 if (mask && !is_eng_idle)
8485                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8486                 if (s)
8487                         seq_printf(s, fmt, dma_id,
8488                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8489                                 qm_cgm_sts, dma_core_sts0);
8490         }
8491
8492         if (s)
8493                 seq_puts(s,
8494                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8495                         "---  -------  ------------  ----------  ----------\n");
8496
8497         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8498                 offset = i * TPC_QMAN_OFFSET;
8499                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8500                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8501                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8502                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8503                                 IS_TPC_IDLE(tpc_cfg_sts);
8504                 is_idle &= is_eng_idle;
8505
8506                 if (mask && !is_eng_idle)
8507                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8508                 if (s)
8509                         seq_printf(s, fmt, i,
8510                                 is_eng_idle ? "Y" : "N",
8511                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8512         }
8513
8514         if (s)
8515                 seq_puts(s,
8516                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8517                         "---  -------  ------------  ----------  -----------\n");
8518
8519         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8520                 offset = i * MME_QMAN_OFFSET;
8521                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8522                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8523
8524                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8525                 is_slave = i % 2;
8526                 if (!is_slave) {
8527                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8528                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8529                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8530                 }
8531
8532                 is_idle &= is_eng_idle;
8533
8534                 if (mask && !is_eng_idle)
8535                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8536                 if (s) {
8537                         if (!is_slave)
8538                                 seq_printf(s, fmt, i,
8539                                         is_eng_idle ? "Y" : "N",
8540                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8541                         else
8542                                 seq_printf(s, mme_slave_fmt, i,
8543                                         is_eng_idle ? "Y" : "N", "-",
8544                                         "-", mme_arch_sts);
8545                 }
8546         }
8547
8548         if (s)
8549                 seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8550                                 "---  -------  ------------  ----------\n");
8551
8552         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8553                 offset = i * NIC_MACRO_QMAN_OFFSET;
8554                 port = 2 * i;
8555                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8556                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8557                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8558                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8559                         is_idle &= is_eng_idle;
8560
8561                         if (mask && !is_eng_idle)
8562                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8563                         if (s)
8564                                 seq_printf(s, nic_fmt, port,
8565                                                 is_eng_idle ? "Y" : "N",
8566                                                 qm_glbl_sts0, qm_cgm_sts);
8567                 }
8568
8569                 port = 2 * i + 1;
8570                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8571                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8572                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8573                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8574                         is_idle &= is_eng_idle;
8575
8576                         if (mask && !is_eng_idle)
8577                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8578                         if (s)
8579                                 seq_printf(s, nic_fmt, port,
8580                                                 is_eng_idle ? "Y" : "N",
8581                                                 qm_glbl_sts0, qm_cgm_sts);
8582                 }
8583         }
8584
8585         if (s)
8586                 seq_puts(s, "\n");
8587
8588         hdev->asic_funcs->set_clock_gating(hdev);
8589
8590         mutex_unlock(&gaudi->clk_gate_mutex);
8591
8592         return is_idle;
8593 }
8594
8595 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8596         __acquires(&gaudi->hw_queues_lock)
8597 {
8598         struct gaudi_device *gaudi = hdev->asic_specific;
8599
8600         spin_lock(&gaudi->hw_queues_lock);
8601 }
8602
8603 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8604         __releases(&gaudi->hw_queues_lock)
8605 {
8606         struct gaudi_device *gaudi = hdev->asic_specific;
8607
8608         spin_unlock(&gaudi->hw_queues_lock);
8609 }
8610
8611 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8612 {
8613         return hdev->pdev->device;
8614 }
8615
8616 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8617                                 size_t max_size)
8618 {
8619         struct gaudi_device *gaudi = hdev->asic_specific;
8620
8621         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8622                 return 0;
8623
8624         return hl_fw_get_eeprom_data(hdev, data, max_size);
8625 }
8626
8627 /*
8628  * this function should be used only during initialization and/or after reset,
8629  * when there are no active users.
8630  */
8631 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8632                                 u32 tpc_id)
8633 {
8634         struct gaudi_device *gaudi = hdev->asic_specific;
8635         u64 kernel_timeout;
8636         u32 status, offset;
8637         int rc;
8638
8639         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8640
8641         if (hdev->pldm)
8642                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8643         else
8644                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8645
8646         mutex_lock(&gaudi->clk_gate_mutex);
8647
8648         hdev->asic_funcs->disable_clock_gating(hdev);
8649
8650         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8651                         lower_32_bits(tpc_kernel));
8652         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8653                         upper_32_bits(tpc_kernel));
8654
8655         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8656                         lower_32_bits(tpc_kernel));
8657         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8658                         upper_32_bits(tpc_kernel));
8659         /* set a valid LUT pointer, content is of no significance */
8660         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8661                         lower_32_bits(tpc_kernel));
8662         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8663                         upper_32_bits(tpc_kernel));
8664
8665         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8666                         lower_32_bits(CFG_BASE +
8667                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8668
8669         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8670                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8671                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8672         /* wait a bit for the engine to start executing */
8673         usleep_range(1000, 1500);
8674
8675         /* wait until engine has finished executing */
8676         rc = hl_poll_timeout(
8677                 hdev,
8678                 mmTPC0_CFG_STATUS + offset,
8679                 status,
8680                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8681                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8682                 1000,
8683                 kernel_timeout);
8684
8685         if (rc) {
8686                 dev_err(hdev->dev,
8687                         "Timeout while waiting for TPC%d icache prefetch\n",
8688                         tpc_id);
8689                 hdev->asic_funcs->set_clock_gating(hdev);
8690                 mutex_unlock(&gaudi->clk_gate_mutex);
8691                 return -EIO;
8692         }
8693
8694         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8695                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8696
8697         /* wait a bit for the engine to start executing */
8698         usleep_range(1000, 1500);
8699
8700         /* wait until engine has finished executing */
8701         rc = hl_poll_timeout(
8702                 hdev,
8703                 mmTPC0_CFG_STATUS + offset,
8704                 status,
8705                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8706                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8707                 1000,
8708                 kernel_timeout);
8709
8710         if (rc) {
8711                 dev_err(hdev->dev,
8712                         "Timeout while waiting for TPC%d vector pipe\n",
8713                         tpc_id);
8714                 hdev->asic_funcs->set_clock_gating(hdev);
8715                 mutex_unlock(&gaudi->clk_gate_mutex);
8716                 return -EIO;
8717         }
8718
8719         rc = hl_poll_timeout(
8720                 hdev,
8721                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8722                 status,
8723                 (status == 0),
8724                 1000,
8725                 kernel_timeout);
8726
8727         hdev->asic_funcs->set_clock_gating(hdev);
8728         mutex_unlock(&gaudi->clk_gate_mutex);
8729
8730         if (rc) {
8731                 dev_err(hdev->dev,
8732                         "Timeout while waiting for TPC%d kernel to execute\n",
8733                         tpc_id);
8734                 return -EIO;
8735         }
8736
8737         return 0;
8738 }
8739
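/*
 * Allocate a host-resident pool of internal command buffers used for
 * collective operations and map it into the context's host VA range.
 */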
8740 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8741                 struct hl_ctx *ctx)
8742 {
8743         struct gaudi_device *gaudi = hdev->asic_specific;
8744         int min_alloc_order, rc, collective_cb_size;
8745
8746         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8747                 return 0;
8748
8749         hdev->internal_cb_pool_virt_addr =
8750                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8751                                         HOST_SPACE_INTERNAL_CB_SZ,
8752                                         &hdev->internal_cb_pool_dma_addr,
8753                                         GFP_KERNEL | __GFP_ZERO);
8754
8755         if (!hdev->internal_cb_pool_virt_addr)
8756                 return -ENOMEM;
8757
8758         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8759                         sizeof(struct packet_fence);
8760         min_alloc_order = ilog2(collective_cb_size);
8761
8762         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8763         if (!hdev->internal_cb_pool) {
8764                 dev_err(hdev->dev,
8765                         "Failed to create internal CB pool\n");
8766                 rc = -ENOMEM;
8767                 goto free_internal_cb_pool;
8768         }
8769
8770         rc = gen_pool_add(hdev->internal_cb_pool,
8771                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8772                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8773         if (rc) {
8774                 dev_err(hdev->dev,
8775                         "Failed to add memory to internal CB pool\n");
8776                 rc = -EFAULT;
8777                 goto destroy_internal_cb_pool;
8778         }
8779
8780         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8781                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8782                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8783
8784         if (!hdev->internal_cb_va_base) {
8785                 rc = -ENOMEM;
8786                 goto destroy_internal_cb_pool;
8787         }
8788
8789         mutex_lock(&ctx->mmu_lock);
8790         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8791                         hdev->internal_cb_pool_dma_addr,
8792                         HOST_SPACE_INTERNAL_CB_SZ);
8793
8794         hdev->asic_funcs->mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8795         mutex_unlock(&ctx->mmu_lock);
8796
8797         if (rc)
8798                 goto unreserve_internal_cb_pool;
8799
8800         return 0;
8801
8802 unreserve_internal_cb_pool:
8803         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8804                         HOST_SPACE_INTERNAL_CB_SZ);
8805 destroy_internal_cb_pool:
8806         gen_pool_destroy(hdev->internal_cb_pool);
8807 free_internal_cb_pool:
8808         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8809                         HOST_SPACE_INTERNAL_CB_SZ,
8810                         hdev->internal_cb_pool_virt_addr,
8811                         hdev->internal_cb_pool_dma_addr);
8812
8813         return rc;
8814 }
8815
8816 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8817                 struct hl_ctx *ctx)
8818 {
8819         struct gaudi_device *gaudi = hdev->asic_specific;
8820
8821         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8822                 return;
8823
8824         mutex_lock(&ctx->mmu_lock);
8825         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8826                         HOST_SPACE_INTERNAL_CB_SZ);
8827         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8828                         HOST_SPACE_INTERNAL_CB_SZ);
8829         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8830         mutex_unlock(&ctx->mmu_lock);
8831
8832         gen_pool_destroy(hdev->internal_cb_pool);
8833
8834         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8835                         HOST_SPACE_INTERNAL_CB_SZ,
8836                         hdev->internal_cb_pool_virt_addr,
8837                         hdev->internal_cb_pool_dma_addr);
8838 }
8839
8840 static int gaudi_ctx_init(struct hl_ctx *ctx)
8841 {
8842         int rc;
8843
8844         if (ctx->asid == HL_KERNEL_ASID_ID)
8845                 return 0;
8846
8847         rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8848         if (rc)
8849                 return rc;
8850
8851         rc = gaudi_restore_user_registers(ctx->hdev);
8852         if (rc)
8853                 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8854
8855         return rc;
8856 }
8857
8858 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8859 {
8860         if (ctx->asid == HL_KERNEL_ASID_ID)
8861                 return;
8862
8863         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8864 }
8865
8866 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8867 {
8868         return gaudi_cq_assignment[cq_idx];
8869 }
8870
8871 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8872 {
8873         return sizeof(struct packet_msg_short) +
8874                         sizeof(struct packet_msg_prot) * 2;
8875 }
8876
8877 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8878 {
8879         return sizeof(struct packet_msg_short) * 4 +
8880                         sizeof(struct packet_fence) +
8881                         sizeof(struct packet_msg_prot) * 2;
8882 }
8883
8884 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8885 {
8886         return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8887 }
8888
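/*
 * Append a MSG_SHORT packet to the given CB that increments sync object
 * sob_id by 1 (ADD mode). Returns the updated CB size.
 */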
8889 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8890                                 u32 size, bool eb)
8891 {
8892         struct hl_cb *cb = (struct hl_cb *) data;
8893         struct packet_msg_short *pkt;
8894         u32 value, ctl, pkt_size = sizeof(*pkt);
8895
8896         pkt = cb->kernel_address + size;
8897         memset(pkt, 0, pkt_size);
8898
8899         /* Inc by 1, Mode ADD */
8900         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8901         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8902
8903         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8904         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8905         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8906         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8907         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8908         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8909         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8910
8911         pkt->value = cpu_to_le32(value);
8912         pkt->ctl = cpu_to_le32(ctl);
8913
8914         return size + pkt_size;
8915 }
8916
8917 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8918                                         u16 addr)
8919 {
8920         u32 ctl, pkt_size = sizeof(*pkt);
8921
8922         memset(pkt, 0, pkt_size);
8923
8924         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8925         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8926         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8927         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8928         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8929         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8930
8931         pkt->value = cpu_to_le32(value);
8932         pkt->ctl = cpu_to_le32(ctl);
8933
8934         return pkt_size;
8935 }
8936
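/*
 * Build a MSG_SHORT packet that arms monitor mon_id to fire when the sync
 * object group containing sob_base reaches sob_val (greater-or-equal mode,
 * limited by the generated mask). Returns the packet size, or 0 on error.
 */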
8937 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8938                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8939                 u16 sob_val, u16 mon_id)
8940 {
8941         u64 monitor_base;
8942         u32 ctl, value, pkt_size = sizeof(*pkt);
8943         u16 msg_addr_offset;
8944         u8 mask;
8945
8946         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8947                 dev_err(hdev->dev,
8948                         "sob_base %u (mask %#x) is not valid\n",
8949                         sob_base, sob_mask);
8950                 return 0;
8951         }
8952
8953         /*
8954          * monitor_base should be the content of the base0 address registers,
8955          * so it will be added to the msg short offsets
8956          */
8957         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8958
8959         msg_addr_offset =
8960                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8961                                 monitor_base;
8962
8963         memset(pkt, 0, pkt_size);
8964
8965         /* Monitor config packet: bind the monitor to a sync object */
8966         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8967         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8968         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8969                         0); /* GREATER OR EQUAL */
8970         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8971
8972         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8973         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8974         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8975         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8976         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8977         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8978         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8979
8980         pkt->value = cpu_to_le32(value);
8981         pkt->ctl = cpu_to_le32(ctl);
8982
8983         return pkt_size;
8984 }
8985
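/*
 * gaudi_add_fence_pkt() - emit the FENCE packet that blocks the stream.
 *
 * The packet waits on fence ID 2 of the queue's CP: the CP stalls until the
 * fence counter reaches the target value 1 (written there by the armed
 * monitor's payload) and then decrements it by 1.
 */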
8986 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8987 {
8988         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8989
8990         memset(pkt, 0, pkt_size);
8991
8992         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8993         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8994         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8995
8996         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8997         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8998         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8999         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
9000
9001         pkt->cfg = cpu_to_le32(cfg);
9002         pkt->ctl = cpu_to_le32(ctl);
9003
9004         return pkt_size;
9005 }
9006
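/*
 * gaudi_get_fence_addr() - resolve the CP_FENCE2_RDATA address of a queue.
 *
 * Only the queue IDs that may carry a wait CB are mapped (a subset of the
 * DMA and TPC queues plus all NIC queues); any other queue ID returns
 * -EINVAL. For NIC queues the register address is derived from the NIC
 * macro and engine offsets, since each NIC macro hosts two QMAN engines.
 */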
9007 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
9008 {
9009         u32 offset, nic_index;
9010
9011         switch (queue_id) {
9012         case GAUDI_QUEUE_ID_DMA_0_0:
9013                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
9014                 break;
9015         case GAUDI_QUEUE_ID_DMA_0_1:
9016                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
9017                 break;
9018         case GAUDI_QUEUE_ID_DMA_0_2:
9019                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
9020                 break;
9021         case GAUDI_QUEUE_ID_DMA_0_3:
9022                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
9023                 break;
9024         case GAUDI_QUEUE_ID_DMA_1_0:
9025                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
9026                 break;
9027         case GAUDI_QUEUE_ID_DMA_1_1:
9028                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
9029                 break;
9030         case GAUDI_QUEUE_ID_DMA_1_2:
9031                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
9032                 break;
9033         case GAUDI_QUEUE_ID_DMA_1_3:
9034                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
9035                 break;
9036         case GAUDI_QUEUE_ID_DMA_5_0:
9037                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
9038                 break;
9039         case GAUDI_QUEUE_ID_DMA_5_1:
9040                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
9041                 break;
9042         case GAUDI_QUEUE_ID_DMA_5_2:
9043                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
9044                 break;
9045         case GAUDI_QUEUE_ID_DMA_5_3:
9046                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
9047                 break;
9048         case GAUDI_QUEUE_ID_TPC_7_0:
9049                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
9050                 break;
9051         case GAUDI_QUEUE_ID_TPC_7_1:
9052                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
9053                 break;
9054         case GAUDI_QUEUE_ID_TPC_7_2:
9055                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
9056                 break;
9057         case GAUDI_QUEUE_ID_TPC_7_3:
9058                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
9059                 break;
9060         case GAUDI_QUEUE_ID_NIC_0_0:
9061         case GAUDI_QUEUE_ID_NIC_1_0:
9062         case GAUDI_QUEUE_ID_NIC_2_0:
9063         case GAUDI_QUEUE_ID_NIC_3_0:
9064         case GAUDI_QUEUE_ID_NIC_4_0:
9065         case GAUDI_QUEUE_ID_NIC_5_0:
9066         case GAUDI_QUEUE_ID_NIC_6_0:
9067         case GAUDI_QUEUE_ID_NIC_7_0:
9068         case GAUDI_QUEUE_ID_NIC_8_0:
9069         case GAUDI_QUEUE_ID_NIC_9_0:
9070                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
9071                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
9072                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
9073                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
9074                 break;
9075         case GAUDI_QUEUE_ID_NIC_0_1:
9076         case GAUDI_QUEUE_ID_NIC_1_1:
9077         case GAUDI_QUEUE_ID_NIC_2_1:
9078         case GAUDI_QUEUE_ID_NIC_3_1:
9079         case GAUDI_QUEUE_ID_NIC_4_1:
9080         case GAUDI_QUEUE_ID_NIC_5_1:
9081         case GAUDI_QUEUE_ID_NIC_6_1:
9082         case GAUDI_QUEUE_ID_NIC_7_1:
9083         case GAUDI_QUEUE_ID_NIC_8_1:
9084         case GAUDI_QUEUE_ID_NIC_9_1:
9085                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
9086                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
9087                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
9088                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
9089                 break;
9090         case GAUDI_QUEUE_ID_NIC_0_2:
9091         case GAUDI_QUEUE_ID_NIC_1_2:
9092         case GAUDI_QUEUE_ID_NIC_2_2:
9093         case GAUDI_QUEUE_ID_NIC_3_2:
9094         case GAUDI_QUEUE_ID_NIC_4_2:
9095         case GAUDI_QUEUE_ID_NIC_5_2:
9096         case GAUDI_QUEUE_ID_NIC_6_2:
9097         case GAUDI_QUEUE_ID_NIC_7_2:
9098         case GAUDI_QUEUE_ID_NIC_8_2:
9099         case GAUDI_QUEUE_ID_NIC_9_2:
9100                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
9101                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
9102                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
9103                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
9104                 break;
9105         case GAUDI_QUEUE_ID_NIC_0_3:
9106         case GAUDI_QUEUE_ID_NIC_1_3:
9107         case GAUDI_QUEUE_ID_NIC_2_3:
9108         case GAUDI_QUEUE_ID_NIC_3_3:
9109         case GAUDI_QUEUE_ID_NIC_4_3:
9110         case GAUDI_QUEUE_ID_NIC_5_3:
9111         case GAUDI_QUEUE_ID_NIC_6_3:
9112         case GAUDI_QUEUE_ID_NIC_7_3:
9113         case GAUDI_QUEUE_ID_NIC_8_3:
9114         case GAUDI_QUEUE_ID_NIC_9_3:
9115                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
9116                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
9117                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
9118                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
9119                 break;
9120         default:
9121                 return -EINVAL;
9122         }
9123
9124         *addr = CFG_BASE + offset;
9125
9126         return 0;
9127 }
9128
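/*
 * gaudi_add_mon_pkts() - program the payload of monitor @mon_id.
 *
 * Three MSG_SHORT packets are appended to @buf: the low and high 32 bits of
 * @fence_addr (the address the monitor writes to when it fires) and the
 * payload data itself, which is the value 1 expected by the fence packet.
 * Returns the total size added.
 */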
9129 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
9130 {
9131         u64 monitor_base;
9132         u32 size = 0;
9133         u16 msg_addr_offset;
9134
9135         /*
9136          * monitor_base should match the content of the base0 address
9137          * registers, since the HW adds that base to the msg_short offsets
9138          */
9139         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
9140
9141         /* First monitor config packet: low address of the sync */
9142         msg_addr_offset =
9143                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
9144                                 monitor_base;
9145
9146         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
9147                                         msg_addr_offset);
9148
9149         /* Second monitor config packet: high address of the sync */
9150         msg_addr_offset =
9151                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
9152                                 monitor_base;
9153
9154         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
9155                                         msg_addr_offset);
9156
9157         /*
9158          * Third monitor config packet: the payload, i.e. what to write when the
9159          * sync triggers
9160          */
9161         msg_addr_offset =
9162                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
9163                                 monitor_base;
9164
9165         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
9166
9167         return size;
9168 }
9169
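/*
 * gaudi_gen_wait_cb() - build a "wait" command buffer at offset prop->size.
 *
 * The generated sequence consists of five packets:
 *   1-3. MSG_SHORT - monitor payload address (low/high) and payload data (1)
 *   4.   MSG_SHORT - arm the monitor on the requested SOB group/mask/value
 *   5.   FENCE     - stall the queue's CP on fence ID 2 until the monitor
 *                    payload write releases it
 *
 * Returns the new CB size, or 0 if the queue has no known fence address.
 */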
9170 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
9171                                 struct hl_gen_wait_properties *prop)
9172 {
9173         struct hl_cb *cb = (struct hl_cb *) prop->data;
9174         void *buf = cb->kernel_address;
9175         u64 fence_addr = 0;
9176         u32 size = prop->size;
9177
9178         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
9179                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
9180                                 prop->q_idx);
9181                 return 0;
9182         }
9183
9184         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
9185         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
9186                         prop->sob_mask, prop->sob_val, prop->mon_id);
9187         size += gaudi_add_fence_pkt(buf + size);
9188
9189         return size;
9190 }
9191
9192 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
9193 {
9194         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
9195
9196         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
9197                 hw_sob->sob_id);
9198
9199         WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9200                         hw_sob->sob_id * 4, 0);
9201
9202         kref_init(&hw_sob->kref);
9203 }
9204
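/*
 * gaudi_set_dma_mask_from_fw() - pick the host DMA mask advertised by FW.
 *
 * If the firmware left HL_POWER9_HOST_MAGIC in the non-reset scratch
 * register, a 64-bit DMA mask is used (POWER9 hosts); otherwise the default
 * 48-bit mask is kept.
 */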
9205 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9206 {
9207         if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
9208                                                         HL_POWER9_HOST_MAGIC) {
9209                 hdev->power9_64bit_dma_enable = 1;
9210                 hdev->dma_mask = 64;
9211         } else {
9212                 hdev->power9_64bit_dma_enable = 0;
9213                 hdev->dma_mask = 48;
9214         }
9215 }
9216
9217 static u64 gaudi_get_device_time(struct hl_device *hdev)
9218 {
9219         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9220
9221         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9222 }
9223
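/*
 * Gaudi does not expose HW blocks for direct user-space mapping, so both the
 * block-id lookup and the block mmap callbacks simply return -EPERM.
 */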
9224 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9225                                 u32 *block_size, u32 *block_id)
9226 {
9227         return -EPERM;
9228 }
9229
9230 static int gaudi_block_mmap(struct hl_device *hdev,
9231                                 struct vm_area_struct *vma,
9232                                 u32 block_id, u32 block_size)
9233 {
9234         return -EPERM;
9235 }
9236
9237 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9238 {
9239         struct cpu_dyn_regs *dyn_regs =
9240                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9241         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9242                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
9243                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
9244
9245         WREG32(irq_handler_offset,
9246                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
9247 }
9248
9249 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9250 {
9251         switch (pll_idx) {
9252         case HL_GAUDI_CPU_PLL: return CPU_PLL;
9253         case HL_GAUDI_PCI_PLL: return PCI_PLL;
9254         case HL_GAUDI_NIC_PLL: return NIC_PLL;
9255         case HL_GAUDI_DMA_PLL: return DMA_PLL;
9256         case HL_GAUDI_MESH_PLL: return MESH_PLL;
9257         case HL_GAUDI_MME_PLL: return MME_PLL;
9258         case HL_GAUDI_TPC_PLL: return TPC_PLL;
9259         case HL_GAUDI_IF_PLL: return IF_PLL;
9260         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9261         case HL_GAUDI_HBM_PLL: return HBM_PLL;
9262         default: return -EINVAL;
9263         }
9264 }
9265
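/*
 * The sync-to-engine map is used by the state dump code to translate a sync
 * object ID back to the engine that signals it. A register value of 0 or
 * 0xffffffff does not point at a valid SOB, so no entry is added for it.
 */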
9266 static int gaudi_add_sync_to_engine_map_entry(
9267         struct hl_sync_to_engine_map *map, u32 reg_value,
9268         enum hl_sync_engine_type engine_type, u32 engine_id)
9269 {
9270         struct hl_sync_to_engine_map_entry *entry;
9271
9272         /* The register value holds a partial address of the sync object
9273          * and is used as its unique identifier. To obtain it, the CFG
9274          * base bits are stripped from the value.
9275          */
9276         if (reg_value == 0 || reg_value == 0xffffffff)
9277                 return 0;
9278         reg_value -= (u32)CFG_BASE;
9279
9280         /* create a new hash entry */
9281         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9282         if (!entry)
9283                 return -ENOMEM;
9284         entry->engine_type = engine_type;
9285         entry->engine_id = engine_id;
9286         entry->sync_id = reg_value;
9287         hash_add(map->tb, &entry->node, reg_value);
9288
9289         return 0;
9290 }
9291
9292 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9293                                 struct hl_sync_to_engine_map *map)
9294 {
9295         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9296         struct gaudi_device *gaudi = hdev->asic_specific;
9297         int i, j, rc;
9298         u32 reg_value;
9299
9300         /* Iterate over TPC engines */
9301         for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9302                 /* TPC registers must be accessed with clock gating disabled */
9303                 mutex_lock(&gaudi->clk_gate_mutex);
9304                 hdev->asic_funcs->disable_clock_gating(hdev);
9305
9306                 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9307                                         sds->props[SP_NEXT_TPC] * i);
9308
9309                 /* We can re-enable clock gating */
9310                 hdev->asic_funcs->set_clock_gating(hdev);
9311                 mutex_unlock(&gaudi->clk_gate_mutex);
9312
9313                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9314                                                         ENGINE_TPC, i);
9315                 if (rc)
9316                         goto free_sync_to_engine_map;
9317         }
9318
9319         /* Iterate over MME engines */
9320         for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9321                 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9322                         /* MME registers must be accessed with clock gating
9323                          * disabled
9324                          */
9325                         mutex_lock(&gaudi->clk_gate_mutex);
9326                         hdev->asic_funcs->disable_clock_gating(hdev);
9327
9328                         reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9329                                                 sds->props[SP_NEXT_MME] * i +
9330                                                 j * sizeof(u32));
9331
9332                         /* We can re-enable clock gating */
9333                         hdev->asic_funcs->set_clock_gating(hdev);
9334                         mutex_unlock(&gaudi->clk_gate_mutex);
9335
9336                         rc = gaudi_add_sync_to_engine_map_entry(
9337                                 map, reg_value, ENGINE_MME,
9338                                 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9339                         if (rc)
9340                                 goto free_sync_to_engine_map;
9341                 }
9342         }
9343
9344         /* Iterate over DMA engines */
9345         for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9346                 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9347                                         sds->props[SP_DMA_QUEUES_OFFSET] * i);
9348                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9349                                                         ENGINE_DMA, i);
9350                 if (rc)
9351                         goto free_sync_to_engine_map;
9352         }
9353
9354         return 0;
9355
9356 free_sync_to_engine_map:
9357         hl_state_dump_free_sync_to_engine_map(map);
9358
9359         return rc;
9360 }
9361
9362 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9363 {
9364         return FIELD_GET(
9365                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9366                 mon->status);
9367 }
9368
9369 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
9370 {
9371         const size_t max_write = 10;
9372         u32 gid, mask, sob;
9373         int i, offset;
9374
9375         /* Sync object IDs are derived from the arm data: each cleared bit
9376          * at position i in the mask maps to sync object (8 * group_id + i)
9377          */
9378         gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9379                         mon->arm_data);
9380         mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9381                         mon->arm_data);
9382
9383         for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
9384                 max_write; mask >>= 1, i++) {
9385                 if (!(mask & 1)) {
9386                         sob = gid * MONITOR_MAX_SOBS + i;
9387
9388                         if (offset > 0)
9389                                 offset += snprintf(sobs + offset, max_write,
9390                                                         ", ");
9391
9392                         offset += snprintf(sobs + offset, max_write, "%u", sob);
9393                 }
9394         }
9395 }
9396
9397 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9398                                 struct hl_device *hdev,
9399                                 struct hl_mon_state_dump *mon)
9400 {
9401         const char *name;
9402         char scratch_buf1[BIN_REG_STRING_SIZE],
9403                 scratch_buf2[BIN_REG_STRING_SIZE];
9404         char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9405
9406         name = hl_state_dump_get_monitor_name(hdev, mon);
9407         if (!name)
9408                 name = "";
9409
9410         gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9411
9412         return hl_snprintf_resize(
9413                 buf, size, offset,
9414                 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9415                 mon->id, name,
9416                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9417                                 mon->arm_data),
9418                 hl_format_as_binary(
9419                         scratch_buf1, sizeof(scratch_buf1),
9420                         FIELD_GET(
9421                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9422                                 mon->arm_data)),
9423                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9424                                 mon->arm_data),
9425                 mon->wr_data,
9426                 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9427                 hl_format_as_binary(
9428                         scratch_buf2, sizeof(scratch_buf2),
9429                         FIELD_GET(
9430                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9431                                 mon->status)),
9432                 monitored_sobs);
9433 }
9434
9435
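/*
 * gaudi_print_fences_single_engine() - dump the fence state of one engine.
 *
 * All CP status and fence-counter registers of the engine are sampled first,
 * and a line is emitted only for the queues whose CP currently has a fence
 * in progress. The output buffer is grown on demand via hl_snprintf_resize().
 */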
9436 static int gaudi_print_fences_single_engine(
9437         struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9438         enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9439         size_t *size, size_t *offset)
9440 {
9441         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9442         int rc = -ENOMEM, i;
9443         u32 *statuses, *fences;
9444
9445         statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9446                         sizeof(*statuses), GFP_KERNEL);
9447         if (!statuses)
9448                 goto out;
9449
9450         fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9451                                 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9452                          sizeof(*fences), GFP_KERNEL);
9453         if (!fences)
9454                 goto free_status;
9455
9456         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9457                 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9458
9459         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9460                                 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9461                 fences[i] = RREG32(base_offset + i * sizeof(u32));
9462
9463         /* The actual print */
9464         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9465                 u32 fence_id;
9466                 u64 fence_cnt, fence_rdata;
9467                 const char *engine_name;
9468
9469                 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9470                         statuses[i]))
9471                         continue;
9472
9473                 fence_id =
9474                         FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9475                 fence_cnt = base_offset + CFG_BASE +
9476                         sizeof(u32) *
9477                         (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9478                 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9479                                 sds->props[SP_FENCE0_RDATA_OFFSET];
9480                 engine_name = hl_sync_engine_to_string(engine_type);
9481
9482                 rc = hl_snprintf_resize(
9483                         buf, size, offset,
9484                         "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9485                         engine_name, engine_id,
9486                         i, fence_id,
9487                         fence_cnt, engine_name, engine_id, fence_id, i,
9488                         fence_rdata, engine_name, engine_id, fence_id, i,
9489                         fences[fence_id],
9490                         statuses[i]);
9491                 if (rc)
9492                         goto free_fences;
9493         }
9494
9495         rc = 0;
9496
9497 free_fences:
9498         kfree(fences);
9499 free_status:
9500         kfree(statuses);
9501 out:
9502         return rc;
9503 }
9504
9505
9506 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9507         .monitor_valid = gaudi_monitor_valid,
9508         .print_single_monitor = gaudi_print_single_monitor,
9509         .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9510         .print_fences_single_engine = gaudi_print_fences_single_engine,
9511 };
9512
9513 static void gaudi_state_dump_init(struct hl_device *hdev)
9514 {
9515         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9516         int i;
9517
9518         for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9519                 hash_add(sds->so_id_to_str_tb,
9520                         &gaudi_so_id_to_str[i].node,
9521                         gaudi_so_id_to_str[i].id);
9522
9523         for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9524                 hash_add(sds->monitor_id_to_str_tb,
9525                         &gaudi_monitor_id_to_str[i].node,
9526                         gaudi_monitor_id_to_str[i].id);
9527
9528         sds->props = gaudi_state_dump_specs_props;
9529
9530         sds->sync_namager_names = gaudi_sync_manager_names;
9531
9532         sds->funcs = gaudi_state_dump_funcs;
9533 }
9534
9535 static u32 *gaudi_get_stream_master_qid_arr(void)
9536 {
9537         return gaudi_stream_master;
9538 }
9539
9540 static const struct hl_asic_funcs gaudi_funcs = {
9541         .early_init = gaudi_early_init,
9542         .early_fini = gaudi_early_fini,
9543         .late_init = gaudi_late_init,
9544         .late_fini = gaudi_late_fini,
9545         .sw_init = gaudi_sw_init,
9546         .sw_fini = gaudi_sw_fini,
9547         .hw_init = gaudi_hw_init,
9548         .hw_fini = gaudi_hw_fini,
9549         .halt_engines = gaudi_halt_engines,
9550         .suspend = gaudi_suspend,
9551         .resume = gaudi_resume,
9552         .mmap = gaudi_mmap,
9553         .ring_doorbell = gaudi_ring_doorbell,
9554         .pqe_write = gaudi_pqe_write,
9555         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9556         .asic_dma_free_coherent = gaudi_dma_free_coherent,
9557         .scrub_device_mem = gaudi_scrub_device_mem,
9558         .get_int_queue_base = gaudi_get_int_queue_base,
9559         .test_queues = gaudi_test_queues,
9560         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9561         .asic_dma_pool_free = gaudi_dma_pool_free,
9562         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9563         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9564         .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9565         .cs_parser = gaudi_cs_parser,
9566         .asic_dma_map_sg = gaudi_dma_map_sg,
9567         .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9568         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9569         .update_eq_ci = gaudi_update_eq_ci,
9570         .context_switch = gaudi_context_switch,
9571         .restore_phase_topology = gaudi_restore_phase_topology,
9572         .debugfs_read32 = gaudi_debugfs_read32,
9573         .debugfs_write32 = gaudi_debugfs_write32,
9574         .debugfs_read64 = gaudi_debugfs_read64,
9575         .debugfs_write64 = gaudi_debugfs_write64,
9576         .debugfs_read_dma = gaudi_debugfs_read_dma,
9577         .add_device_attr = hl_add_device_attr,
9578         .handle_eqe = gaudi_handle_eqe,
9579         .set_pll_profile = hl_set_pll_profile,
9580         .get_events_stat = gaudi_get_events_stat,
9581         .read_pte = gaudi_read_pte,
9582         .write_pte = gaudi_write_pte,
9583         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9584         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9585         .send_heartbeat = gaudi_send_heartbeat,
9586         .set_clock_gating = gaudi_set_clock_gating,
9587         .disable_clock_gating = gaudi_disable_clock_gating,
9588         .debug_coresight = gaudi_debug_coresight,
9589         .is_device_idle = gaudi_is_device_idle,
9590         .non_hard_reset_late_init = gaudi_non_hard_reset_late_init,
9591         .hw_queues_lock = gaudi_hw_queues_lock,
9592         .hw_queues_unlock = gaudi_hw_queues_unlock,
9593         .get_pci_id = gaudi_get_pci_id,
9594         .get_eeprom_data = gaudi_get_eeprom_data,
9595         .send_cpu_message = gaudi_send_cpu_message,
9596         .pci_bars_map = gaudi_pci_bars_map,
9597         .init_iatu = gaudi_init_iatu,
9598         .rreg = hl_rreg,
9599         .wreg = hl_wreg,
9600         .halt_coresight = gaudi_halt_coresight,
9601         .ctx_init = gaudi_ctx_init,
9602         .ctx_fini = gaudi_ctx_fini,
9603         .get_clk_rate = hl_get_clk_rate,
9604         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9605         .load_firmware_to_device = gaudi_load_firmware_to_device,
9606         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9607         .get_signal_cb_size = gaudi_get_signal_cb_size,
9608         .get_wait_cb_size = gaudi_get_wait_cb_size,
9609         .gen_signal_cb = gaudi_gen_signal_cb,
9610         .gen_wait_cb = gaudi_gen_wait_cb,
9611         .reset_sob = gaudi_reset_sob,
9612         .reset_sob_group = gaudi_reset_sob_group,
9613         .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9614         .get_device_time = gaudi_get_device_time,
9615         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9616         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9617         .scramble_addr = hl_mmu_scramble_addr,
9618         .descramble_addr = hl_mmu_descramble_addr,
9619         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9620         .get_hw_block_id = gaudi_get_hw_block_id,
9621         .hw_block_mmap = gaudi_block_mmap,
9622         .enable_events_from_fw = gaudi_enable_events_from_fw,
9623         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9624         .init_firmware_loader = gaudi_init_firmware_loader,
9625         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9626         .state_dump_init = gaudi_state_dump_init,
9627         .get_sob_addr = gaudi_get_sob_addr,
9628         .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9629         .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
9630 };
9631
9632 /**
9633  * gaudi_set_asic_funcs - set GAUDI function pointers
9634  *
9635  * @hdev: pointer to hl_device structure
9636  *
9637  */
9638 void gaudi_set_asic_funcs(struct hl_device *hdev)
9639 {
9640         hdev->asic_funcs = &gaudi_funcs;
9641 }