habanalabs: duplicate HOP table props to MMU props
drivers/misc/habanalabs/gaudi/gaudi.c (linux-2.6-microblaze.git)
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43  *                      because of TDMA (tensor DMA). Hence, WREG is never
44  *                      secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000         /* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN            20
86
87 #define GAUDI_CB_POOL_CB_CNT            512
88 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
97
98 #define GAUDI_ARB_WDT_TIMEOUT           0x1000000
99
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
101                 BIT(GAUDI_ENGINE_ID_MME_0) |\
102                 BIT(GAUDI_ENGINE_ID_MME_2) |\
103                 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
104
105 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
106
107 #define GAUDI_PLL_MAX 10
108
109 #define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")
110
111 #define MONITOR_SOB_STRING_SIZE         256
112
113 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
114         GAUDI_QUEUE_ID_DMA_0_0,
115         GAUDI_QUEUE_ID_DMA_0_1,
116         GAUDI_QUEUE_ID_DMA_0_2,
117         GAUDI_QUEUE_ID_DMA_0_3,
118         GAUDI_QUEUE_ID_DMA_1_0,
119         GAUDI_QUEUE_ID_DMA_1_1,
120         GAUDI_QUEUE_ID_DMA_1_2,
121         GAUDI_QUEUE_ID_DMA_1_3
122 };
123
124 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
125                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
126                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
127                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
128                 "gaudi cpu eq"
129 };
130
131 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
132         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
133         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
134         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
135         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
136         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
137         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
138         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
139         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
140 };
141
142 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
143         [0] = GAUDI_QUEUE_ID_DMA_0_0,
144         [1] = GAUDI_QUEUE_ID_DMA_0_1,
145         [2] = GAUDI_QUEUE_ID_DMA_0_2,
146         [3] = GAUDI_QUEUE_ID_DMA_0_3,
147         [4] = GAUDI_QUEUE_ID_DMA_1_0,
148         [5] = GAUDI_QUEUE_ID_DMA_1_1,
149         [6] = GAUDI_QUEUE_ID_DMA_1_2,
150         [7] = GAUDI_QUEUE_ID_DMA_1_3,
151 };
152
153 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
154         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
155         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
156         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
157         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
158         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
159         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
160         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
161         [PACKET_FENCE]          = sizeof(struct packet_fence),
162         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
163         [PACKET_NOP]            = sizeof(struct packet_nop),
164         [PACKET_STOP]           = sizeof(struct packet_stop),
165         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
166         [PACKET_WAIT]           = sizeof(struct packet_wait),
167         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
168 };
169
170 static inline bool validate_packet_id(enum packet_id id)
171 {
172         switch (id) {
173         case PACKET_WREG_32:
174         case PACKET_WREG_BULK:
175         case PACKET_MSG_LONG:
176         case PACKET_MSG_SHORT:
177         case PACKET_CP_DMA:
178         case PACKET_REPEAT:
179         case PACKET_MSG_PROT:
180         case PACKET_FENCE:
181         case PACKET_LIN_DMA:
182         case PACKET_NOP:
183         case PACKET_STOP:
184         case PACKET_ARB_POINT:
185         case PACKET_WAIT:
186         case PACKET_LOAD_AND_EXE:
187                 return true;
188         default:
189                 return false;
190         }
191 }
192
193 static const char * const
194 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
195         "tpc_address_exceed_slm",
196         "tpc_div_by_0",
197         "tpc_spu_mac_overflow",
198         "tpc_spu_addsub_overflow",
199         "tpc_spu_abs_overflow",
200         "tpc_spu_fp_dst_nan_inf",
201         "tpc_spu_fp_dst_denorm",
202         "tpc_vpu_mac_overflow",
203         "tpc_vpu_addsub_overflow",
204         "tpc_vpu_abs_overflow",
205         "tpc_vpu_fp_dst_nan_inf",
206         "tpc_vpu_fp_dst_denorm",
207         "tpc_assertions",
208         "tpc_illegal_instruction",
209         "tpc_pc_wrap_around",
210         "tpc_qm_sw_err",
211         "tpc_hbw_rresp_err",
212         "tpc_hbw_bresp_err",
213         "tpc_lbw_rresp_err",
214         "tpc_lbw_bresp_err"
215 };
216
217 static const char * const
218 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
219         "PQ AXI HBW error",
220         "CQ AXI HBW error",
221         "CP AXI HBW error",
222         "CP error due to undefined OPCODE",
223         "CP encountered STOP OPCODE",
224         "CP AXI LBW error",
225         "CP WRREG32 or WRBULK returned error",
226         "N/A",
227         "FENCE 0 inc over max value and clipped",
228         "FENCE 1 inc over max value and clipped",
229         "FENCE 2 inc over max value and clipped",
230         "FENCE 3 inc over max value and clipped",
231         "FENCE 0 dec under min value and clipped",
232         "FENCE 1 dec under min value and clipped",
233         "FENCE 2 dec under min value and clipped",
234         "FENCE 3 dec under min value and clipped"
235 };
236
237 static const char * const
238 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
239         "Choice push while full error",
240         "Choice Q watchdog error",
241         "MSG AXI LBW returned with error"
242 };
243
244 enum gaudi_sm_sei_cause {
245         GAUDI_SM_SEI_SO_OVERFLOW,
246         GAUDI_SM_SEI_LBW_4B_UNALIGNED,
247         GAUDI_SM_SEI_AXI_RESPONSE_ERR
248 };
249
250 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
251         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
252         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
253         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
254         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
255         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
256         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
257         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
258         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
259         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
349         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
350         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
351         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
352         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
353         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
354         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
355         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
356         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
357         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
358         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
359         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
360         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
361         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
362         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
363         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
364 };
365
366 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
367         { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
368         { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
369         { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
370         { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
371         { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
372         { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
373         { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
374         { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
375         { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
376         { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
377         { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
378         { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
379         { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
380         { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
381         { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
382         { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
383         { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
384         { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
385         { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
386         { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
387         { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
388         { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
389         { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
390         { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
391         { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
392         { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
393         { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
394 };
395
396 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
397         { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
398         { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
399         { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
400         { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
401         { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
402         { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
403         { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
404         { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
405         { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
406         { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
407         { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
408 };
409
410 static s64 gaudi_state_dump_specs_props[] = {
411         [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
412         [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
413         [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
414         [SP_MON_OBJ_WR_ADDR_LOW] =
415                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
416         [SP_MON_OBJ_WR_ADDR_HIGH] =
417                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
418         [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
419         [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
420         [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
421         [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
422         [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
423         [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
424         [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
425         [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
426         [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
427         [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
428         [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
429         [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
430         [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
431         [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
432         [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
433         [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
434         [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
435         [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
436         [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
437         [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
438         [SP_FENCE0_CNT_OFFSET] =
439                 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
440         [SP_FENCE0_RDATA_OFFSET] =
441                 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
442         [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
443         [SP_NUM_CORES] = 1,
444 };
445
446 /* The order here is opposite to the order of the indexing in the h/w.
447  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
448  */
449 static const char * const gaudi_sync_manager_names[] = {
450         "SYNC_MGR_E_N",
451         "SYNC_MGR_W_N",
452         "SYNC_MGR_E_S",
453         "SYNC_MGR_W_S",
454         NULL
455 };
456
457 struct ecc_info_extract_params {
458         u64 block_address;
459         u32 num_memories;
460         bool derr;
461 };
462
463 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
464                                                                 u64 phys_addr);
465 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
466                                         struct hl_cs_job *job);
467 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
468                                         u32 size, u64 val);
469 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
470                                         u32 num_regs, u32 val);
471 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
472                                 u32 tpc_id);
473 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
474 static int gaudi_cpucp_info_get(struct hl_device *hdev);
475 static void gaudi_disable_clock_gating(struct hl_device *hdev);
476 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
477 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
478                                 u32 size, bool eb);
479 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
480                                 struct hl_gen_wait_properties *prop);
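/* Return the collective mode of the given queue: external queues serve as
 * collective master, the DMA5, TPC7 and NIC queues serve as collective
 * slaves, and all other queues do not take part in collective operations.
 */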
481 static inline enum hl_collective_mode
482 get_collective_mode(struct hl_device *hdev, u32 queue_id)
483 {
484         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
485                 return HL_COLLECTIVE_MASTER;
486
487         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
488                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
489                 return HL_COLLECTIVE_SLAVE;
490
491         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
492                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
493                 return HL_COLLECTIVE_SLAVE;
494
495         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
496                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
497                 return HL_COLLECTIVE_SLAVE;
498
499         return HL_COLLECTIVE_NOT_SUPPORTED;
500 }
501
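/* Set the default maximum and DC power values according to the card type
 * (PMC vs. PCI) and, for PMC cards, according to whether FW security is
 * enabled.
 */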
502 static inline void set_default_power_values(struct hl_device *hdev)
503 {
504         struct asic_fixed_properties *prop = &hdev->asic_prop;
505
506         if (hdev->card_type == cpucp_card_type_pmc) {
507                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
508
509                 if (prop->fw_security_enabled)
510                         prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
511                 else
512                         prop->dc_power_default = DC_POWER_DEFAULT_PMC;
513         } else {
514                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
515                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
516         }
517 }
518
519 static int gaudi_set_fixed_properties(struct hl_device *hdev)
520 {
521         struct asic_fixed_properties *prop = &hdev->asic_prop;
522         u32 num_sync_stream_queues = 0;
523         int i;
524
525         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
526         prop->hw_queues_props = kcalloc(prop->max_queues,
527                         sizeof(struct hw_queue_properties),
528                         GFP_KERNEL);
529
530         if (!prop->hw_queues_props)
531                 return -ENOMEM;
532
533         for (i = 0 ; i < prop->max_queues ; i++) {
534                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
535                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
536                         prop->hw_queues_props[i].driver_only = 0;
537                         prop->hw_queues_props[i].supports_sync_stream = 1;
538                         prop->hw_queues_props[i].cb_alloc_flags =
539                                 CB_ALLOC_KERNEL;
540                         num_sync_stream_queues++;
541                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
542                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
543                         prop->hw_queues_props[i].driver_only = 1;
544                         prop->hw_queues_props[i].supports_sync_stream = 0;
545                         prop->hw_queues_props[i].cb_alloc_flags =
546                                 CB_ALLOC_KERNEL;
547                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
548                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
549                         prop->hw_queues_props[i].driver_only = 0;
550                         prop->hw_queues_props[i].supports_sync_stream = 0;
551                         prop->hw_queues_props[i].cb_alloc_flags =
552                                 CB_ALLOC_USER;
553
554                 }
555                 prop->hw_queues_props[i].collective_mode =
556                                                 get_collective_mode(hdev, i);
557         }
558
559         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
560         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
561         prop->collective_first_sob = 0;
562         prop->collective_first_mon = 0;
563
564         /* 2 SOBs per internal queue stream are reserved for collective */
565         prop->sync_stream_first_sob =
566                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
567                         * QMAN_STREAMS * HL_RSVD_SOBS;
568
569         /* 1 monitor per internal queue stream is reserved for collective
570          * 2 monitors per external queue stream are reserved for collective
571          */
572         prop->sync_stream_first_mon =
573                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
574                         (NUMBER_OF_EXT_HW_QUEUES * 2);
575
576         prop->dram_base_address = DRAM_PHYS_BASE;
577         prop->dram_size = GAUDI_HBM_SIZE_32GB;
578         prop->dram_end_address = prop->dram_base_address +
579                                         prop->dram_size;
580         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
581
582         prop->sram_base_address = SRAM_BASE_ADDR;
583         prop->sram_size = SRAM_SIZE;
584         prop->sram_end_address = prop->sram_base_address +
585                                         prop->sram_size;
586         prop->sram_user_base_address = prop->sram_base_address +
587                                         SRAM_USER_BASE_OFFSET;
588
589         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
590         if (hdev->pldm)
591                 prop->mmu_pgt_size = 0x800000; /* 8MB */
592         else
593                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
594         prop->mmu_pte_size = HL_PTE_SIZE;
595         prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
596         prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
597         prop->dram_page_size = PAGE_SIZE_2MB;
598         prop->dram_supports_virtual_memory = false;
599
600         prop->pmmu.hop0_shift = MMU_V1_1_HOP0_SHIFT;
601         prop->pmmu.hop1_shift = MMU_V1_1_HOP1_SHIFT;
602         prop->pmmu.hop2_shift = MMU_V1_1_HOP2_SHIFT;
603         prop->pmmu.hop3_shift = MMU_V1_1_HOP3_SHIFT;
604         prop->pmmu.hop4_shift = MMU_V1_1_HOP4_SHIFT;
605         prop->pmmu.hop0_mask = MMU_V1_1_HOP0_MASK;
606         prop->pmmu.hop1_mask = MMU_V1_1_HOP1_MASK;
607         prop->pmmu.hop2_mask = MMU_V1_1_HOP2_MASK;
608         prop->pmmu.hop3_mask = MMU_V1_1_HOP3_MASK;
609         prop->pmmu.hop4_mask = MMU_V1_1_HOP4_MASK;
610         prop->pmmu.start_addr = VA_HOST_SPACE_START;
611         prop->pmmu.end_addr =
612                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
613         prop->pmmu.page_size = PAGE_SIZE_4KB;
614         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
615         prop->pmmu.last_mask = LAST_MASK;
616         /* TODO: will be duplicated until implementing per-MMU props */
617         prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
618         prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
619
620         /* PMMU and HPMMU are the same except for the page size */
621         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
622         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
623
624         /* shifts and masks are the same in PMMU and DMMU */
625         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
626         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
627         prop->dmmu.end_addr = VA_HOST_SPACE_END;
628         prop->dmmu.page_size = PAGE_SIZE_2MB;
629
630         prop->cfg_size = CFG_SIZE;
631         prop->max_asid = MAX_ASID;
632         prop->num_of_events = GAUDI_EVENT_SIZE;
633         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
634
635         set_default_power_values(hdev);
636
637         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
638         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
639
640         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
641         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
642
643         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
644                                         CARD_NAME_MAX_LEN);
645
646         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
647
648         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
649                         prop->sync_stream_first_sob +
650                         (num_sync_stream_queues * HL_RSVD_SOBS);
651         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
652                         prop->sync_stream_first_mon +
653                         (num_sync_stream_queues * HL_RSVD_MONS);
654
655         prop->first_available_user_msix_interrupt = USHRT_MAX;
656
657         for (i = 0 ; i < HL_MAX_DCORES ; i++)
658                 prop->first_available_cq[i] = USHRT_MAX;
659
660         prop->fw_cpu_boot_dev_sts0_valid = false;
661         prop->fw_cpu_boot_dev_sts1_valid = false;
662         prop->hard_reset_done_by_fw = false;
663         prop->gic_interrupts_enable = true;
664
665         prop->server_type = HL_SERVER_TYPE_UNKNOWN;
666
667         prop->clk_pll_index = HL_GAUDI_MME_PLL;
668         prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
669
670         prop->use_get_power_for_reset_history = true;
671
672         return 0;
673 }
674
675 static int gaudi_pci_bars_map(struct hl_device *hdev)
676 {
677         static const char * const name[] = {"SRAM", "CFG", "HBM"};
678         bool is_wc[3] = {false, false, true};
679         int rc;
680
681         rc = hl_pci_bars_map(hdev, name, is_wc);
682         if (rc)
683                 return rc;
684
685         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
686                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
687
688         return 0;
689 }
690
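/* Point the HBM BAR to the given device address. Returns the address the BAR
 * previously pointed to (when known) on success, or U64_MAX if the iATU is
 * configured by the firmware or setting the inbound region fails.
 */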
691 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
692 {
693         struct gaudi_device *gaudi = hdev->asic_specific;
694         struct hl_inbound_pci_region pci_region;
695         u64 old_addr = addr;
696         int rc;
697
698         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
699                 return old_addr;
700
701         if (hdev->asic_prop.iatu_done_by_fw)
702                 return U64_MAX;
703
704         /* Inbound Region 2 - Bar 4 - Point to HBM */
705         pci_region.mode = PCI_BAR_MATCH_MODE;
706         pci_region.bar = HBM_BAR_ID;
707         pci_region.addr = addr;
708         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
709         if (rc)
710                 return U64_MAX;
711
712         if (gaudi) {
713                 old_addr = gaudi->hbm_bar_cur_addr;
714                 gaudi->hbm_bar_cur_addr = addr;
715         }
716
717         return old_addr;
718 }
719
720 static int gaudi_init_iatu(struct hl_device *hdev)
721 {
722         struct hl_inbound_pci_region inbound_region;
723         struct hl_outbound_pci_region outbound_region;
724         int rc;
725
726         if (hdev->asic_prop.iatu_done_by_fw)
727                 return 0;
728
729         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
730         inbound_region.mode = PCI_BAR_MATCH_MODE;
731         inbound_region.bar = SRAM_BAR_ID;
732         inbound_region.addr = SRAM_BASE_ADDR;
733         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
734         if (rc)
735                 goto done;
736
737         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
738         inbound_region.mode = PCI_BAR_MATCH_MODE;
739         inbound_region.bar = CFG_BAR_ID;
740         inbound_region.addr = SPI_FLASH_BASE_ADDR;
741         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
742         if (rc)
743                 goto done;
744
745         /* Inbound Region 2 - Bar 4 - Point to HBM */
746         inbound_region.mode = PCI_BAR_MATCH_MODE;
747         inbound_region.bar = HBM_BAR_ID;
748         inbound_region.addr = DRAM_PHYS_BASE;
749         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
750         if (rc)
751                 goto done;
752
753         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
754
755         /* Outbound Region 0 - Point to Host */
756         outbound_region.addr = HOST_PHYS_BASE;
757         outbound_region.size = HOST_PHYS_SIZE;
758         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
759
760 done:
761         return rc;
762 }
763
764 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
765 {
766         return RREG32(mmHW_STATE);
767 }
768
769 static int gaudi_early_init(struct hl_device *hdev)
770 {
771         struct asic_fixed_properties *prop = &hdev->asic_prop;
772         struct pci_dev *pdev = hdev->pdev;
773         u32 fw_boot_status;
774         int rc;
775
776         rc = gaudi_set_fixed_properties(hdev);
777         if (rc) {
778                 dev_err(hdev->dev, "Failed setting fixed properties\n");
779                 return rc;
780         }
781
782         /* Check BAR sizes */
783         if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
784                 dev_err(hdev->dev,
785                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
786                         SRAM_BAR_ID,
787                         (unsigned long long) pci_resource_len(pdev,
788                                                         SRAM_BAR_ID),
789                         SRAM_BAR_SIZE);
790                 rc = -ENODEV;
791                 goto free_queue_props;
792         }
793
794         if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
795                 dev_err(hdev->dev,
796                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
797                         CFG_BAR_ID,
798                         (unsigned long long) pci_resource_len(pdev,
799                                                                 CFG_BAR_ID),
800                         CFG_BAR_SIZE);
801                 rc = -ENODEV;
802                 goto free_queue_props;
803         }
804
805         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
806         hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
807
808         /* If FW security is enabled at this point it means no access to ELBI */
809         if (hdev->asic_prop.fw_security_enabled) {
810                 hdev->asic_prop.iatu_done_by_fw = true;
811
812                 /*
813                  * GIC-security-bit can ONLY be set by CPUCP, so at this stage
814                  * the decision can only be taken based on PCI ID security.
815                  */
816                 hdev->asic_prop.gic_interrupts_enable = false;
817                 goto pci_init;
818         }
819
820         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
821                                 &fw_boot_status);
822         if (rc)
823                 goto free_queue_props;
824
825         /* Check whether FW is configuring iATU */
826         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
827                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
828                 hdev->asic_prop.iatu_done_by_fw = true;
829
830 pci_init:
831         rc = hl_pci_init(hdev);
832         if (rc)
833                 goto free_queue_props;
834
835         /* Before continuing with the initialization, we need to read the preboot
836          * version to determine whether we are running with security-enabled firmware
837          */
838         rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
839                                         mmCPU_BOOT_DEV_STS0,
840                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
841                                         mmCPU_BOOT_ERR1,
842                                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
843         if (rc) {
844                 if (hdev->reset_on_preboot_fail)
845                         hdev->asic_funcs->hw_fini(hdev, true, false);
846                 goto pci_fini;
847         }
848
849         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
850                 dev_info(hdev->dev,
851                         "H/W state is dirty, must reset before initializing\n");
852                 hdev->asic_funcs->hw_fini(hdev, true, false);
853         }
854
855         return 0;
856
857 pci_fini:
858         hl_pci_fini(hdev);
859 free_queue_props:
860         kfree(hdev->asic_prop.hw_queues_props);
861         return rc;
862 }
863
864 static int gaudi_early_fini(struct hl_device *hdev)
865 {
866         kfree(hdev->asic_prop.hw_queues_props);
867         hl_pci_fini(hdev);
868
869         return 0;
870 }
871
872 /**
873  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
874  *
875  * @hdev: pointer to hl_device structure
876  *
877  */
878 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
879 {
880         struct asic_fixed_properties *prop = &hdev->asic_prop;
881         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
882         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
883         int rc;
884
885         if (hdev->asic_prop.fw_security_enabled) {
886                 struct gaudi_device *gaudi = hdev->asic_specific;
887
888                 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
889                         return 0;
890
891                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
892
893                 if (rc)
894                         return rc;
895
896                 freq = pll_freq_arr[2];
897         } else {
898                 /* Backward compatibility */
899                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
900                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
901                 nr = RREG32(mmPSOC_CPU_PLL_NR);
902                 nf = RREG32(mmPSOC_CPU_PLL_NF);
903                 od = RREG32(mmPSOC_CPU_PLL_OD);
904
905                 if (div_sel == DIV_SEL_REF_CLK ||
906                                 div_sel == DIV_SEL_DIVIDED_REF) {
907                         if (div_sel == DIV_SEL_REF_CLK)
908                                 freq = PLL_REF_CLK;
909                         else
910                                 freq = PLL_REF_CLK / (div_fctr + 1);
911                 } else if (div_sel == DIV_SEL_PLL_CLK ||
912                         div_sel == DIV_SEL_DIVIDED_PLL) {
913                         pll_clk = PLL_REF_CLK * (nf + 1) /
914                                         ((nr + 1) * (od + 1));
915                         if (div_sel == DIV_SEL_PLL_CLK)
916                                 freq = pll_clk;
917                         else
918                                 freq = pll_clk / (div_fctr + 1);
919                 } else {
920                         dev_warn(hdev->dev,
921                                 "Received invalid div select value: %d",
922                                 div_sel);
923                         freq = 0;
924                 }
925         }
926
927         prop->psoc_timestamp_frequency = freq;
928         prop->psoc_pci_pll_nr = nr;
929         prop->psoc_pci_pll_nf = nf;
930         prop->psoc_pci_pll_od = od;
931         prop->psoc_pci_pll_div_factor = div_fctr;
932
933         return 0;
934 }
935
936 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
937                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
938 {
939         struct asic_fixed_properties *prop = &hdev->asic_prop;
940         struct packet_lin_dma *init_tpc_mem_pkt;
941         struct hl_cs_job *job;
942         struct hl_cb *cb;
943         u64 dst_addr;
944         u32 cb_size, ctl;
945         u8 tpc_id;
946         int rc;
947
948         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
949         if (!cb)
950                 return -EFAULT;
951
952         init_tpc_mem_pkt = cb->kernel_address;
953         cb_size = sizeof(*init_tpc_mem_pkt);
954         memset(init_tpc_mem_pkt, 0, cb_size);
955
956         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
957
958         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
959         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
960         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
961         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
962
963         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
964
965         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
966         dst_addr = (prop->sram_user_base_address &
967                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
968                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
969         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
970
971         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
972         if (!job) {
973                 dev_err(hdev->dev, "Failed to allocate a new job\n");
974                 rc = -ENOMEM;
975                 goto release_cb;
976         }
977
978         job->id = 0;
979         job->user_cb = cb;
980         atomic_inc(&job->user_cb->cs_cnt);
981         job->user_cb_size = cb_size;
982         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
983         job->patched_cb = job->user_cb;
984         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
985
986         hl_debugfs_add_job(hdev, job);
987
988         rc = gaudi_send_job_on_qman0(hdev, job);
989
990         if (rc)
991                 goto free_job;
992
993         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
994                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
995                 if (rc)
996                         break;
997         }
998
999 free_job:
1000         hl_userptr_delete_list(hdev, &job->userptr_list);
1001         hl_debugfs_remove_job(hdev, job);
1002         kfree(job);
1003         atomic_dec(&cb->cs_cnt);
1004
1005 release_cb:
1006         hl_cb_put(cb);
1007         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1008
1009         return rc;
1010 }
1011
1012 /*
1013  * gaudi_init_tpc_mem() - Initialize TPC memories.
1014  * @hdev: Pointer to hl_device structure.
1015  *
1016  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1017  *
1018  * Return: 0 for success, negative value for error.
1019  */
1020 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1021 {
1022         const struct firmware *fw;
1023         size_t fw_size;
1024         void *cpu_addr;
1025         dma_addr_t dma_handle;
1026         int rc, count = 5;
1027
1028 again:
1029         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1030         if (rc == -EINTR && count-- > 0) {
1031                 msleep(50);
1032                 goto again;
1033         }
1034
1035         if (rc) {
1036                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1037                                 GAUDI_TPC_FW_FILE);
1038                 goto out;
1039         }
1040
1041         fw_size = fw->size;
1042         cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1043                         &dma_handle, GFP_KERNEL | __GFP_ZERO);
1044         if (!cpu_addr) {
1045                 dev_err(hdev->dev,
1046                         "Failed to allocate %zu of dma memory for TPC kernel\n",
1047                         fw_size);
1048                 rc = -ENOMEM;
1049                 goto out;
1050         }
1051
1052         memcpy(cpu_addr, fw->data, fw_size);
1053
1054         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1055
1056         hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1057                         dma_handle);
1058
1059 out:
1060         release_firmware(fw);
1061         return rc;
1062 }
1063
1064 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1065 {
1066         struct gaudi_device *gaudi = hdev->asic_specific;
1067         struct gaudi_collective_properties *prop = &gaudi->collective_props;
1068         struct hl_hw_queue *q;
1069         u32 i, sob_id, sob_group_id, queue_id;
1070
1071         /* Iterate through SOB groups and assign a SOB for each slave queue */
1072         sob_group_id =
1073                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1074         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1075
1076         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1077         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1078                 q = &hdev->kernel_queues[queue_id + (4 * i)];
1079                 q->sync_stream_prop.collective_sob_id = sob_id + i;
1080         }
1081
1082         /* Both DMA5 and TPC7 use the same resources since only a single
1083          * engine needs to participate in the reduction process
1084          */
1085         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1086         q = &hdev->kernel_queues[queue_id];
1087         q->sync_stream_prop.collective_sob_id =
1088                         sob_id + NIC_NUMBER_OF_ENGINES;
1089
1090         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1091         q = &hdev->kernel_queues[queue_id];
1092         q->sync_stream_prop.collective_sob_id =
1093                         sob_id + NIC_NUMBER_OF_ENGINES;
1094 }
1095
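/* kref release callback: clear all SOBs in the group in H/W and re-init the
 * refcount so the group can be reused.
 */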
1096 static void gaudi_sob_group_hw_reset(struct kref *ref)
1097 {
1098         struct gaudi_hw_sob_group *hw_sob_group =
1099                 container_of(ref, struct gaudi_hw_sob_group, kref);
1100         struct hl_device *hdev = hw_sob_group->hdev;
1101         int i;
1102
1103         for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1104                 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1105                         (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1106
1107         kref_init(&hw_sob_group->kref);
1108 }
1109
1110 static void gaudi_sob_group_reset_error(struct kref *ref)
1111 {
1112         struct gaudi_hw_sob_group *hw_sob_group =
1113                 container_of(ref, struct gaudi_hw_sob_group, kref);
1114         struct hl_device *hdev = hw_sob_group->hdev;
1115
1116         dev_crit(hdev->dev,
1117                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1118                 hw_sob_group->base_sob_id);
1119 }
1120
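/* Build the master SOB mask: set a bit for every initialized NIC engine and
 * one additional bit for the collective (DMA5/TPC7) engine.
 */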
1121 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1122 {
1123         struct gaudi_collective_properties *prop;
1124         int i;
1125
1126         prop = &gaudi->collective_props;
1127
1128         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1129
1130         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1131                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1132                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1133                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1134         /* Set collective engine bit */
1135         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1136                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1137 }
1138
1139 static int gaudi_collective_init(struct hl_device *hdev)
1140 {
1141         u32 i, sob_id, reserved_sobs_per_group;
1142         struct gaudi_collective_properties *prop;
1143         struct gaudi_device *gaudi;
1144
1145         gaudi = hdev->asic_specific;
1146         prop = &gaudi->collective_props;
1147         sob_id = hdev->asic_prop.collective_first_sob;
1148
1149         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1150         reserved_sobs_per_group =
1151                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1152
1153         /* Init SOB groups */
1154         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1155                 prop->hw_sob_group[i].hdev = hdev;
1156                 prop->hw_sob_group[i].base_sob_id = sob_id;
1157                 sob_id += reserved_sobs_per_group;
1158                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1159         }
1160
1161         for (i = 0 ; i < QMAN_STREAMS; i++) {
1162                 prop->next_sob_group_val[i] = 1;
1163                 prop->curr_sob_group_idx[i] = 0;
1164                 gaudi_collective_map_sobs(hdev, i);
1165         }
1166
1167         gaudi_collective_mstr_sob_mask_set(gaudi);
1168
1169         return 0;
1170 }
1171
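/* Drop a reference to the SOB group; the group's SOBs are cleared in H/W once
 * the last reference is released.
 */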
1172 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1173 {
1174         struct gaudi_device *gaudi = hdev->asic_specific;
1175         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1176
1177         kref_put(&cprop->hw_sob_group[sob_group].kref,
1178                                         gaudi_sob_group_hw_reset);
1179 }
1180
1181 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1182                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1183 {
1184         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1185         struct gaudi_collective_properties *cprop;
1186         struct hl_gen_wait_properties wait_prop;
1187         struct hl_sync_stream_properties *prop;
1188         struct gaudi_device *gaudi;
1189
1190         gaudi = hdev->asic_specific;
1191         cprop = &gaudi->collective_props;
1192         queue_id = job->hw_queue_id;
1193         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1194
1195         master_sob_base =
1196                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1197         master_monitor = prop->collective_mstr_mon_id[0];
1198
1199         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1200
1201         dev_dbg(hdev->dev,
1202                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1203                 master_sob_base, cprop->mstr_sob_mask[0],
1204                 cprop->next_sob_group_val[stream],
1205                 master_monitor, queue_id);
1206
1207         wait_prop.data = (void *) job->patched_cb;
1208         wait_prop.sob_base = master_sob_base;
1209         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1210         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1211         wait_prop.mon_id = master_monitor;
1212         wait_prop.q_idx = queue_id;
1213         wait_prop.size = cb_size;
1214         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1215
1216         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1217         master_monitor = prop->collective_mstr_mon_id[1];
1218
1219         dev_dbg(hdev->dev,
1220                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1221                 master_sob_base, cprop->mstr_sob_mask[1],
1222                 cprop->next_sob_group_val[stream],
1223                 master_monitor, queue_id);
1224
1225         wait_prop.sob_base = master_sob_base;
1226         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1227         wait_prop.mon_id = master_monitor;
1228         wait_prop.size = cb_size;
1229         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1230 }
1231
1232 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1233                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1234 {
1235         struct hl_gen_wait_properties wait_prop;
1236         struct hl_sync_stream_properties *prop;
1237         u32 queue_id, cb_size = 0;
1238
1239         queue_id = job->hw_queue_id;
1240         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1241
1242         if (job->cs->encaps_signals) {
1243                 /* use the encaps signal handle stored earlier in the flow
1244                  * and set the SOB information from the encaps
1245                  * signals handle
1246                  */
1247                 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1248                                                 cs_cmpl);
1249
1250                 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1251                                 job->cs->sequence,
1252                                 cs_cmpl->hw_sob->sob_id,
1253                                 cs_cmpl->sob_val);
1254         }
1255
1256         /* Add to wait CBs using slave monitor */
1257         wait_prop.data = (void *) job->user_cb;
1258         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1259         wait_prop.sob_mask = 0x1;
1260         wait_prop.sob_val = cs_cmpl->sob_val;
1261         wait_prop.mon_id = prop->collective_slave_mon_id;
1262         wait_prop.q_idx = queue_id;
1263         wait_prop.size = cb_size;
1264
1265         dev_dbg(hdev->dev,
1266                 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1267                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1268                 prop->collective_slave_mon_id, queue_id);
1269
1270         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1271
1272         dev_dbg(hdev->dev,
1273                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1274                 prop->collective_sob_id, queue_id);
1275
1276         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1277                         prop->collective_sob_id, cb_size, false);
1278 }
1279
1280 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1281 {
1282         struct hl_cs_compl *signal_cs_cmpl =
1283                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1284         struct hl_cs_compl *cs_cmpl =
1285                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1286         struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1287         struct gaudi_collective_properties *cprop;
1288         u32 stream, queue_id, sob_group_offset;
1289         struct gaudi_device *gaudi;
1290         struct hl_device *hdev;
1291         struct hl_cs_job *job;
1292         struct hl_ctx *ctx;
1293
1294         ctx = cs->ctx;
1295         hdev = ctx->hdev;
1296         gaudi = hdev->asic_specific;
1297         cprop = &gaudi->collective_props;
1298
1299         if (cs->encaps_signals) {
1300                 cs_cmpl->hw_sob = handle->hw_sob;
1301                 /* at this checkpoint we only need the hw_sob pointer
1302                  * for the completion check before starting to go over the
1303                  * jobs of the master/slaves. The sob_value will be taken
1304                  * later on in gaudi_collective_slave_init_job, depending on
1305                  * each job's wait offset value.
1306                  */
1307                 cs_cmpl->sob_val = 0;
1308         } else {
1309                 /* copy the SOB id and value of the signal CS */
1310                 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1311                 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1312         }
1313
1314         /* Check again if the signal CS has already completed.
1315          * If it has, don't send any wait CS since the hw_sob
1316          * could already be in reset. If the signal has not completed,
1317          * take a refcount on the hw_sob to prevent resetting the SOB
1318          * while the wait CS is not yet submitted.
1319          * Note that this check is protected by two locks,
1320          * the hw queue lock and the completion object lock,
1321          * and the same completion object lock also protects
1322          * the hw_sob reset handler function.
1323          * The hw_queue lock prevents the hw_sob refcount value,
1324          * changed by the signal/wait flows, from going out of sync.
1325          */
1326         spin_lock(&signal_cs_cmpl->lock);
1327
1328         if (completion_done(&cs->signal_fence->completion)) {
1329                 spin_unlock(&signal_cs_cmpl->lock);
1330                 return -EINVAL;
1331         }
1332         /* Increment kref since all slave queues are now waiting on it */
1333         kref_get(&cs_cmpl->hw_sob->kref);
1334
1335         spin_unlock(&signal_cs_cmpl->lock);
1336
1337         /* Calculate the stream from collective master queue (1st job) */
1338         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1339         stream = job->hw_queue_id % 4;
1340         sob_group_offset =
1341                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1342
1343         list_for_each_entry(job, &cs->job_list, cs_node) {
1344                 queue_id = job->hw_queue_id;
1345
1346                 if (hdev->kernel_queues[queue_id].collective_mode ==
1347                                 HL_COLLECTIVE_MASTER)
1348                         gaudi_collective_master_init_job(hdev, job, stream,
1349                                                 sob_group_offset);
1350                 else
1351                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1352         }
1353
1354         cs_cmpl->sob_group = sob_group_offset;
1355
1356         /* Handle sob group kref and wraparound */
1357         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1358         cprop->next_sob_group_val[stream]++;
1359
1360         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1361                 /*
1362                  * Decrement as we reached the max value.
1363                  * The release function won't be called here as we've
1364                  * just incremented the refcount.
1365                  */
1366                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1367                                 gaudi_sob_group_reset_error);
1368                 cprop->next_sob_group_val[stream] = 1;
1369                 /* only two SOBs are currently in use */
1370                 cprop->curr_sob_group_idx[stream] =
1371                         (cprop->curr_sob_group_idx[stream] + 1) &
1372                                                         (HL_RSVD_SOBS - 1);
1373
1374                 gaudi_collective_map_sobs(hdev, stream);
1375
1376                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1377                                 cprop->curr_sob_group_idx[stream], stream);
1378         }
1379
1380         mb();
1381         hl_fence_put(cs->signal_fence);
1382         cs->signal_fence = NULL;
1383
1384         return 0;
1385 }
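
/*
 * Minimal illustrations (hypothetical helpers, not used by the driver) of the
 * per-stream SOB group bookkeeping done above: the group offset is derived
 * from the stream and the current group index, and the index wraps cyclically
 * over the reserved groups. The mask arithmetic assumes HL_RSVD_SOBS is a
 * power of two.
 */
static inline u32 example_sob_group_offset(u32 stream, u32 curr_idx)
{
        /* each stream owns HL_RSVD_SOBS consecutive SOB groups */
        return stream * HL_RSVD_SOBS + curr_idx;
}

static inline u32 example_next_sob_group_idx(u32 curr_idx)
{
        /* advance to the next reserved group, wrapping around */
        return (curr_idx + 1) & (HL_RSVD_SOBS - 1);
}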
1386
1387 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1388                 struct hl_ctx *ctx, struct hl_cs *cs,
1389                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1390                 u32 encaps_signal_offset)
1391 {
1392         struct hw_queue_properties *hw_queue_prop;
1393         struct hl_cs_counters_atomic *cntr;
1394         struct hl_cs_job *job;
1395         struct hl_cb *cb;
1396         u32 cb_size;
1397         bool patched_cb;
1398
1399         cntr = &hdev->aggregated_cs_counters;
1400
1401         if (mode == HL_COLLECTIVE_MASTER) {
1402                 /* CB size of collective master queue contains
1403                  * 4 msg short packets for monitor 1 configuration
1404                  * 1 fence packet
1405                  * 4 msg short packets for monitor 2 configuration
1406                  * 1 fence packet
1407                  * 2 msg prot packets for completion and MSI-X
1408                  */
1409                 cb_size = sizeof(struct packet_msg_short) * 8 +
1410                                 sizeof(struct packet_fence) * 2 +
1411                                 sizeof(struct packet_msg_prot) * 2;
1412                 patched_cb = true;
1413         } else {
1414                 /* CB size of collective slave queues contains
1415                  * 4 msg short packets for monitor configuration
1416                  * 1 fence packet
1417                  * 1 additional msg short packet for sob signal
1418                  */
1419                 cb_size = sizeof(struct packet_msg_short) * 5 +
1420                                 sizeof(struct packet_fence);
1421                 patched_cb = false;
1422         }
1423
1424         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1425         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1426         if (!job) {
1427                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1428                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1429                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1430                 return -ENOMEM;
1431         }
1432
1433         /* Allocate internal mapped CB for non-patched CBs */
1434         cb = hl_cb_kernel_create(hdev, cb_size,
1435                         hdev->mmu_enable && !patched_cb);
1436         if (!cb) {
1437                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1438                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1439                 kfree(job);
1440                 return -EFAULT;
1441         }
1442
1443         job->id = 0;
1444         job->cs = cs;
1445         job->user_cb = cb;
1446         atomic_inc(&job->user_cb->cs_cnt);
1447         job->user_cb_size = cb_size;
1448         job->hw_queue_id = queue_id;
1449
1450         /* since the collective wait cs is guaranteed to have only one
1451          * chunk, we can use this chunk to set the encapsulated signal
1452          * offset in the jobs.
1453          */
1454         if (cs->encaps_signals)
1455                 job->encaps_sig_wait_offset = encaps_signal_offset;
1456
1457         /*
1458          * No need for parsing, the user CB is the patched CB.
1459          * We call hl_cb_destroy() for two reasons - we don't need
1460          * the CB in the CB idr anymore, and we need to decrement its
1461          * refcount as it was incremented inside hl_cb_kernel_create().
1462          */
1463         if (patched_cb)
1464                 job->patched_cb = job->user_cb;
1465         else
1466                 job->patched_cb = NULL;
1467
1468         job->job_cb_size = job->user_cb_size;
1469         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1470
1471         /* increment refcount as for external queues we get completion */
1472         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1473                 cs_get(cs);
1474
1475         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1476
1477         list_add_tail(&job->cs_node, &cs->job_list);
1478
1479         hl_debugfs_add_job(hdev, job);
1480
1481         return 0;
1482 }
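
/*
 * A minimal illustration (hypothetical helper, not used by the driver) of the
 * master CB size computed above: two monitors of four msg_short packets each,
 * two fence packets, and two msg_prot packets for completion and MSI-X.
 */
static inline u32 example_collective_master_cb_size(void)
{
        return sizeof(struct packet_msg_short) * 8 +
                        sizeof(struct packet_fence) * 2 +
                        sizeof(struct packet_msg_prot) * 2;
}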
1483
1484 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1485                 struct hl_ctx *ctx, struct hl_cs *cs,
1486                 u32 wait_queue_id, u32 collective_engine_id,
1487                 u32 encaps_signal_offset)
1488 {
1489         struct gaudi_device *gaudi = hdev->asic_specific;
1490         struct hw_queue_properties *hw_queue_prop;
1491         u32 queue_id, collective_queue, num_jobs;
1492         u32 stream, nic_queue, nic_idx = 0;
1493         bool skip;
1494         int i, rc = 0;
1495
1496         /* Verify wait queue id is configured as master */
1497         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1498         if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1499                 dev_err(hdev->dev,
1500                         "Queue %d is not configured as collective master\n",
1501                         wait_queue_id);
1502                 return -EINVAL;
1503         }
1504
1505         /* Verify engine id is supported */
1506         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1507                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1508                 dev_err(hdev->dev,
1509                         "Collective wait does not support engine %u\n",
1510                         collective_engine_id);
1511                 return -EINVAL;
1512         }
1513
1514         stream = wait_queue_id % 4;
1515
1516         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1517                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1518         else
1519                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1520
1521         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1522         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1523
1524         /* The first job goes to the collective master queue; it will wait
1525          * for the collective slave queues to finish execution.
1526          * The synchronization is done using two monitors:
1527          * the first monitor for NICs 0-7, the second monitor for NICs 8-9
1528          * and the reduction engine (DMA5/TPC7).
1529          *
1530          * The rest of the jobs go to the collective slave queues, which
1531          * will all wait for the user to signal sob 'cs_cmpl->sob_val'.
1532          */
1533         for (i = 0 ; i < num_jobs ; i++) {
1534                 if (i == 0) {
1535                         queue_id = wait_queue_id;
1536                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1537                                 HL_COLLECTIVE_MASTER, queue_id,
1538                                 wait_queue_id, encaps_signal_offset);
1539                 } else {
1540                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1541                                 if (gaudi->hw_cap_initialized &
1542                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1543                                         skip = false;
1544                                 else
1545                                         skip = true;
1546
1547                                 queue_id = nic_queue;
1548                                 nic_queue += 4;
1549                                 nic_idx++;
1550
1551                                 if (skip)
1552                                         continue;
1553                         } else {
1554                                 queue_id = collective_queue;
1555                         }
1556
1557                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1558                                 HL_COLLECTIVE_SLAVE, queue_id,
1559                                 wait_queue_id, encaps_signal_offset);
1560                 }
1561
1562                 if (rc)
1563                         return rc;
1564         }
1565
1566         return rc;
1567 }
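
/*
 * A minimal illustration (hypothetical helper, not used by the driver) of how
 * the slave NIC queue IDs advance in the loop above: every NIC engine exposes
 * four streams, so the queues that belong to a given stream are four IDs
 * apart, starting from GAUDI_QUEUE_ID_NIC_0_0.
 */
static inline u32 example_nic_slave_queue_id(u32 stream, u32 nic_idx)
{
        return GAUDI_QUEUE_ID_NIC_0_0 + stream + nic_idx * 4;
}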
1568
1569 static int gaudi_late_init(struct hl_device *hdev)
1570 {
1571         struct gaudi_device *gaudi = hdev->asic_specific;
1572         int rc;
1573
1574         rc = gaudi->cpucp_info_get(hdev);
1575         if (rc) {
1576                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1577                 return rc;
1578         }
1579
1580         if ((hdev->card_type == cpucp_card_type_pci) &&
1581                         (hdev->nic_ports_mask & 0x3)) {
1582                 dev_info(hdev->dev,
1583                         "PCI card detected, only 8 ports are enabled\n");
1584                 hdev->nic_ports_mask &= ~0x3;
1585
1586                 /* Stop and disable unused NIC QMANs */
1587                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1588                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1589                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1590
1591                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1592                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1593                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1594
1595                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1596                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1597
1598                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1599         }
1600
1601         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1602         if (rc) {
1603                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1604                 return rc;
1605         }
1606
1607         /* Scrub both SRAM and DRAM */
1608         rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
1609         if (rc)
1610                 goto disable_pci_access;
1611
1612         rc = gaudi_fetch_psoc_frequency(hdev);
1613         if (rc) {
1614                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1615                 goto disable_pci_access;
1616         }
1617
1618         rc = gaudi_mmu_clear_pgt_range(hdev);
1619         if (rc) {
1620                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1621                 goto disable_pci_access;
1622         }
1623
1624         rc = gaudi_init_tpc_mem(hdev);
1625         if (rc) {
1626                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1627                 goto disable_pci_access;
1628         }
1629
1630         rc = gaudi_collective_init(hdev);
1631         if (rc) {
1632                 dev_err(hdev->dev, "Failed to init collective\n");
1633                 goto disable_pci_access;
1634         }
1635
1636         /* We only support a single ASID for the user, so for the sake of optimization, just
1637          * initialize the ASID once during device initialization with the fixed value of 1
1638          */
1639         gaudi_mmu_prepare(hdev, 1);
1640
1641         hl_fw_set_pll_profile(hdev);
1642
1643         return 0;
1644
1645 disable_pci_access:
1646         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1647
1648         return rc;
1649 }
1650
1651 static void gaudi_late_fini(struct hl_device *hdev)
1652 {
1653         const struct hwmon_channel_info **channel_info_arr;
1654         int i = 0;
1655
1656         if (!hdev->hl_chip_info->info)
1657                 return;
1658
1659         channel_info_arr = hdev->hl_chip_info->info;
1660
1661         while (channel_info_arr[i]) {
1662                 kfree(channel_info_arr[i]->config);
1663                 kfree(channel_info_arr[i]);
1664                 i++;
1665         }
1666
1667         kfree(channel_info_arr);
1668
1669         hdev->hl_chip_info->info = NULL;
1670 }
1671
1672 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1673 {
1674         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1675         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1676         int i, j, rc = 0;
1677
1678         /*
1679          * The device CPU works with 40-bit addresses, while bit 39 must be set
1680          * to '1' when accessing the host.
1681          * Bits 49:39 of the full host address are saved for a later
1682          * configuration of the HW to perform the extension to 50 bits.
1683          * Because a single HW register holds the extension bits, these bits
1684          * must be identical across the entire allocated range.
1685          */
1686
1687         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1688                 virt_addr_arr[i] =
1689                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1690                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1691                                                 &dma_addr_arr[i],
1692                                                 GFP_KERNEL | __GFP_ZERO);
1693                 if (!virt_addr_arr[i]) {
1694                         rc = -ENOMEM;
1695                         goto free_dma_mem_arr;
1696                 }
1697
1698                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1699                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1700                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1701                         break;
1702         }
1703
1704         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1705                 dev_err(hdev->dev,
1706                         "MSB of CPU accessible DMA memory is not identical across the entire range\n");
1707                 rc = -EFAULT;
1708                 goto free_dma_mem_arr;
1709         }
1710
1711         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1712         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1713         hdev->cpu_pci_msb_addr =
1714                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1715
1716         if (!hdev->asic_prop.fw_security_enabled)
1717                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1718
1719 free_dma_mem_arr:
1720         for (j = 0 ; j < i ; j++)
1721                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1722                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1723                                                 virt_addr_arr[j],
1724                                                 dma_addr_arr[j]);
1725
1726         return rc;
1727 }
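
/*
 * A minimal illustration (hypothetical helper, not used by the driver) of the
 * retry condition checked above: an allocation is acceptable only if its
 * first and last bytes share the same PCI MSB bits, since a single HW
 * register holds the address extension for the whole range.
 */
static inline bool example_cpu_mem_msb_match(dma_addr_t addr, size_t size)
{
        return GAUDI_CPU_PCI_MSB_ADDR(addr) ==
                        GAUDI_CPU_PCI_MSB_ADDR(addr + size - 1);
}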
1728
1729 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1730 {
1731         struct gaudi_device *gaudi = hdev->asic_specific;
1732         struct gaudi_internal_qman_info *q;
1733         u32 i;
1734
1735         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1736                 q = &gaudi->internal_qmans[i];
1737                 if (!q->pq_kernel_addr)
1738                         continue;
1739                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1740                                                         q->pq_kernel_addr,
1741                                                         q->pq_dma_addr);
1742         }
1743 }
1744
1745 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1746 {
1747         struct gaudi_device *gaudi = hdev->asic_specific;
1748         struct gaudi_internal_qman_info *q;
1749         int rc, i;
1750
1751         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1752                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1753                         continue;
1754
1755                 q = &gaudi->internal_qmans[i];
1756
1757                 switch (i) {
1758                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1759                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1760                         break;
1761                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1762                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1763                         break;
1764                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1765                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1766                         break;
1767                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1768                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1769                         break;
1770                 default:
1771                         dev_err(hdev->dev, "Bad internal queue index %d", i);
1772                         rc = -EINVAL;
1773                         goto free_internal_qmans_pq_mem;
1774                 }
1775
1776                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1777                                                 hdev, q->pq_size,
1778                                                 &q->pq_dma_addr,
1779                                                 GFP_KERNEL | __GFP_ZERO);
1780                 if (!q->pq_kernel_addr) {
1781                         rc = -ENOMEM;
1782                         goto free_internal_qmans_pq_mem;
1783                 }
1784         }
1785
1786         return 0;
1787
1788 free_internal_qmans_pq_mem:
1789         gaudi_free_internal_qmans_pq_mem(hdev);
1790         return rc;
1791 }
1792
1793 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1794 {
1795         struct asic_fixed_properties *prop = &hdev->asic_prop;
1796         struct pci_mem_region *region;
1797
1798         /* CFG */
1799         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1800         region->region_base = CFG_BASE;
1801         region->region_size = CFG_SIZE;
1802         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1803         region->bar_size = CFG_BAR_SIZE;
1804         region->bar_id = CFG_BAR_ID;
1805         region->used = 1;
1806
1807         /* SRAM */
1808         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1809         region->region_base = SRAM_BASE_ADDR;
1810         region->region_size = SRAM_SIZE;
1811         region->offset_in_bar = 0;
1812         region->bar_size = SRAM_BAR_SIZE;
1813         region->bar_id = SRAM_BAR_ID;
1814         region->used = 1;
1815
1816         /* DRAM */
1817         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1818         region->region_base = DRAM_PHYS_BASE;
1819         region->region_size = hdev->asic_prop.dram_size;
1820         region->offset_in_bar = 0;
1821         region->bar_size = prop->dram_pci_bar_size;
1822         region->bar_id = HBM_BAR_ID;
1823         region->used = 1;
1824
1825         /* SP SRAM */
1826         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1827         region->region_base = PSOC_SCRATCHPAD_ADDR;
1828         region->region_size = PSOC_SCRATCHPAD_SIZE;
1829         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1830         region->bar_size = CFG_BAR_SIZE;
1831         region->bar_id = CFG_BAR_ID;
1832         region->used = 1;
1833 }
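
/*
 * A minimal illustration (hypothetical helper, not used by the driver) of
 * what offset_in_bar expresses for the regions initialized above: a device
 * address inside a region maps to its PCI BAR at the region's offset plus
 * the distance from the region base.
 */
static inline u64 example_region_addr_to_bar_offset(struct pci_mem_region *region,
                                                        u64 addr)
{
        return region->offset_in_bar + (addr - region->region_base);
}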
1834
1835 static int gaudi_sw_init(struct hl_device *hdev)
1836 {
1837         struct gaudi_device *gaudi;
1838         u32 i, event_id = 0;
1839         int rc;
1840
1841         /* Allocate device structure */
1842         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1843         if (!gaudi)
1844                 return -ENOMEM;
1845
1846         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1847                 if (gaudi_irq_map_table[i].valid) {
1848                         if (event_id == GAUDI_EVENT_SIZE) {
1849                                 dev_err(hdev->dev,
1850                                         "Event array exceeds the limit of %u events\n",
1851                                         GAUDI_EVENT_SIZE);
1852                                 rc = -EINVAL;
1853                                 goto free_gaudi_device;
1854                         }
1855
1856                         gaudi->events[event_id++] =
1857                                         gaudi_irq_map_table[i].fc_id;
1858                 }
1859         }
1860
1861         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1862
1863         hdev->asic_specific = gaudi;
1864
1865         /* Create DMA pool for small allocations */
1866         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1867                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1868         if (!hdev->dma_pool) {
1869                 dev_err(hdev->dev, "failed to create DMA pool\n");
1870                 rc = -ENOMEM;
1871                 goto free_gaudi_device;
1872         }
1873
1874         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1875         if (rc)
1876                 goto free_dma_pool;
1877
1878         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1879         if (!hdev->cpu_accessible_dma_pool) {
1880                 dev_err(hdev->dev,
1881                         "Failed to create CPU accessible DMA pool\n");
1882                 rc = -ENOMEM;
1883                 goto free_cpu_dma_mem;
1884         }
1885
1886         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1887                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1888                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1889         if (rc) {
1890                 dev_err(hdev->dev,
1891                         "Failed to add memory to CPU accessible DMA pool\n");
1892                 rc = -EFAULT;
1893                 goto free_cpu_accessible_dma_pool;
1894         }
1895
1896         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1897         if (rc)
1898                 goto free_cpu_accessible_dma_pool;
1899
1900         spin_lock_init(&gaudi->hw_queues_lock);
1901
1902         hdev->supports_sync_stream = true;
1903         hdev->supports_coresight = true;
1904         hdev->supports_staged_submission = true;
1905         hdev->supports_wait_for_multi_cs = true;
1906
1907         hdev->asic_funcs->set_pci_memory_regions(hdev);
1908         hdev->stream_master_qid_arr =
1909                                 hdev->asic_funcs->get_stream_master_qid_arr();
1910         hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1911
1912         return 0;
1913
1914 free_cpu_accessible_dma_pool:
1915         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1916 free_cpu_dma_mem:
1917         if (!hdev->asic_prop.fw_security_enabled)
1918                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1919                                         hdev->cpu_pci_msb_addr);
1920         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1921                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1922                         hdev->cpu_accessible_dma_mem,
1923                         hdev->cpu_accessible_dma_address);
1924 free_dma_pool:
1925         dma_pool_destroy(hdev->dma_pool);
1926 free_gaudi_device:
1927         kfree(gaudi);
1928         return rc;
1929 }
1930
1931 static int gaudi_sw_fini(struct hl_device *hdev)
1932 {
1933         struct gaudi_device *gaudi = hdev->asic_specific;
1934
1935         gaudi_free_internal_qmans_pq_mem(hdev);
1936
1937         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1938
1939         if (!hdev->asic_prop.fw_security_enabled)
1940                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1941                                         hdev->cpu_pci_msb_addr);
1942
1943         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1944                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1945                         hdev->cpu_accessible_dma_mem,
1946                         hdev->cpu_accessible_dma_address);
1947
1948         dma_pool_destroy(hdev->dma_pool);
1949
1950         kfree(gaudi);
1951
1952         return 0;
1953 }
1954
1955 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1956 {
1957         struct hl_device *hdev = arg;
1958         int i;
1959
1960         if (hdev->disabled)
1961                 return IRQ_HANDLED;
1962
1963         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1964                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1965
1966         hl_irq_handler_eq(irq, &hdev->event_queue);
1967
1968         return IRQ_HANDLED;
1969 }
1970
1971 /*
1972  * For backward compatibility, new MSI interrupts should be set after the
1973  * existing CPU and NIC interrupts.
1974  */
1975 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1976                                 bool cpu_eq)
1977 {
1978         int msi_vec;
1979
1980         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1981                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1982                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1983
1984         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1985                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1986
1987         return pci_irq_vector(hdev->pdev, msi_vec);
1988 }
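
/*
 * A minimal illustration (hypothetical helper, not used by the driver) of the
 * vector mapping implemented above: completion queues below the CPU EQ index
 * and the CPU EQ itself keep their legacy MSI vector numbers, while any newer
 * interrupt is pushed past the block reserved for the NIC engines.
 */
static inline unsigned int example_msi_vector_for(unsigned int nr, bool cpu_eq)
{
        return ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || cpu_eq) ?
                        nr : nr + NIC_NUMBER_OF_ENGINES + 1;
}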
1989
1990 static int gaudi_enable_msi_single(struct hl_device *hdev)
1991 {
1992         int rc, irq;
1993
1994         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1995
1996         irq = gaudi_pci_irq_vector(hdev, 0, false);
1997         rc = request_irq(irq, gaudi_irq_handler_single, 0,
1998                         "gaudi single msi", hdev);
1999         if (rc)
2000                 dev_err(hdev->dev,
2001                         "Failed to request single MSI IRQ\n");
2002
2003         return rc;
2004 }
2005
2006 static int gaudi_enable_msi_multi(struct hl_device *hdev)
2007 {
2008         int cq_cnt = hdev->asic_prop.completion_queues_count;
2009         int rc, i, irq_cnt_init, irq;
2010
2011         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2012                 irq = gaudi_pci_irq_vector(hdev, i, false);
2013                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
2014                                 &hdev->completion_queue[i]);
2015                 if (rc) {
2016                         dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2017                         goto free_irqs;
2018                 }
2019         }
2020
2021         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2022         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2023                                 &hdev->event_queue);
2024         if (rc) {
2025                 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2026                 goto free_irqs;
2027         }
2028
2029         return 0;
2030
2031 free_irqs:
2032         for (i = 0 ; i < irq_cnt_init ; i++)
2033                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
2034                                 &hdev->completion_queue[i]);
2035         return rc;
2036 }
2037
2038 static int gaudi_enable_msi(struct hl_device *hdev)
2039 {
2040         struct gaudi_device *gaudi = hdev->asic_specific;
2041         int rc;
2042
2043         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2044                 return 0;
2045
2046         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2047         if (rc < 0) {
2048                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2049                 return rc;
2050         }
2051
2052         if (rc < NUMBER_OF_INTERRUPTS) {
2053                 gaudi->multi_msi_mode = false;
2054                 rc = gaudi_enable_msi_single(hdev);
2055         } else {
2056                 gaudi->multi_msi_mode = true;
2057                 rc = gaudi_enable_msi_multi(hdev);
2058         }
2059
2060         if (rc)
2061                 goto free_pci_irq_vectors;
2062
2063         gaudi->hw_cap_initialized |= HW_CAP_MSI;
2064
2065         return 0;
2066
2067 free_pci_irq_vectors:
2068         pci_free_irq_vectors(hdev->pdev);
2069         return rc;
2070 }
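
/*
 * A minimal illustration (hypothetical helper, not used by the driver) of the
 * decision taken above: multi-MSI mode is used only when
 * pci_alloc_irq_vectors() granted at least NUMBER_OF_INTERRUPTS vectors,
 * otherwise all queues share a single interrupt handler.
 */
static inline bool example_use_multi_msi(int allocated_vectors)
{
        return allocated_vectors >= NUMBER_OF_INTERRUPTS;
}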
2071
2072 static void gaudi_sync_irqs(struct hl_device *hdev)
2073 {
2074         struct gaudi_device *gaudi = hdev->asic_specific;
2075         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2076
2077         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2078                 return;
2079
2080         /* Wait for all pending IRQs to be finished */
2081         if (gaudi->multi_msi_mode) {
2082                 for (i = 0 ; i < cq_cnt ; i++)
2083                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2084
2085                 synchronize_irq(gaudi_pci_irq_vector(hdev,
2086                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
2087                                                 true));
2088         } else {
2089                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2090         }
2091 }
2092
2093 static void gaudi_disable_msi(struct hl_device *hdev)
2094 {
2095         struct gaudi_device *gaudi = hdev->asic_specific;
2096         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2097
2098         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2099                 return;
2100
2101         gaudi_sync_irqs(hdev);
2102
2103         if (gaudi->multi_msi_mode) {
2104                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2105                                                 true);
2106                 free_irq(irq, &hdev->event_queue);
2107
2108                 for (i = 0 ; i < cq_cnt ; i++) {
2109                         irq = gaudi_pci_irq_vector(hdev, i, false);
2110                         free_irq(irq, &hdev->completion_queue[i]);
2111                 }
2112         } else {
2113                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2114         }
2115
2116         pci_free_irq_vectors(hdev->pdev);
2117
2118         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2119 }
2120
2121 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2122 {
2123         struct gaudi_device *gaudi = hdev->asic_specific;
2124
2125         if (hdev->asic_prop.fw_security_enabled)
2126                 return;
2127
2128         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2129                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2130                 return;
2131
2132         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2133                 return;
2134
2135         if (!hdev->sram_scrambler_enable)
2136                 return;
2137
2138         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2139                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2140         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2141                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2142         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2143                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2144         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2145                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2146         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2147                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2148         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2149                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2150         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2151                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2152         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2153                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2154
2155         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2156                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2157         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2158                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2159         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2160                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2161         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2162                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2163         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2164                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2165         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2166                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2167         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2168                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2169         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2170                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2171
2172         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2173                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2174         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2175                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2176         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2177                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2178         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2179                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2180         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2181                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2182         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2183                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2184         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2185                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2186         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2187                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2188
2189         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2190 }
2191
2192 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2193 {
2194         struct gaudi_device *gaudi = hdev->asic_specific;
2195
2196         if (hdev->asic_prop.fw_security_enabled)
2197                 return;
2198
2199         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2200                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2201                 return;
2202
2203         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2204                 return;
2205
2206         if (!hdev->dram_scrambler_enable)
2207                 return;
2208
2209         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2210                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2211         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2212                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2213         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2214                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2215         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2216                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2217         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2218                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2219         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2220                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2221         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2222                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2223         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2224                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2225
2226         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2227                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2228         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2229                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2230         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2231                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2232         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2233                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2234         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2235                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2236         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2237                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2238         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2239                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2240         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2241                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2242
2243         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2244                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2245         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2246                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2247         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2248                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2249         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2250                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2251         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2252                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2253         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2254                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2255         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2256                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2257         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2258                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2259
2260         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2261 }
2262
2263 static void gaudi_init_e2e(struct hl_device *hdev)
2264 {
2265         if (hdev->asic_prop.fw_security_enabled)
2266                 return;
2267
2268         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2269                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2270                 return;
2271
2272         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2273         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2274         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2275         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2276
2277         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2278         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2279         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2280         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2281
2282         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2283         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2284         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2285         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2286
2287         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2288         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2289         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2290         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2291
2292         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2293         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2294         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2295         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2296
2297         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2298         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2299         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2300         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2301
2302         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2303         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2304         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2305         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2306
2307         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2308         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2309         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2310         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2311
2312         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2313         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2314         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2315         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2316
2317         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2318         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2319         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2320         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2321
2322         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2323         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2324         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2325         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2326
2327         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2328         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2329         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2330         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2331
2332         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2333         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2334         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2335         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2336
2337         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2338         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2339         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2340         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2341
2342         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2343         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2344         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2345         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2346
2347         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2348         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2349         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2350         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2351
2352         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2353         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2354         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2355         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2356
2357         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2358         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2359         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2360         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2361
2362         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2363         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2364         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2365         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2366
2367         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2368         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2369         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2370         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2371
2372         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2373         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2374         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2375         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2376
2377         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2378         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2379         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2380         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2381
2382         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2383         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2384         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2385         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2386
2387         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2388         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2389         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2390         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2391
2392         if (!hdev->dram_scrambler_enable) {
2393                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2394                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2395                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2396                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2397
2398                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2399                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2400                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2401                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2402
2403                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2404                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2405                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2406                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2407
2408                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2409                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2410                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2411                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2412
2413                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2414                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2415                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2416                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2417
2418                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2419                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2420                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2421                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2422
2423                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2424                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2425                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2426                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2427
2428                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2429                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2430                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2431                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2432
2433                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2434                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2435                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2436                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2437
2438                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2439                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2440                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2441                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2442
2443                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2444                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2445                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2446                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2447
2448                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2449                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2450                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2451                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2452
2453                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2454                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2455                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2456                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2457
2458                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2459                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2460                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2461                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2462
2463                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2464                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2465                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2466                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2467
2468                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2469                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2470                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2471                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2472
2473                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2474                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2475                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2476                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2477
2478                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2479                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2480                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2481                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2482
2483                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2484                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2485                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2486                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2487
2488                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2489                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2490                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2491                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2492
2493                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2494                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2495                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2496                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2497
2498                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2499                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2500                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2501                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2502
2503                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2504                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2505                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2506                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2507
2508                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2509                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2510                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2511                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2512         }
2513
2514         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2515                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2516         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2517                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2518
2519         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2520                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2521         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2522                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2523
2524         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2525                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2526         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2527                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2528
2529         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2530                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2531         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2532                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2533
2534         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2535                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2536         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2537                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2538
2539         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2540                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2541         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2542                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2543
2544         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2545                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2546         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2547                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2548
2549         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2550                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2551         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2552                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2553
2554         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2555                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2556         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2557                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2558
2559         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2560                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2561         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2562                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2563
2564         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2565                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2566         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2567                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2568
2569         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2570                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2571         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2572                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2573
2574         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2575                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2576         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2577                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2578
2579         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2580                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2581         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2582                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2583
2584         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2585                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2586         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2587                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2588
2589         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2590                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2591         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2592                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2593
2594         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2595                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2596         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2597                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2598
2599         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2600                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2601         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2602                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2603
2604         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2605                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2606         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2607                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2608
2609         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2610                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2611         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2612                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2613
2614         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2615                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2616         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2617                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2618
2619         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2620                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2621         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2622                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2623
2624         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2625                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2626         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2627                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2628
2629         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2630                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2631         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2632                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2633 }
2634
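/*
 * Program the HBM read/write credit counters of the four DMA_IF units and
 * enable credit-based arbitration on both HBM channels. Skipped when FW
 * security is enabled or when the boot firmware reports that it has
 * already configured the credits (CPU_BOOT_DEV_STS0_HBM_CRED_EN).
 */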
2635 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2636 {
2637         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2638
2639         if (hdev->asic_prop.fw_security_enabled)
2640                 return;
2641
2642         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2643                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2644                 return;
2645
2646         hbm0_wr = 0x33333333;
2647         hbm0_rd = 0x77777777;
2648         hbm1_wr = 0x55555555;
2649         hbm1_rd = 0xDDDDDDDD;
2650
2651         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2652         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2653         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2654         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2655
2656         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2657         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2658         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2659         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2660
2661         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2662         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2663         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2664         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2665
2666         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2667         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2668         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2669         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2670
2671         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2672                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2673                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2674         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2675                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2676                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2677         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2678                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2679                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2680         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2681                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2682                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2683
2684         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2685                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2686                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2687         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2688                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2689                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2690         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2691                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2692                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2693         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2694                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2695                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2696 }
2697
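/*
 * Apply the "golden" register settings: E2E and HBM credit configuration,
 * TPC arithmetic interrupt masking and i-cache fetch line count, clearing
 * of the first 128 SRAM bytes (required for TDMA) and the MME
 * EUS_ROLLUP_CNT_ADD value.
 */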
2698 static void gaudi_init_golden_registers(struct hl_device *hdev)
2699 {
2700         u32 tpc_offset;
2701         int tpc_id, i;
2702
2703         gaudi_init_e2e(hdev);
2704         gaudi_init_hbm_cred(hdev);
2705
2706         for (tpc_id = 0, tpc_offset = 0;
2707                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2708                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2709                 /* Mask all arithmetic interrupts from TPC */
2710                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2711                 /* Set 16 cache lines */
2712                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2713                                 ICACHE_FETCH_LINE_NUM, 2);
2714         }
2715
2716         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2717         for (i = 0 ; i < 128 ; i += 8)
2718                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2719
2720         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2721         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2722         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2723         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2724 }
2725
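/*
 * Configure a single stream (qman_id) of a PCI DMA QMAN: PQ base and size
 * in host memory, LDMA offsets and the CP message base addresses that
 * point at the sync manager monitors and sync objects. The error
 * reporting (RAZWI IRQ), arbiter watchdog and protection settings are
 * programmed only once per QMAN, on stream 0.
 */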
2726 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2727                                         int qman_id, dma_addr_t qman_pq_addr)
2728 {
2729         struct cpu_dyn_regs *dyn_regs =
2730                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2731         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2732         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2733         u32 q_off, dma_qm_offset;
2734         u32 dma_qm_err_cfg, irq_handler_offset;
2735
2736         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2737
2738         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2739                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2740         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2741                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2742         so_base_en_lo = lower_32_bits(CFG_BASE +
2743                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2744         so_base_en_hi = upper_32_bits(CFG_BASE +
2745                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2746         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2747                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2748         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2749                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2750         so_base_ws_lo = lower_32_bits(CFG_BASE +
2751                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2752         so_base_ws_hi = upper_32_bits(CFG_BASE +
2753                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2754
2755         q_off = dma_qm_offset + qman_id * 4;
2756
2757         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2758         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2759
2760         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2761         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2762         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2763
2764         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2765         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2766                                                         QMAN_LDMA_SRC_OFFSET);
2767         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2768                                                         QMAN_LDMA_DST_OFFSET);
2769
2770         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2771         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2772         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2773         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2774         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2775         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2776         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2777         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2778
2779         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2780
2781         /* The following configuration is needed only once per QMAN */
2782         if (qman_id == 0) {
2783                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2784                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2785                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2786
2787                 /* Configure RAZWI IRQ */
2788                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2789                 if (hdev->stop_on_err)
2790                         dma_qm_err_cfg |=
2791                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2792
2793                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2794
2795                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2796                         lower_32_bits(CFG_BASE + irq_handler_offset));
2797                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2798                         upper_32_bits(CFG_BASE + irq_handler_offset));
2799
2800                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2801                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2802                                                                         dma_id);
2803
2804                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2805                                 QM_ARB_ERR_MSG_EN_MASK);
2806
2807                 /* Increase ARB WDT to support streams architecture */
2808                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2809                                 GAUDI_ARB_WDT_TIMEOUT);
2810
2811                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2812                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2813
2814                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2815         }
2816 }
2817
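/*
 * Configure a DMA core engine: maximum outstanding reads, error reporting
 * towards the interrupt handler, protection bits and MMU bypass for the
 * secured channel, and finally enable the core.
 */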
2818 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2819 {
2820         struct cpu_dyn_regs *dyn_regs =
2821                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2822         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2823         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2824         u32 irq_handler_offset;
2825
2826         /* Set to maximum possible according to physical size */
2827         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2828         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2829
2830         /* WA for H/W bug H3-2116 */
2831         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2832
2833         /* STOP_ON bit implies no completion of the operation in case of RAZWI */
2834         if (hdev->stop_on_err)
2835                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2836
2837         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2838
2839         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2840                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2841                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2842
2843         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2844                 lower_32_bits(CFG_BASE + irq_handler_offset));
2845         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2846                 upper_32_bits(CFG_BASE + irq_handler_offset));
2847
2848         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2849                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2850         WREG32(mmDMA0_CORE_PROT + dma_offset,
2851                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2852         /* If the channel is secured, it should be in MMU bypass mode */
2853         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2854                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2855         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2856 }
2857
2858 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2859                                 u32 enable_mask)
2860 {
2861         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2862
2863         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2864 }
2865
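/*
 * Initialize all PCI DMA QMANs: assign a completion queue and MSI vector
 * to each stream, program the QMAN streams and the DMA cores and enable
 * the QMANs. Runs only once, guarded by the HW_CAP_PCI_DMA flag.
 */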
2866 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2867 {
2868         struct gaudi_device *gaudi = hdev->asic_specific;
2869         struct hl_hw_queue *q;
2870         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2871
2872         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2873                 return;
2874
2875         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2876                 dma_id = gaudi_dma_assignment[i];
2877                 /*
2878                  * For queues after the CPU Q, we need to add 1 to get the
2879                  * correct queue index. In addition, we add the CPU EQ and the
2880                  * NIC IRQs in order to get the correct MSI vector.
2881                  */
2882                 if (dma_id > 1) {
2883                         cpu_skip = 1;
2884                         nic_skip = NIC_NUMBER_OF_ENGINES;
2885                 } else {
2886                         cpu_skip = 0;
2887                         nic_skip = 0;
2888                 }
2889
2890                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2891                         q_idx = 4 * dma_id + j + cpu_skip;
2892                         q = &hdev->kernel_queues[q_idx];
2893                         q->cq_id = cq_id++;
2894                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2895                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2896                                                 q->bus_address);
2897                 }
2898
2899                 gaudi_init_dma_core(hdev, dma_id);
2900
2901                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2902         }
2903
2904         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2905 }
2906
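/*
 * Configure a single stream of an HBM DMA QMAN. Streams 0-3 (upper CP)
 * get a PQ and the CPDMA offsets; stream 4 is the lower CP and gets the
 * LDMA offsets together with the per-QMAN error reporting, arbiter
 * watchdog and protection settings. DMA5 also gets MSG_BASE 2/3 for the
 * sync stream collective.
 */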
2907 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2908                                         int qman_id, u64 qman_base_addr)
2909 {
2910         struct cpu_dyn_regs *dyn_regs =
2911                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2912         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2913         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2914         u32 dma_qm_err_cfg, irq_handler_offset;
2915         u32 q_off, dma_qm_offset;
2916
2917         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2918
2919         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2920                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2921         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2922                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2923         so_base_en_lo = lower_32_bits(CFG_BASE +
2924                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2925         so_base_en_hi = upper_32_bits(CFG_BASE +
2926                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2927         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2928                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2929         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2930                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2931         so_base_ws_lo = lower_32_bits(CFG_BASE +
2932                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2933         so_base_ws_hi = upper_32_bits(CFG_BASE +
2934                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2935
2936         q_off = dma_qm_offset + qman_id * 4;
2937
2938         if (qman_id < 4) {
2939                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2940                                         lower_32_bits(qman_base_addr));
2941                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2942                                         upper_32_bits(qman_base_addr));
2943
2944                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2945                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2946                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2947
2948                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2949                                                         QMAN_CPDMA_SIZE_OFFSET);
2950                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2951                                                         QMAN_CPDMA_SRC_OFFSET);
2952                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2953                                                         QMAN_CPDMA_DST_OFFSET);
2954         } else {
2955                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2956                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2957                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2958
2959                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2960                                                         QMAN_LDMA_SIZE_OFFSET);
2961                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2962                                                         QMAN_LDMA_SRC_OFFSET);
2963                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2964                                                         QMAN_LDMA_DST_OFFSET);
2965
2966                 /* Configure RAZWI IRQ */
2967                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2968                 if (hdev->stop_on_err)
2969                         dma_qm_err_cfg |=
2970                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2971
2972                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2973
2974                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2975                         lower_32_bits(CFG_BASE + irq_handler_offset));
2976                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2977                         upper_32_bits(CFG_BASE + irq_handler_offset));
2978
2979                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2980                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2981                                                                         dma_id);
2982
2983                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2984                                 QM_ARB_ERR_MSG_EN_MASK);
2985
2986                 /* Increase ARB WDT to support streams architecture */
2987                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2988                                 GAUDI_ARB_WDT_TIMEOUT);
2989
2990                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2991                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2992                                 QMAN_INTERNAL_MAKE_TRUSTED);
2993         }
2994
2995         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2996         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2997         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2998         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2999
3000         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
3001         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
3002                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3003                                 mtr_base_ws_lo);
3004                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3005                                 mtr_base_ws_hi);
3006                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3007                                 so_base_ws_lo);
3008                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3009                                 so_base_ws_hi);
3010         }
3011 }
3012
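/*
 * Initialize the QMANs of all HBM DMA channels: four upper-CP streams per
 * channel, plus the lower CP, then configure the DMA core and enable the
 * QMAN. Runs only once, guarded by the HW_CAP_HBM_DMA flag.
 */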
3013 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
3014 {
3015         struct gaudi_device *gaudi = hdev->asic_specific;
3016         struct gaudi_internal_qman_info *q;
3017         u64 qman_base_addr;
3018         int i, j, dma_id, internal_q_index;
3019
3020         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
3021                 return;
3022
3023         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
3024                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
3025
3026                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
3027                          /*
3028                           * Add the CPU queue in order to get the correct queue
3029                           * number, as all internal queues are placed after it
3030                           */
3031                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
3032
3033                         q = &gaudi->internal_qmans[internal_q_index];
3034                         qman_base_addr = (u64) q->pq_dma_addr;
3035                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
3036                                                 qman_base_addr);
3037                 }
3038
3039                 /* Initializing lower CP for HBM DMA QMAN */
3040                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
3041
3042                 gaudi_init_dma_core(hdev, dma_id);
3043
3044                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
3045         }
3046
3047         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
3048 }
3049
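/*
 * Configure a single stream of an MME QMAN, with the same split as the
 * internal DMA QMANs: streams 0-3 get a PQ and the CPDMA offsets, stream
 * 4 (lower CP) gets the LDMA offsets plus the error reporting, arbiter
 * watchdog and protection settings.
 */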
3050 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
3051                                         int qman_id, u64 qman_base_addr)
3052 {
3053         struct cpu_dyn_regs *dyn_regs =
3054                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3055         u32 mtr_base_lo, mtr_base_hi;
3056         u32 so_base_lo, so_base_hi;
3057         u32 irq_handler_offset;
3058         u32 q_off, mme_id;
3059         u32 mme_qm_err_cfg;
3060
3061         mtr_base_lo = lower_32_bits(CFG_BASE +
3062                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3063         mtr_base_hi = upper_32_bits(CFG_BASE +
3064                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3065         so_base_lo = lower_32_bits(CFG_BASE +
3066                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3067         so_base_hi = upper_32_bits(CFG_BASE +
3068                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3069
3070         q_off = mme_offset + qman_id * 4;
3071
3072         if (qman_id < 4) {
3073                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3074                                         lower_32_bits(qman_base_addr));
3075                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3076                                         upper_32_bits(qman_base_addr));
3077
3078                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3079                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3080                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3081
3082                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3083                                                         QMAN_CPDMA_SIZE_OFFSET);
3084                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3085                                                         QMAN_CPDMA_SRC_OFFSET);
3086                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3087                                                         QMAN_CPDMA_DST_OFFSET);
3088         } else {
3089                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3090                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3091                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3092
3093                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3094                                                         QMAN_LDMA_SIZE_OFFSET);
3095                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3096                                                         QMAN_LDMA_SRC_OFFSET);
3097                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3098                                                         QMAN_LDMA_DST_OFFSET);
3099
3100                 /* Configure RAZWI IRQ */
3101                 mme_id = mme_offset /
3102                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3103
3104                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3105                 if (hdev->stop_on_err)
3106                         mme_qm_err_cfg |=
3107                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3108
3109                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3110
3111                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3112                         lower_32_bits(CFG_BASE + irq_handler_offset));
3113                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3114                         upper_32_bits(CFG_BASE + irq_handler_offset));
3115
3116                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3117                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3118                                                                         mme_id);
3119
3120                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3121                                 QM_ARB_ERR_MSG_EN_MASK);
3122
3123                 /* Increase ARB WDT to support streams architecture */
3124                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3125                                 GAUDI_ARB_WDT_TIMEOUT);
3126
3127                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3128                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3129                                 QMAN_INTERNAL_MAKE_TRUSTED);
3130         }
3131
3132         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3133         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3134         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3135         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3136 }
3137
3138 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3139 {
3140         struct gaudi_device *gaudi = hdev->asic_specific;
3141         struct gaudi_internal_qman_info *q;
3142         u64 qman_base_addr;
3143         u32 mme_offset;
3144         int i, internal_q_index;
3145
3146         if (gaudi->hw_cap_initialized & HW_CAP_MME)
3147                 return;
3148
3149         /*
3150          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3151          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3152          */
3153
3154         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3155
3156         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3157                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3158                 q = &gaudi->internal_qmans[internal_q_index];
3159                 qman_base_addr = (u64) q->pq_dma_addr;
3160                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3161                                         qman_base_addr);
3162                 if (i == 3)
3163                         mme_offset = 0;
3164         }
3165
3166         /* Initializing lower CP for MME QMANs */
3167         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3168         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3169         gaudi_init_mme_qman(hdev, 0, 4, 0);
3170
3171         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3172         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3173
3174         gaudi->hw_cap_initialized |= HW_CAP_MME;
3175 }
3176
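/*
 * Configure a single stream of a TPC QMAN: streams 0-3 get a PQ and the
 * CPDMA offsets, stream 4 (lower CP) gets the LDMA offsets plus the error
 * reporting, arbiter watchdog and protection settings. The TPC used for
 * the sync stream collective also gets MSG_BASE 2/3 pointing at the W_S
 * sync manager.
 */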
3177 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3178                                 int qman_id, u64 qman_base_addr)
3179 {
3180         struct cpu_dyn_regs *dyn_regs =
3181                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3182         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3183         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3184         u32 tpc_qm_err_cfg, irq_handler_offset;
3185         u32 q_off, tpc_id;
3186
3187         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3188                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3189         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3190                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3191         so_base_en_lo = lower_32_bits(CFG_BASE +
3192                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3193         so_base_en_hi = upper_32_bits(CFG_BASE +
3194                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3195         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3196                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3197         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3198                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3199         so_base_ws_lo = lower_32_bits(CFG_BASE +
3200                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3201         so_base_ws_hi = upper_32_bits(CFG_BASE +
3202                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3203
3204         q_off = tpc_offset + qman_id * 4;
3205
3206         tpc_id = tpc_offset /
3207                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3208
3209         if (qman_id < 4) {
3210                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3211                                         lower_32_bits(qman_base_addr));
3212                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3213                                         upper_32_bits(qman_base_addr));
3214
3215                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3216                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3217                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3218
3219                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3220                                                         QMAN_CPDMA_SIZE_OFFSET);
3221                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3222                                                         QMAN_CPDMA_SRC_OFFSET);
3223                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3224                                                         QMAN_CPDMA_DST_OFFSET);
3225         } else {
3226                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3227                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3228                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3229
3230                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3231                                                         QMAN_LDMA_SIZE_OFFSET);
3232                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3233                                                         QMAN_LDMA_SRC_OFFSET);
3234                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3235                                                         QMAN_LDMA_DST_OFFSET);
3236
3237                 /* Configure RAZWI IRQ */
3238                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3239                 if (hdev->stop_on_err)
3240                         tpc_qm_err_cfg |=
3241                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3242
3243                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3244
3245                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3246                         lower_32_bits(CFG_BASE + irq_handler_offset));
3247                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3248                         upper_32_bits(CFG_BASE + irq_handler_offset));
3249
3250                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3251                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3252                                                                         tpc_id);
3253
3254                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3255                                 QM_ARB_ERR_MSG_EN_MASK);
3256
3257                 /* Increase ARB WDT to support streams architecture */
3258                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3259                                 GAUDI_ARB_WDT_TIMEOUT);
3260
3261                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3262                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3263                                 QMAN_INTERNAL_MAKE_TRUSTED);
3264         }
3265
3266         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3267         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3268         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3269         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3270
3271         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3272         if (tpc_id == 6) {
3273                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3274                                 mtr_base_ws_lo);
3275                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3276                                 mtr_base_ws_hi);
3277                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3278                                 so_base_ws_lo);
3279                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3280                                 so_base_ws_hi);
3281         }
3282 }
3283
3284 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3285 {
3286         struct gaudi_device *gaudi = hdev->asic_specific;
3287         struct gaudi_internal_qman_info *q;
3288         u64 qman_base_addr;
3289         u32 so_base_hi, tpc_offset = 0;
3290         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3291                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3292         int i, tpc_id, internal_q_index;
3293
3294         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3295                 return;
3296
3297         so_base_hi = upper_32_bits(CFG_BASE +
3298                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3299
3300         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3301                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3302                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3303                                                 tpc_id * QMAN_STREAMS + i;
3304                         q = &gaudi->internal_qmans[internal_q_index];
3305                         qman_base_addr = (u64) q->pq_dma_addr;
3306                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3307                                                 qman_base_addr);
3308
3309                         if (i == 3) {
3310                                 /* Initializing lower CP for TPC QMAN */
3311                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3312
3313                                 /* Enable the QMAN and TPC channel */
3314                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3315                                                 QMAN_TPC_ENABLE);
3316                         }
3317                 }
3318
3319                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3320                                 so_base_hi);
3321
3322                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3323
3324                 gaudi->hw_cap_initialized |=
3325                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3326         }
3327 }
3328
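/*
 * Configure a single stream of a NIC QMAN: PQ base and size, LDMA offsets
 * and all four CP message base addresses (MSG_BASE 2/3 serve the sync
 * stream collective). The error reporting, arbiter watchdog and
 * protection settings are programmed only once per QMAN, on stream 0.
 */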
3329 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3330                                 int qman_id, u64 qman_base_addr, int nic_id)
3331 {
3332         struct cpu_dyn_regs *dyn_regs =
3333                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3334         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3335         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3336         u32 nic_qm_err_cfg, irq_handler_offset;
3337         u32 q_off;
3338
3339         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3340                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3341         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3342                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3343         so_base_en_lo = lower_32_bits(CFG_BASE +
3344                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3345         so_base_en_hi = upper_32_bits(CFG_BASE +
3346                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3347         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3348                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3349         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3350                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3351         so_base_ws_lo = lower_32_bits(CFG_BASE +
3352                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3353         so_base_ws_hi = upper_32_bits(CFG_BASE +
3354                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3355
3356         q_off = nic_offset + qman_id * 4;
3357
3358         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3359         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3360
3361         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3362         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3363         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3364
3365         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3366                                                         QMAN_LDMA_SIZE_OFFSET);
3367         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3368                                                         QMAN_LDMA_SRC_OFFSET);
3369         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3370                                                         QMAN_LDMA_DST_OFFSET);
3371
3372         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3373         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3374         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3375         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3376
3377         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3378         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3379         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3380         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3381         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3382
3383         if (qman_id == 0) {
3384                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3385                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3386                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3387
3388                 /* Configure RAZWI IRQ */
3389                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3390                 if (hdev->stop_on_err)
3391                         nic_qm_err_cfg |=
3392                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3393
3394                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3395
3396                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3397                         lower_32_bits(CFG_BASE + irq_handler_offset));
3398                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3399                         upper_32_bits(CFG_BASE + irq_handler_offset));
3400
3401                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3402                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3403                                                                         nic_id);
3404
3405                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3406                                 QM_ARB_ERR_MSG_EN_MASK);
3407
3408                 /* Increase ARB WDT to support streams architecture */
3409                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3410                                 GAUDI_ARB_WDT_TIMEOUT);
3411
3412                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3413                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3414                                 QMAN_INTERNAL_MAKE_TRUSTED);
3415         }
3416 }
3417
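/*
 * Initialize the QMANs of all enabled NIC ports. Ports that are masked
 * out in hdev->nic_ports_mask are skipped, but the register offset is
 * still advanced so the remaining ports are programmed at the correct
 * addresses.
 */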
3418 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3419 {
3420         struct gaudi_device *gaudi = hdev->asic_specific;
3421         struct gaudi_internal_qman_info *q;
3422         u64 qman_base_addr;
3423         u32 nic_offset = 0;
3424         u32 nic_delta_between_qmans =
3425                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3426         u32 nic_delta_between_nics =
3427                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3428         int i, nic_id, internal_q_index;
3429
3430         if (!hdev->nic_ports_mask)
3431                 return;
3432
3433         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3434                 return;
3435
3436         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3437
3438         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3439                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3440                         nic_offset += nic_delta_between_qmans;
3441                         if (nic_id & 1) {
3442                                 nic_offset -= (nic_delta_between_qmans * 2);
3443                                 nic_offset += nic_delta_between_nics;
3444                         }
3445                         continue;
3446                 }
3447
3448                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3449                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3450                                                 nic_id * QMAN_STREAMS + i;
3451                         q = &gaudi->internal_qmans[internal_q_index];
3452                         qman_base_addr = (u64) q->pq_dma_addr;
3453                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3454                                                 qman_base_addr, nic_id);
3455                 }
3456
3457                 /* Enable the QMAN */
3458                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3459
3460                 nic_offset += nic_delta_between_qmans;
3461                 if (nic_id & 1) {
3462                         nic_offset -= (nic_delta_between_qmans * 2);
3463                         nic_offset += nic_delta_between_nics;
3464                 }
3465
3466                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3467         }
3468 }
3469
3470 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3471 {
3472         struct gaudi_device *gaudi = hdev->asic_specific;
3473
3474         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3475                 return;
3476
3477         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3478         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3479         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3480 }
3481
3482 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3483 {
3484         struct gaudi_device *gaudi = hdev->asic_specific;
3485
3486         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3487                 return;
3488
3489         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3490         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3491         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3492         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3493         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3494 }
3495
3496 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3497 {
3498         struct gaudi_device *gaudi = hdev->asic_specific;
3499
3500         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3501                 return;
3502
3503         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3504         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3505 }
3506
3507 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3508 {
3509         struct gaudi_device *gaudi = hdev->asic_specific;
3510         u32 tpc_offset = 0;
3511         int tpc_id;
3512
3513         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3514                 return;
3515
3516         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3517                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3518                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3519         }
3520 }
3521
3522 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3523 {
3524         struct gaudi_device *gaudi = hdev->asic_specific;
3525         u32 nic_mask, nic_offset = 0;
3526         u32 nic_delta_between_qmans =
3527                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3528         u32 nic_delta_between_nics =
3529                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3530         int nic_id;
3531
3532         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3533                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3534
3535                 if (gaudi->hw_cap_initialized & nic_mask)
3536                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3537
3538                 nic_offset += nic_delta_between_qmans;
3539                 if (nic_id & 1) {
3540                         nic_offset -= (nic_delta_between_qmans * 2);
3541                         nic_offset += nic_delta_between_nics;
3542                 }
3543         }
3544 }
3545
3546 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3547 {
3548         struct gaudi_device *gaudi = hdev->asic_specific;
3549
3550         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3551                 return;
3552
3553         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3554         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3555         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3556         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3557 }
3558
3559 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3560 {
3561         struct gaudi_device *gaudi = hdev->asic_specific;
3562
3563         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3564                 return;
3565
3566         /* Stop CPs of HBM DMA QMANs */
3567
3568         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3569         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3570         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3571         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3572         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3573 }
3574
3575 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3576 {
3577         struct gaudi_device *gaudi = hdev->asic_specific;
3578
3579         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3580                 return;
3581
3582         /* Stop CPs of MME QMANs */
3583         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3584         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3585 }
3586
3587 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3588 {
3589         struct gaudi_device *gaudi = hdev->asic_specific;
3590
3591         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3592                 return;
3593
3594         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3595         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3596         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3597         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3598         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3599         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3600         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3601         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3602 }
3603
3604 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3605 {
3606         struct gaudi_device *gaudi = hdev->asic_specific;
3607
3608         /* Stop upper CPs of QMANs */
3609
3610         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3611                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3612                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3613                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3614                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3615
3616         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3617                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3618                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3619                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3620                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3621
3622         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3623                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3624                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3625                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3626                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3627
3628         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3629                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3630                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3631                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3632                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3633
3634         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3635                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3636                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3637                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3638                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3639
3640         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3641                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3642                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3643                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3644                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3645
3646         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3647                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3648                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3649                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3650                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3651
3652         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3653                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3654                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3655                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3656                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3657
3658         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3659                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3660                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3661                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3662                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3663
3664         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3665                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3666                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3667                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3668                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3669 }
3670
3671 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3672 {
3673         struct gaudi_device *gaudi = hdev->asic_specific;
3674
3675         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3676                 return;
3677
3678         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3679         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3680         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3681 }
3682
3683 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3684 {
3685         struct gaudi_device *gaudi = hdev->asic_specific;
3686
3687         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3688                 return;
3689
3690         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3691         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3692         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3693         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3694         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3695 }
3696
3697 static void gaudi_mme_stall(struct hl_device *hdev)
3698 {
3699         struct gaudi_device *gaudi = hdev->asic_specific;
3700
3701         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3702                 return;
3703
3704         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3705         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3706         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3707         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3708         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3709         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3710         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3711         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3712         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3713         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3714         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3715         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3716         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3717         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3718         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3719         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3720         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3721 }
3722
3723 static void gaudi_tpc_stall(struct hl_device *hdev)
3724 {
3725         struct gaudi_device *gaudi = hdev->asic_specific;
3726
3727         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3728                 return;
3729
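        /* Assert the STALL bit in the configuration space of each TPC core */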
3730         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3731         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3732         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3733         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3734         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3735         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3736         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3737         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3738 }
3739
3740 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3741 {
3742         u32 qman_offset;
3743         int i;
3744
3745         if (hdev->asic_prop.fw_security_enabled)
3746                 return;
3747
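        /* Clear the CGM (clock gating manager) configuration of all DMA, MME and TPC QMANs */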
3748         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3749                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3750                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3751
3752                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3753         }
3754
3755         WREG32(mmMME0_QM_CGM_CFG, 0);
3756         WREG32(mmMME0_QM_CGM_CFG1, 0);
3757         WREG32(mmMME2_QM_CGM_CFG, 0);
3758         WREG32(mmMME2_QM_CGM_CFG1, 0);
3759
3760         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3761                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3762                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3763
3764                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3765         }
3766 }
3767
3768 static void gaudi_enable_timestamp(struct hl_device *hdev)
3769 {
3770         /* Disable the timestamp counter */
3771         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3772
3773         /* Zero the lower/upper parts of the 64-bit counter */
3774         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3775         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3776
3777         /* Enable the counter */
3778         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3779 }
3780
3781 static void gaudi_disable_timestamp(struct hl_device *hdev)
3782 {
3783         /* Disable the timestamp counter */
3784         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3785 }
3786
3787 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3788 {
3789         u32 wait_timeout_ms;
3790
3791         dev_info(hdev->dev,
3792                 "Halting compute engines and disabling interrupts\n");
3793
3794         if (hdev->pldm)
3795                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3796         else
3797                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3798
3799         if (fw_reset)
3800                 goto skip_engines;
3801
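        /*
         * Stop the QMANs first so no new work is dispatched, then stall the
         * engine cores themselves, and only then disable the QMANs
         */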
3802         gaudi_stop_nic_qmans(hdev);
3803         gaudi_stop_mme_qmans(hdev);
3804         gaudi_stop_tpc_qmans(hdev);
3805         gaudi_stop_hbm_dma_qmans(hdev);
3806         gaudi_stop_pci_dma_qmans(hdev);
3807
3808         msleep(wait_timeout_ms);
3809
3810         gaudi_pci_dma_stall(hdev);
3811         gaudi_hbm_dma_stall(hdev);
3812         gaudi_tpc_stall(hdev);
3813         gaudi_mme_stall(hdev);
3814
3815         msleep(wait_timeout_ms);
3816
3817         gaudi_disable_nic_qmans(hdev);
3818         gaudi_disable_mme_qmans(hdev);
3819         gaudi_disable_tpc_qmans(hdev);
3820         gaudi_disable_hbm_dma_qmans(hdev);
3821         gaudi_disable_pci_dma_qmans(hdev);
3822
3823         gaudi_disable_timestamp(hdev);
3824
3825 skip_engines:
3826         gaudi_disable_msi(hdev);
3827 }
3828
3829 static int gaudi_mmu_init(struct hl_device *hdev)
3830 {
3831         struct asic_fixed_properties *prop = &hdev->asic_prop;
3832         struct gaudi_device *gaudi = hdev->asic_specific;
3833         u64 hop0_addr;
3834         int rc, i;
3835
3836         if (!hdev->mmu_enable)
3837                 return 0;
3838
3839         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3840                 return 0;
3841
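        /*
         * Each ASID gets its own hop-0 table; the tables are laid out
         * consecutively, one hop-table size apart, starting at the page
         * tables base address
         */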
3842         for (i = 0 ; i < prop->max_asid ; i++) {
3843                 hop0_addr = prop->mmu_pgt_addr +
3844                                 (i * prop->mmu_hop_table_size);
3845
3846                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3847                 if (rc) {
3848                         dev_err(hdev->dev,
3849                                 "failed to set hop0 addr for asid %d\n", i);
3850                         goto err;
3851                 }
3852         }
3853
3856         /* init the MMU cache management area (cache invalidation base address) */
3855         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3856         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3857
3858         /* mem cache invalidation */
3859         WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3860
3861         hl_mmu_invalidate_cache(hdev, true, 0);
3862
3863         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3864         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3865
3866         WREG32(mmSTLB_HOP_CONFIGURATION,
3867                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3868
3869         /*
3870          * The H/W expects the first PI after init to be 1. After wraparound
3871          * we'll write 0.
3872          */
3873         gaudi->mmu_cache_inv_pi = 1;
3874
3875         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3876
3877         return 0;
3878
3879 err:
3880         return rc;
3881 }
3882
3883 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3884 {
3885         void __iomem *dst;
3886
3887         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3888
3889         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3890 }
3891
3892 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3893 {
3894         void __iomem *dst;
3895
3896         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3897
3898         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3899 }
3900
3901 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3902 {
3903         struct dynamic_fw_load_mgr *dynamic_loader;
3904         struct cpu_dyn_regs *dyn_regs;
3905
3906         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3907
3908         /*
3909          * Here we set initial values for a few specific dynamic registers.
3910          * Before the first descriptor is read from the FW, these values
3911          * have to be hard-coded; in later stages of the protocol they are
3912          * updated automatically by reading the FW descriptor, so the data
3913          * there is always up-to-date.
3914          */
3915         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3916         dyn_regs->kmd_msg_to_cpu =
3917                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3918         dyn_regs->cpu_cmd_status_to_host =
3919                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3920
3921         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3922 }
3923
3924 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3925 {
3926         struct static_fw_load_mgr *static_loader;
3927
3928         static_loader = &hdev->fw_loader.static_loader;
3929
3930         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3931         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3932         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3933         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3934         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3935         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3936         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3937         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3938         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3939         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3940         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3941         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3942         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3943                         GAUDI_PLDM_RESET_WAIT_MSEC :
3944                         GAUDI_CPU_RESET_WAIT_MSEC;
3945 }
3946
3947 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3948 {
3949         struct asic_fixed_properties *prop = &hdev->asic_prop;
3950         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3951
3952         /* fill common fields */
3953         fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3954         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3955         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3956         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3957         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3958         fw_loader->skip_bmc = !hdev->bmc_enable;
3959         fw_loader->sram_bar_id = SRAM_BAR_ID;
3960         fw_loader->dram_bar_id = HBM_BAR_ID;
3961
3962         if (prop->dynamic_fw_load)
3963                 gaudi_init_dynamic_firmware_loader(hdev);
3964         else
3965                 gaudi_init_static_firmware_loader(hdev);
3966 }
3967
3968 static int gaudi_init_cpu(struct hl_device *hdev)
3969 {
3970         struct gaudi_device *gaudi = hdev->asic_specific;
3971         int rc;
3972
3973         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3974                 return 0;
3975
3976         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3977                 return 0;
3978
3979         /*
3980          * The device CPU works with 40 bits addresses.
3981          * This register sets the extension to 50 bits.
3982          */
3983         if (!hdev->asic_prop.fw_security_enabled)
3984                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3985
3986         rc = hl_fw_init_cpu(hdev);
3987
3988         if (rc)
3989                 return rc;
3990
3991         gaudi->hw_cap_initialized |= HW_CAP_CPU;
3992
3993         return 0;
3994 }
3995
3996 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3997 {
3998         struct cpu_dyn_regs *dyn_regs =
3999                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4000         struct asic_fixed_properties *prop = &hdev->asic_prop;
4001         struct gaudi_device *gaudi = hdev->asic_specific;
4002         u32 status, irq_handler_offset;
4003         struct hl_eq *eq;
4004         struct hl_hw_queue *cpu_pq =
4005                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4006         int err;
4007
4008         if (!hdev->cpu_queues_enable)
4009                 return 0;
4010
4011         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4012                 return 0;
4013
4014         eq = &hdev->event_queue;
4015
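        /*
         * Expose the host-resident PQ, EQ and CPU-accessible (CQ) memory
         * locations and sizes to the device CPU via the CPU_IF registers
         */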
4016         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4017         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4018
4019         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4020         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4021
4022         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4023                         lower_32_bits(hdev->cpu_accessible_dma_address));
4024         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4025                         upper_32_bits(hdev->cpu_accessible_dma_address));
4026
4027         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4028         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4029         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4030
4031         /* Used for EQ CI */
4032         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4033
4034         WREG32(mmCPU_IF_PF_PQ_PI, 0);
4035
4036         if (gaudi->multi_msi_mode)
4037                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4038         else
4039                 WREG32(mmCPU_IF_QUEUE_INIT,
4040                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4041
4042         irq_handler_offset = prop->gic_interrupts_enable ?
4043                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4044                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4045
4046         WREG32(irq_handler_offset,
4047                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4048
4049         err = hl_poll_timeout(
4050                 hdev,
4051                 mmCPU_IF_QUEUE_INIT,
4052                 status,
4053                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4054                 1000,
4055                 cpu_timeout);
4056
4057         if (err) {
4058                 dev_err(hdev->dev,
4059                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4060                 return -EIO;
4061         }
4062
4063         /* update FW application security bits */
4064         if (prop->fw_cpu_boot_dev_sts0_valid)
4065                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4066         if (prop->fw_cpu_boot_dev_sts1_valid)
4067                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4068
4069         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4070         return 0;
4071 }
4072
4073 static void gaudi_pre_hw_init(struct hl_device *hdev)
4074 {
4075         /* Perform read from the device to make sure device is up */
4076         RREG32(mmHW_STATE);
4077
4078         if (!hdev->asic_prop.fw_security_enabled) {
4079                 /* Set the access through PCI bars (Linux driver only) as
4080                  * secured
4081                  */
4082                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4083                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4084                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4085
4086                 /* Perform read to flush the waiting writes to ensure
4087                  * configuration was set in the device
4088                  */
4089                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4090         }
4091
4092         /*
4093          * Let's mark in the H/W that we have reached this point. We check
4094          * this value in the reset_before_init function to understand whether
4095          * we need to reset the chip before doing H/W init. This register is
4096          * cleared by the H/W upon H/W reset
4097          */
4098         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4099 }
4100
4101 static int gaudi_hw_init(struct hl_device *hdev)
4102 {
4103         struct gaudi_device *gaudi = hdev->asic_specific;
4104         int rc;
4105
4106         gaudi_pre_hw_init(hdev);
4107
4108         /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
4109          * So we set it here and if anyone tries to move it later to
4110          * a different address, there will be an error
4111          */
4112         if (hdev->asic_prop.iatu_done_by_fw)
4113                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4114
4115         /*
4116          * Before pushing u-boot/linux to device, need to set the hbm bar to
4117          * base address of dram
4118          */
4119         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4120                 dev_err(hdev->dev,
4121                         "failed to map HBM bar to DRAM base address\n");
4122                 return -EIO;
4123         }
4124
4125         rc = gaudi_init_cpu(hdev);
4126         if (rc) {
4127                 dev_err(hdev->dev, "failed to initialize CPU\n");
4128                 return rc;
4129         }
4130
4131         /* In case the clock gating was enabled in preboot we need to disable
4132          * it here before touching the MME/TPC registers.
4133          */
4134         gaudi_disable_clock_gating(hdev);
4135
4136         /* SRAM scrambler must be initialized after CPU is running from HBM */
4137         gaudi_init_scrambler_sram(hdev);
4138
4139         /* This is here just in case we are working without CPU */
4140         gaudi_init_scrambler_hbm(hdev);
4141
4142         gaudi_init_golden_registers(hdev);
4143
4144         rc = gaudi_mmu_init(hdev);
4145         if (rc)
4146                 return rc;
4147
4148         gaudi_init_security(hdev);
4149
4150         gaudi_init_pci_dma_qmans(hdev);
4151
4152         gaudi_init_hbm_dma_qmans(hdev);
4153
4154         gaudi_init_mme_qmans(hdev);
4155
4156         gaudi_init_tpc_qmans(hdev);
4157
4158         gaudi_init_nic_qmans(hdev);
4159
4160         gaudi_enable_timestamp(hdev);
4161
4162         /* MSI must be enabled before CPU queues and NIC are initialized */
4163         rc = gaudi_enable_msi(hdev);
4164         if (rc)
4165                 goto disable_queues;
4166
4167         /* must be called after MSI was enabled */
4168         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4169         if (rc) {
4170                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4171                         rc);
4172                 goto disable_msi;
4173         }
4174
4175         /* Perform read from the device to flush all configuration */
4176         RREG32(mmHW_STATE);
4177
4178         return 0;
4179
4180 disable_msi:
4181         gaudi_disable_msi(hdev);
4182 disable_queues:
4183         gaudi_disable_mme_qmans(hdev);
4184         gaudi_disable_pci_dma_qmans(hdev);
4185
4186         return rc;
4187 }
4188
4189 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4190 {
4191         struct cpu_dyn_regs *dyn_regs =
4192                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4193         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4194         struct gaudi_device *gaudi = hdev->asic_specific;
4195         bool driver_performs_reset;
4196
4197         if (!hard_reset) {
4198                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4199                 return;
4200         }
4201
4202         if (hdev->pldm) {
4203                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4204                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4205         } else {
4206                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4207                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4208         }
4209
4210         if (fw_reset) {
4211                 dev_info(hdev->dev,
4212                         "Firmware performs HARD reset, going to wait %dms\n",
4213                         reset_timeout_ms);
4214
4215                 goto skip_reset;
4216         }
4217
4218         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4219                                         !hdev->asic_prop.hard_reset_done_by_fw);
4220
4221         /* Set device to handle FLR by H/W as we will put the device CPU to
4222          * halt mode
4223          */
4224         if (driver_performs_reset)
4225                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4226                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4227
4228         /* If linux is loaded in the device CPU we need to communicate with it
4229          * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4230          * registers in case of old F/Ws
4231          */
4232         if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4233                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4234                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4235                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4236
4237                 WREG32(irq_handler_offset,
4238                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4239
4240                 /* This is a hail-mary attempt to revive the card in the small chance that the
4241                  * f/w has experienced a watchdog event, which caused it to return back to preboot.
4242                  * In that case, triggering reset through GIC won't help. We need to trigger the
4243                  * reset as if Linux wasn't loaded.
4244                  *
4245                  * We do it only if the reset cause was HB, because that would be the indication
4246                  * of such an event.
4247                  *
4248                  * In case watchdog hasn't expired but we still got HB, then this won't do any
4249                  * damage.
4250                  */
4251                 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4252                         if (hdev->asic_prop.hard_reset_done_by_fw)
4253                                 hl_fw_ask_hard_reset_without_linux(hdev);
4254                         else
4255                                 hl_fw_ask_halt_machine_without_linux(hdev);
4256                 }
4257         } else {
4258                 if (hdev->asic_prop.hard_reset_done_by_fw)
4259                         hl_fw_ask_hard_reset_without_linux(hdev);
4260                 else
4261                         hl_fw_ask_halt_machine_without_linux(hdev);
4262         }
4263
4264         if (driver_performs_reset) {
4265
4266                 /* Configure the reset registers. Must be done as early as
4267                  * possible in case we fail during H/W initialization
4268                  */
4269                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4270                                                 (CFG_RST_H_DMA_MASK |
4271                                                 CFG_RST_H_MME_MASK |
4272                                                 CFG_RST_H_SM_MASK |
4273                                                 CFG_RST_H_TPC_7_MASK));
4274
4275                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4276
4277                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4278                                                 (CFG_RST_H_HBM_MASK |
4279                                                 CFG_RST_H_TPC_7_MASK |
4280                                                 CFG_RST_H_NIC_MASK |
4281                                                 CFG_RST_H_SM_MASK |
4282                                                 CFG_RST_H_DMA_MASK |
4283                                                 CFG_RST_H_MME_MASK |
4284                                                 CFG_RST_H_CPU_MASK |
4285                                                 CFG_RST_H_MMU_MASK));
4286
4287                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4288                                                 (CFG_RST_L_IF_MASK |
4289                                                 CFG_RST_L_PSOC_MASK |
4290                                                 CFG_RST_L_TPC_MASK));
4291
4292                 msleep(cpu_timeout_ms);
4293
4294                 /* Tell ASIC not to re-initialize PCIe */
4295                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4296
4297                 /* Restart BTL/BLR upon hard-reset */
4298                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4299
4300                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4301                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4302
4303                 dev_info(hdev->dev,
4304                         "Issued HARD reset command, going to wait %dms\n",
4305                         reset_timeout_ms);
4306         } else {
4307                 dev_info(hdev->dev,
4308                         "Firmware performs HARD reset, going to wait %dms\n",
4309                         reset_timeout_ms);
4310         }
4311
4312 skip_reset:
4313         /*
4314          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4315          * itself is in reset. Need to wait until the reset is deasserted
4316          */
4317         msleep(reset_timeout_ms);
4318
4319         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4320         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4321                 dev_err(hdev->dev,
4322                         "Timeout while waiting for device to reset 0x%x\n",
4323                         status);
4324
4325         if (gaudi) {
4326                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4327                                                 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4328                                                 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4329                                                 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4330                                                 HW_CAP_HBM_SCRAMBLER);
4331
4332                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4333
4334                 hdev->device_cpu_is_halted = false;
4335         }
4336 }
4337
4338 static int gaudi_suspend(struct hl_device *hdev)
4339 {
4340         int rc;
4341
4342         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4343         if (rc)
4344                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4345
4346         return rc;
4347 }
4348
4349 static int gaudi_resume(struct hl_device *hdev)
4350 {
4351         return gaudi_init_iatu(hdev);
4352 }
4353
4354 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4355                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4356 {
4357         int rc;
4358
4359         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4360                         VM_DONTCOPY | VM_NORESERVE;
4361
4362         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4363                                 (dma_addr - HOST_PHYS_BASE), size);
4364         if (rc)
4365                 dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
4366
4367         return rc;
4368 }
4369
4370 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4371 {
4372         struct cpu_dyn_regs *dyn_regs =
4373                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4374         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4375         struct gaudi_device *gaudi = hdev->asic_specific;
4376         bool invalid_queue = false;
4377         int dma_id;
4378
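        /*
         * Translate the queue ID into its QMAN's PQ_PI doorbell register.
         * For the DMA queues this is mmDMA0_QM_PQ_PI_0 plus the offset of
         * the assigned DMA engine plus 4 bytes per PQ index in that QMAN.
         */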
4379         switch (hw_queue_id) {
4380         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4381                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4382                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4383                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4384                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4385                 break;
4386
4387         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4388                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4389                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4390                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4391                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4392                 break;
4393
4394         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4395                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4396                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4397                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4398                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4399                 break;
4400
4401         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4402                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4403                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4404                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4405                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4406                 break;
4407
4408         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4409                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4410                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4411                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4412                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4413                 break;
4414
4415         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4416                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4417                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4418                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4419                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4420                 break;
4421
4422         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4423                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4424                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4425                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4426                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4427                 break;
4428
4429         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4430                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4431                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4432                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4433                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4434                 break;
4435
4436         case GAUDI_QUEUE_ID_CPU_PQ:
4437                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4438                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4439                 else
4440                         invalid_queue = true;
4441                 break;
4442
4443         case GAUDI_QUEUE_ID_MME_0_0:
4444                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4445                 break;
4446
4447         case GAUDI_QUEUE_ID_MME_0_1:
4448                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4449                 break;
4450
4451         case GAUDI_QUEUE_ID_MME_0_2:
4452                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4453                 break;
4454
4455         case GAUDI_QUEUE_ID_MME_0_3:
4456                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4457                 break;
4458
4459         case GAUDI_QUEUE_ID_MME_1_0:
4460                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4461                 break;
4462
4463         case GAUDI_QUEUE_ID_MME_1_1:
4464                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4465                 break;
4466
4467         case GAUDI_QUEUE_ID_MME_1_2:
4468                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4469                 break;
4470
4471         case GAUDI_QUEUE_ID_MME_1_3:
4472                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4473                 break;
4474
4475         case GAUDI_QUEUE_ID_TPC_0_0:
4476                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4477                 break;
4478
4479         case GAUDI_QUEUE_ID_TPC_0_1:
4480                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4481                 break;
4482
4483         case GAUDI_QUEUE_ID_TPC_0_2:
4484                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4485                 break;
4486
4487         case GAUDI_QUEUE_ID_TPC_0_3:
4488                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4489                 break;
4490
4491         case GAUDI_QUEUE_ID_TPC_1_0:
4492                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4493                 break;
4494
4495         case GAUDI_QUEUE_ID_TPC_1_1:
4496                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4497                 break;
4498
4499         case GAUDI_QUEUE_ID_TPC_1_2:
4500                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4501                 break;
4502
4503         case GAUDI_QUEUE_ID_TPC_1_3:
4504                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4505                 break;
4506
4507         case GAUDI_QUEUE_ID_TPC_2_0:
4508                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4509                 break;
4510
4511         case GAUDI_QUEUE_ID_TPC_2_1:
4512                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4513                 break;
4514
4515         case GAUDI_QUEUE_ID_TPC_2_2:
4516                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4517                 break;
4518
4519         case GAUDI_QUEUE_ID_TPC_2_3:
4520                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4521                 break;
4522
4523         case GAUDI_QUEUE_ID_TPC_3_0:
4524                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4525                 break;
4526
4527         case GAUDI_QUEUE_ID_TPC_3_1:
4528                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4529                 break;
4530
4531         case GAUDI_QUEUE_ID_TPC_3_2:
4532                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4533                 break;
4534
4535         case GAUDI_QUEUE_ID_TPC_3_3:
4536                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4537                 break;
4538
4539         case GAUDI_QUEUE_ID_TPC_4_0:
4540                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4541                 break;
4542
4543         case GAUDI_QUEUE_ID_TPC_4_1:
4544                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4545                 break;
4546
4547         case GAUDI_QUEUE_ID_TPC_4_2:
4548                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4549                 break;
4550
4551         case GAUDI_QUEUE_ID_TPC_4_3:
4552                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4553                 break;
4554
4555         case GAUDI_QUEUE_ID_TPC_5_0:
4556                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4557                 break;
4558
4559         case GAUDI_QUEUE_ID_TPC_5_1:
4560                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4561                 break;
4562
4563         case GAUDI_QUEUE_ID_TPC_5_2:
4564                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4565                 break;
4566
4567         case GAUDI_QUEUE_ID_TPC_5_3:
4568                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4569                 break;
4570
4571         case GAUDI_QUEUE_ID_TPC_6_0:
4572                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4573                 break;
4574
4575         case GAUDI_QUEUE_ID_TPC_6_1:
4576                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4577                 break;
4578
4579         case GAUDI_QUEUE_ID_TPC_6_2:
4580                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4581                 break;
4582
4583         case GAUDI_QUEUE_ID_TPC_6_3:
4584                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4585                 break;
4586
4587         case GAUDI_QUEUE_ID_TPC_7_0:
4588                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4589                 break;
4590
4591         case GAUDI_QUEUE_ID_TPC_7_1:
4592                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4593                 break;
4594
4595         case GAUDI_QUEUE_ID_TPC_7_2:
4596                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4597                 break;
4598
4599         case GAUDI_QUEUE_ID_TPC_7_3:
4600                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4601                 break;
4602
4603         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4604                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4605                         invalid_queue = true;
4606
4607                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4608                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4609                 break;
4610
4611         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4612                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4613                         invalid_queue = true;
4614
4615                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4616                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4617                 break;
4618
4619         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4620                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4621                         invalid_queue = true;
4622
4623                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4624                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4625                 break;
4626
4627         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4628                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4629                         invalid_queue = true;
4630
4631                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4632                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4633                 break;
4634
4635         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4636                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4637                         invalid_queue = true;
4638
4639                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4640                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4641                 break;
4642
4643         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4644                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4645                         invalid_queue = true;
4646
4647                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4648                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4649                 break;
4650
4651         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4652                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4653                         invalid_queue = true;
4654
4655                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4656                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4657                 break;
4658
4659         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4660                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4661                         invalid_queue = true;
4662
4663                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4664                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4665                 break;
4666
4667         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4668                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4669                         invalid_queue = true;
4670
4671                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4672                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4673                 break;
4674
4675         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4676                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4677                         invalid_queue = true;
4678
4679                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4680                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4681                 break;
4682
4683         default:
4684                 invalid_queue = true;
4685         }
4686
4687         if (invalid_queue) {
4688                 /* Should never get here */
4689                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4690                         hw_queue_id);
4691                 return;
4692         }
4693
4694         db_value = pi;
4695
4696         /* ring the doorbell */
4697         WREG32(db_reg_offset, db_value);
4698
4699         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4700                 /* make sure device CPU will read latest data from host */
4701                 mb();
4702
4703                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4704                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4705                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4706
4707                 WREG32(irq_handler_offset,
4708                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4709         }
4710 }
4711
4712 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4713                                 struct hl_bd *bd)
4714 {
4715         __le64 *pbd = (__le64 *) bd;
4716
4717         /* The QMANs are in host memory, so a simple copy suffices */
4718         pqe[0] = pbd[0];
4719         pqe[1] = pbd[1];
4720 }
4721
4722 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4723                                         dma_addr_t *dma_handle, gfp_t flags)
4724 {
4725         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4726                                                 dma_handle, flags);
4727
4728         /* Shift to the device's base physical address of host memory */
4729         if (kernel_addr)
4730                 *dma_handle += HOST_PHYS_BASE;
4731
4732         return kernel_addr;
4733 }
4734
4735 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4736                 void *cpu_addr, dma_addr_t dma_handle)
4737 {
4738         /* Cancel the device's base physical address of host memory */
4739         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4740
4741         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4742 }
4743
4744 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4745 {
4746         struct asic_fixed_properties *prop = &hdev->asic_prop;
4747         u64  cur_addr = DRAM_BASE_ADDR_USER;
4748         u32 val;
4749         u32 chunk_size;
4750         int rc, dma_id;
4751
4752         while (cur_addr < prop->dram_end_address) {
4753                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4754                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4755
4756                         chunk_size =
4757                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4758
4759                         dev_dbg(hdev->dev,
4760                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4761                                 cur_addr, cur_addr + chunk_size);
4762
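                        /*
                         * The commit below sets the MEM_SET bit, so the DMA
                         * core performs a memset: the value programmed into
                         * the SRC_BASE registers is used as the fill pattern
                         * rather than as a real source address
                         */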
4763                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
4764                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
4765                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4766                                                 lower_32_bits(cur_addr));
4767                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4768                                                 upper_32_bits(cur_addr));
4769                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4770                                         chunk_size);
4771                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4772                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4773                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4774
4775                         cur_addr += chunk_size;
4776
4777                         if (cur_addr == prop->dram_end_address)
4778                                 break;
4779                 }
4780
4781                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4782                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4783
4784                         rc = hl_poll_timeout(
4785                                 hdev,
4786                                 mmDMA0_CORE_STS0 + dma_offset,
4787                                 val,
4788                                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4789                                 1000,
4790                                 HBM_SCRUBBING_TIMEOUT_US);
4791
4792                         if (rc) {
4793                                 dev_err(hdev->dev,
4794                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4795                                         dma_id);
4796                                 return -EIO;
4797                         }
4798                 }
4799         }
4800
4801         return 0;
4802 }
4803
4804 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4805 {
4806         struct asic_fixed_properties *prop = &hdev->asic_prop;
4807         int rc = 0;
4808         u64 val = 0;
4809
4810         if (!hdev->memory_scrub)
4811                 return 0;
4812
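        /* addr == 0 && size == 0 means scrub everything: SRAM first, then all of HBM */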
4813         if (!addr && !size) {
4814                 /* Wait till device is idle */
4815                 rc = hl_poll_timeout(
4816                                 hdev,
4817                                 mmDMA0_CORE_STS0/* dummy */,
4818                                 val/* dummy */,
4819                                 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4820                                                 0, NULL)),
4821                                                 1000,
4822                                                 HBM_SCRUBBING_TIMEOUT_US);
4823                 if (rc) {
4824                         dev_err(hdev->dev, "timeout while waiting for device to be idle\n");
4825                         return -EIO;
4826                 }
4827
4828                 /* Scrub SRAM */
4829                 addr = prop->sram_user_base_address;
4830                 size = hdev->pldm ? 0x10000 :
4831                                 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4832                 val = 0x7777777777777777ull;
4833
4834                 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4835                 if (rc) {
4836                         dev_err(hdev->dev,
4837                                 "Failed to clear SRAM in mem scrub all\n");
4838                         return rc;
4839                 }
4840
4841                 /* Scrub HBM using all DMA channels in parallel */
4842                 rc = gaudi_hbm_scrubbing(hdev);
4843                 if (rc)
4844                         dev_err(hdev->dev,
4845                                 "Failed to clear HBM in mem scrub all\n");
4846         }
4847
4848         return rc;
4849 }
4850
4851 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4852                                 u32 queue_id, dma_addr_t *dma_handle,
4853                                 u16 *queue_len)
4854 {
4855         struct gaudi_device *gaudi = hdev->asic_specific;
4856         struct gaudi_internal_qman_info *q;
4857
4858         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4859                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4860                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4861                 return NULL;
4862         }
4863
4864         q = &gaudi->internal_qmans[queue_id];
4865         *dma_handle = q->pq_dma_addr;
4866         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4867
4868         return q->pq_kernel_addr;
4869 }
4870
4871 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4872                                 u16 len, u32 timeout, u64 *result)
4873 {
4874         struct gaudi_device *gaudi = hdev->asic_specific;
4875
4876         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4877                 if (result)
4878                         *result = 0;
4879                 return 0;
4880         }
4881
4882         if (!timeout)
4883                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4884
4885         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4886                                                 timeout, result);
4887 }
4888
4889 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4890 {
4891         struct packet_msg_prot *fence_pkt;
4892         dma_addr_t pkt_dma_addr;
4893         u32 fence_val, tmp, timeout_usec;
4894         dma_addr_t fence_dma_addr;
4895         u32 *fence_ptr;
4896         int rc;
4897
4898         if (hdev->pldm)
4899                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4900         else
4901                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4902
4903         fence_val = GAUDI_QMAN0_FENCE_VAL;
4904
4905         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4906                                                         &fence_dma_addr);
4907         if (!fence_ptr) {
4908                 dev_err(hdev->dev,
4909                         "Failed to allocate memory for H/W queue %d testing\n",
4910                         hw_queue_id);
4911                 return -ENOMEM;
4912         }
4913
4914         *fence_ptr = 0;
4915
4916         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4917                                         sizeof(struct packet_msg_prot),
4918                                         GFP_KERNEL, &pkt_dma_addr);
4919         if (!fence_pkt) {
4920                 dev_err(hdev->dev,
4921                         "Failed to allocate packet for H/W queue %d testing\n",
4922                         hw_queue_id);
4923                 rc = -ENOMEM;
4924                 goto free_fence_ptr;
4925         }
4926
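        /*
         * Build a MSG_PROT packet that writes a known fence value to the host
         * buffer; seeing that value appear proves the queue's CP fetched and
         * executed the packet
         */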
4927         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4928         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4929         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4930
4931         fence_pkt->ctl = cpu_to_le32(tmp);
4932         fence_pkt->value = cpu_to_le32(fence_val);
4933         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4934
4935         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4936                                         sizeof(struct packet_msg_prot),
4937                                         pkt_dma_addr);
4938         if (rc) {
4939                 dev_err(hdev->dev,
4940                         "Failed to send fence packet to H/W queue %d\n",
4941                         hw_queue_id);
4942                 goto free_pkt;
4943         }
4944
4945         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4946                                         1000, timeout_usec, true);
4947
4948         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4949
4950         if (rc == -ETIMEDOUT) {
4951                 dev_err(hdev->dev,
4952                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4953                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4954                 rc = -EIO;
4955         }
4956
4957 free_pkt:
4958         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4959                                         pkt_dma_addr);
4960 free_fence_ptr:
4961         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4962                                         fence_dma_addr);
4963         return rc;
4964 }
4965
4966 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4967 {
4968         struct gaudi_device *gaudi = hdev->asic_specific;
4969
4970         /*
4971          * Check the capability here because send_cpu_message() won't update
4972          * the result value if the capability isn't set
4973          */
4974         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4975                 return 0;
4976
4977         return hl_fw_test_cpu_queue(hdev);
4978 }
4979
4980 static int gaudi_test_queues(struct hl_device *hdev)
4981 {
4982         int i, rc, ret_val = 0;
4983
4984         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4985                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4986                         rc = gaudi_test_queue(hdev, i);
4987                         if (rc)
4988                                 ret_val = -EINVAL;
4989                 }
4990         }
4991
4992         rc = gaudi_test_cpu_queue(hdev);
4993         if (rc)
4994                 ret_val = -EINVAL;
4995
4996         return ret_val;
4997 }
4998
4999 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5000                 gfp_t mem_flags, dma_addr_t *dma_handle)
5001 {
5002         void *kernel_addr;
5003
5004         if (size > GAUDI_DMA_POOL_BLK_SIZE)
5005                 return NULL;
5006
5007         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5008
5009         /* Shift to the device's base physical address of host memory */
5010         if (kernel_addr)
5011                 *dma_handle += HOST_PHYS_BASE;
5012
5013         return kernel_addr;
5014 }
5015
5016 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
5017                         dma_addr_t dma_addr)
5018 {
5019         /* Cancel the device's base physical address of host memory */
5020         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5021
5022         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5023 }
5024
5025 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
5026                                         size_t size, dma_addr_t *dma_handle)
5027 {
5028         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5029 }
5030
5031 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
5032                                                 size_t size, void *vaddr)
5033 {
5034         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5035 }
5036
5037 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5038                         int nents, enum dma_data_direction dir)
5039 {
5040         struct scatterlist *sg;
5041         int i;
5042
5043         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5044                 return -ENOMEM;
5045
5046         /* Shift to the device's base physical address of host memory */
5047         for_each_sg(sgl, sg, nents, i)
5048                 sg->dma_address += HOST_PHYS_BASE;
5049
5050         return 0;
5051 }
5052
5053 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5054                         int nents, enum dma_data_direction dir)
5055 {
5056         struct scatterlist *sg;
5057         int i;
5058
5059         /* Cancel the device's base physical address of host memory */
5060         for_each_sg(sgl, sg, nents, i)
5061                 sg->dma_address -= HOST_PHYS_BASE;
5062
5063         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5064 }
5065
5066 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5067                                         struct sg_table *sgt)
5068 {
5069         struct scatterlist *sg, *sg_next_iter;
5070         u32 count, dma_desc_cnt;
5071         u64 len, len_next;
5072         dma_addr_t addr, addr_next;
5073
5074         dma_desc_cnt = 0;
5075
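        /*
         * Merge physically contiguous SG entries (up to DMA_MAX_TRANSFER_SIZE
         * per descriptor) and count how many LIN_DMA packets the patched CB
         * will need
         */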
5076         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5077
5078                 len = sg_dma_len(sg);
5079                 addr = sg_dma_address(sg);
5080
5081                 if (len == 0)
5082                         break;
5083
5084                 while ((count + 1) < sgt->nents) {
5085                         sg_next_iter = sg_next(sg);
5086                         len_next = sg_dma_len(sg_next_iter);
5087                         addr_next = sg_dma_address(sg_next_iter);
5088
5089                         if (len_next == 0)
5090                                 break;
5091
5092                         if ((addr + len == addr_next) &&
5093                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5094                                 len += len_next;
5095                                 count++;
5096                                 sg = sg_next_iter;
5097                         } else {
5098                                 break;
5099                         }
5100                 }
5101
5102                 dma_desc_cnt++;
5103         }
5104
5105         return dma_desc_cnt * sizeof(struct packet_lin_dma);
5106 }
5107
5108 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5109                                 struct hl_cs_parser *parser,
5110                                 struct packet_lin_dma *user_dma_pkt,
5111                                 u64 addr, enum dma_data_direction dir)
5112 {
5113         struct hl_userptr *userptr;
5114         int rc;
5115
5116         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5117                         parser->job_userptr_list, &userptr))
5118                 goto already_pinned;
5119
5120         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5121         if (!userptr)
5122                 return -ENOMEM;
5123
5124         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5125                                 userptr);
5126         if (rc)
5127                 goto free_userptr;
5128
5129         list_add_tail(&userptr->job_node, parser->job_userptr_list);
5130
5131         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5132                                         userptr->sgt->nents, dir);
5133         if (rc) {
5134                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5135                 goto unpin_memory;
5136         }
5137
5138         userptr->dma_mapped = true;
5139         userptr->dir = dir;
5140
5141 already_pinned:
5142         parser->patched_cb_size +=
5143                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5144
5145         return 0;
5146
5147 unpin_memory:
5148         list_del(&userptr->job_node);
5149         hl_unpin_host_memory(hdev, userptr);
5150 free_userptr:
5151         kfree(userptr);
5152         return rc;
5153 }
5154
5155 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5156                                 struct hl_cs_parser *parser,
5157                                 struct packet_lin_dma *user_dma_pkt,
5158                                 bool src_in_host)
5159 {
5160         enum dma_data_direction dir;
5161         bool skip_host_mem_pin = false, user_memset;
5162         u64 addr;
5163         int rc = 0;
5164
5165         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5166                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5167                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5168
5169         if (src_in_host) {
5170                 if (user_memset)
5171                         skip_host_mem_pin = true;
5172
5173                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5174                 dir = DMA_TO_DEVICE;
5175                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5176         } else {
5177                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5178                 dir = DMA_FROM_DEVICE;
5179                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5180                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5181                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5182         }
5183
5184         if (skip_host_mem_pin)
5185                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5186         else
5187                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5188                                                 addr, dir);
5189
5190         return rc;
5191 }
5192
5193 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5194                                 struct hl_cs_parser *parser,
5195                                 struct packet_lin_dma *user_dma_pkt)
5196 {
5197         bool src_in_host = false;
5198         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5199                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5200                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5201
5202         dev_dbg(hdev->dev, "DMA packet details:\n");
5203         dev_dbg(hdev->dev, "source == 0x%llx\n",
5204                                 le64_to_cpu(user_dma_pkt->src_addr));
5205         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5206         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5207
5208         /*
5209          * Special handling for DMA with size 0. Bypass all validations
5210          * because no transactions will be done except for WR_COMP, which
5211          * is not a security issue
5212          */
5213         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5214                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5215                 return 0;
5216         }
5217
5218         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5219                 src_in_host = true;
5220
5221         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5222                                                 src_in_host);
5223 }
5224
5225 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5226                                         struct hl_cs_parser *parser,
5227                                         struct packet_load_and_exe *user_pkt)
5228 {
5229         u32 cfg;
5230
5231         cfg = le32_to_cpu(user_pkt->cfg);
5232
5233         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5234                 dev_err(hdev->dev,
5235                         "User not allowed to use Load and Execute\n");
5236                 return -EPERM;
5237         }
5238
5239         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5240
5241         return 0;
5242 }
5243
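/*
 * First pass over the user CB: reject packets that user space may not
 * submit (MSG_PROT, CP_DMA, STOP, WREG_BULK and LOAD_AND_EXE with the DST
 * configuration bit set) and accumulate the size of the patched CB.
 * LIN_DMA packets get additional host-memory validation only when the MMU
 * is disabled; with the MMU enabled they are accounted for as-is.
 */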
5244 static int gaudi_validate_cb(struct hl_device *hdev,
5245                         struct hl_cs_parser *parser, bool is_mmu)
5246 {
5247         u32 cb_parsed_length = 0;
5248         int rc = 0;
5249
5250         parser->patched_cb_size = 0;
5251
5252         /* user_cb_size is more than 0 so the loop will always be executed */
5253         while (cb_parsed_length < parser->user_cb_size) {
5254                 enum packet_id pkt_id;
5255                 u16 pkt_size;
5256                 struct gaudi_packet *user_pkt;
5257
5258                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5259
5260                 pkt_id = (enum packet_id) (
5261                                 (le64_to_cpu(user_pkt->header) &
5262                                 PACKET_HEADER_PACKET_ID_MASK) >>
5263                                         PACKET_HEADER_PACKET_ID_SHIFT);
5264
5265                 if (!validate_packet_id(pkt_id)) {
5266                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5267                         rc = -EINVAL;
5268                         break;
5269                 }
5270
5271                 pkt_size = gaudi_packet_sizes[pkt_id];
5272                 cb_parsed_length += pkt_size;
5273                 if (cb_parsed_length > parser->user_cb_size) {
5274                         dev_err(hdev->dev,
5275                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5276                         rc = -EINVAL;
5277                         break;
5278                 }
5279
5280                 switch (pkt_id) {
5281                 case PACKET_MSG_PROT:
5282                         dev_err(hdev->dev,
5283                                 "User not allowed to use MSG_PROT\n");
5284                         rc = -EPERM;
5285                         break;
5286
5287                 case PACKET_CP_DMA:
5288                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5289                         rc = -EPERM;
5290                         break;
5291
5292                 case PACKET_STOP:
5293                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5294                         rc = -EPERM;
5295                         break;
5296
5297                 case PACKET_WREG_BULK:
5298                         dev_err(hdev->dev,
5299                                 "User not allowed to use WREG_BULK\n");
5300                         rc = -EPERM;
5301                         break;
5302
5303                 case PACKET_LOAD_AND_EXE:
5304                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5305                                 (struct packet_load_and_exe *) user_pkt);
5306                         break;
5307
5308                 case PACKET_LIN_DMA:
5309                         parser->contains_dma_pkt = true;
5310                         if (is_mmu)
5311                                 parser->patched_cb_size += pkt_size;
5312                         else
5313                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5314                                         (struct packet_lin_dma *) user_pkt);
5315                         break;
5316
5317                 case PACKET_WREG_32:
5318                 case PACKET_MSG_LONG:
5319                 case PACKET_MSG_SHORT:
5320                 case PACKET_REPEAT:
5321                 case PACKET_FENCE:
5322                 case PACKET_NOP:
5323                 case PACKET_ARB_POINT:
5324                         parser->patched_cb_size += pkt_size;
5325                         break;
5326
5327                 default:
5328                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5329                                 pkt_id);
5330                         rc = -EINVAL;
5331                         break;
5332                 }
5333
5334                 if (rc)
5335                         break;
5336         }
5337
5338         /*
5339          * The new CB should have space at the end for two MSG_PROT packets:
5340          * 1. A packet that will act as a completion packet
5341          * 2. A packet that will generate MSI-X interrupt
5342          */
5343         if (parser->completion)
5344                 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5345
5346         return rc;
5347 }
5348
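/*
 * Expand one user LIN_DMA packet into a packet per merged SG entry of the
 * pinned host memory. All but the first generated packet drop the
 * engine-barrier bit, and only the last one keeps the user's WR_COMP
 * setting, so completion is signalled once, as the user requested.
 * A host-side memset (which has no host buffer to scatter) is copied
 * unchanged.
 */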
5349 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5350                                 struct hl_cs_parser *parser,
5351                                 struct packet_lin_dma *user_dma_pkt,
5352                                 struct packet_lin_dma *new_dma_pkt,
5353                                 u32 *new_dma_pkt_size)
5354 {
5355         struct hl_userptr *userptr;
5356         struct scatterlist *sg, *sg_next_iter;
5357         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5358         u64 len, len_next;
5359         dma_addr_t dma_addr, dma_addr_next;
5360         u64 device_memory_addr, addr;
5361         enum dma_data_direction dir;
5362         struct sg_table *sgt;
5363         bool src_in_host = false;
5364         bool skip_host_mem_pin = false;
5365         bool user_memset;
5366
5367         ctl = le32_to_cpu(user_dma_pkt->ctl);
5368
5369         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5370                 src_in_host = true;
5371
5372         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5373                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5374
5375         if (src_in_host) {
5376                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5377                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5378                 dir = DMA_TO_DEVICE;
5379                 if (user_memset)
5380                         skip_host_mem_pin = true;
5381         } else {
5382                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5383                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5384                 dir = DMA_FROM_DEVICE;
5385         }
5386
5387         if ((!skip_host_mem_pin) &&
5388                 (!hl_userptr_is_pinned(hdev, addr,
5389                                         le32_to_cpu(user_dma_pkt->tsize),
5390                                         parser->job_userptr_list, &userptr))) {
5391                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5392                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5393                 return -EFAULT;
5394         }
5395
5396         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5397                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5398                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5399                 return 0;
5400         }
5401
5402         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5403
5404         sgt = userptr->sgt;
5405         dma_desc_cnt = 0;
5406
5407         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5408                 len = sg_dma_len(sg);
5409                 dma_addr = sg_dma_address(sg);
5410
5411                 if (len == 0)
5412                         break;
5413
5414                 while ((count + 1) < sgt->nents) {
5415                         sg_next_iter = sg_next(sg);
5416                         len_next = sg_dma_len(sg_next_iter);
5417                         dma_addr_next = sg_dma_address(sg_next_iter);
5418
5419                         if (len_next == 0)
5420                                 break;
5421
5422                         if ((dma_addr + len == dma_addr_next) &&
5423                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5424                                 len += len_next;
5425                                 count++;
5426                                 sg = sg_next_iter;
5427                         } else {
5428                                 break;
5429                         }
5430                 }
5431
5432                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5433                 if (likely(dma_desc_cnt))
5434                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5435                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5436                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5437                 new_dma_pkt->tsize = cpu_to_le32(len);
5438
5439                 if (dir == DMA_TO_DEVICE) {
5440                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5441                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5442                 } else {
5443                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5444                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5445                 }
5446
5447                 if (!user_memset)
5448                         device_memory_addr += len;
5449                 dma_desc_cnt++;
5450                 new_dma_pkt++;
5451         }
5452
5453         if (!dma_desc_cnt) {
5454                 dev_err(hdev->dev,
5455                         "No SG entries found when patching DMA packet\n");
5456                 return -EFAULT;
5457         }
5458
5459         /* Fix the last dma packet - wrcomp must be as user set it */
5460         new_dma_pkt--;
5461         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5462
5463         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5464
5465         return 0;
5466 }
5467
5468 static int gaudi_patch_cb(struct hl_device *hdev,
5469                                 struct hl_cs_parser *parser)
5470 {
5471         u32 cb_parsed_length = 0;
5472         u32 cb_patched_cur_length = 0;
5473         int rc = 0;
5474
5475         /* user_cb_size is more than 0 so the loop will always be executed */
5476         while (cb_parsed_length < parser->user_cb_size) {
5477                 enum packet_id pkt_id;
5478                 u16 pkt_size;
5479                 u32 new_pkt_size = 0;
5480                 struct gaudi_packet *user_pkt, *kernel_pkt;
5481
5482                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5483                 kernel_pkt = parser->patched_cb->kernel_address +
5484                                         cb_patched_cur_length;
5485
5486                 pkt_id = (enum packet_id) (
5487                                 (le64_to_cpu(user_pkt->header) &
5488                                 PACKET_HEADER_PACKET_ID_MASK) >>
5489                                         PACKET_HEADER_PACKET_ID_SHIFT);
5490
5491                 if (!validate_packet_id(pkt_id)) {
5492                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5493                         rc = -EINVAL;
5494                         break;
5495                 }
5496
5497                 pkt_size = gaudi_packet_sizes[pkt_id];
5498                 cb_parsed_length += pkt_size;
5499                 if (cb_parsed_length > parser->user_cb_size) {
5500                         dev_err(hdev->dev,
5501                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5502                         rc = -EINVAL;
5503                         break;
5504                 }
5505
5506                 switch (pkt_id) {
5507                 case PACKET_LIN_DMA:
5508                         rc = gaudi_patch_dma_packet(hdev, parser,
5509                                         (struct packet_lin_dma *) user_pkt,
5510                                         (struct packet_lin_dma *) kernel_pkt,
5511                                         &new_pkt_size);
5512                         cb_patched_cur_length += new_pkt_size;
5513                         break;
5514
5515                 case PACKET_MSG_PROT:
5516                         dev_err(hdev->dev,
5517                                 "User not allowed to use MSG_PROT\n");
5518                         rc = -EPERM;
5519                         break;
5520
5521                 case PACKET_CP_DMA:
5522                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5523                         rc = -EPERM;
5524                         break;
5525
5526                 case PACKET_STOP:
5527                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5528                         rc = -EPERM;
5529                         break;
5530
5531                 case PACKET_WREG_32:
5532                 case PACKET_WREG_BULK:
5533                 case PACKET_MSG_LONG:
5534                 case PACKET_MSG_SHORT:
5535                 case PACKET_REPEAT:
5536                 case PACKET_FENCE:
5537                 case PACKET_NOP:
5538                 case PACKET_ARB_POINT:
5539                 case PACKET_LOAD_AND_EXE:
5540                         memcpy(kernel_pkt, user_pkt, pkt_size);
5541                         cb_patched_cur_length += pkt_size;
5542                         break;
5543
5544                 default:
5545                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5546                                 pkt_id);
5547                         rc = -EINVAL;
5548                         break;
5549                 }
5550
5551                 if (rc)
5552                         break;
5553         }
5554
5555         return rc;
5556 }
5557
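/*
 * CS parsing with the MMU enabled: copy the user CB verbatim into a
 * kernel-owned CB (leaving room for the two trailing MSG_PROT packets when
 * a completion is requested) and run the validation pass on the copy.
 * No DMA patching is needed in this mode.
 */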
5558 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5559                 struct hl_cs_parser *parser)
5560 {
5561         u64 patched_cb_handle;
5562         u32 patched_cb_size;
5563         struct hl_cb *user_cb;
5564         int rc;
5565
5566         /*
5567          * The new CB should have space at the end for two MSG_PROT pkt:
5568          * 1. A packet that will act as a completion packet
5569          * 2. A packet that will generate MSI interrupt
5570          */
5571         if (parser->completion)
5572                 parser->patched_cb_size = parser->user_cb_size +
5573                                 sizeof(struct packet_msg_prot) * 2;
5574         else
5575                 parser->patched_cb_size = parser->user_cb_size;
5576
5577         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5578                                 parser->patched_cb_size, false, false,
5579                                 &patched_cb_handle);
5580
5581         if (rc) {
5582                 dev_err(hdev->dev,
5583                         "Failed to allocate patched CB for DMA CS %d\n",
5584                         rc);
5585                 return rc;
5586         }
5587
5588         patched_cb_handle >>= PAGE_SHIFT;
5589         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5590                                 (u32) patched_cb_handle);
5591         /* hl_cb_get should never fail */
5592         if (!parser->patched_cb) {
5593                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5594                         (u32) patched_cb_handle);
5595                 rc = -EFAULT;
5596                 goto out;
5597         }
5598
5599         /*
5600          * The check that parser->user_cb_size <= parser->user_cb->size was done
5601          * in validate_queue_index().
5602          */
5603         memcpy(parser->patched_cb->kernel_address,
5604                 parser->user_cb->kernel_address,
5605                 parser->user_cb_size);
5606
5607         patched_cb_size = parser->patched_cb_size;
5608
5609         /* Validate patched CB instead of user CB */
5610         user_cb = parser->user_cb;
5611         parser->user_cb = parser->patched_cb;
5612         rc = gaudi_validate_cb(hdev, parser, true);
5613         parser->user_cb = user_cb;
5614
5615         if (rc) {
5616                 hl_cb_put(parser->patched_cb);
5617                 goto out;
5618         }
5619
5620         if (patched_cb_size != parser->patched_cb_size) {
5621                 dev_err(hdev->dev, "user CB size mismatch\n");
5622                 hl_cb_put(parser->patched_cb);
5623                 rc = -EINVAL;
5624                 goto out;
5625         }
5626
5627 out:
5628         /*
5629          * Always call cb destroy here because we still have 1 reference
5630          * to it by calling cb_get earlier. After the job will be completed,
5631          * cb_put will release it, but here we want to remove it from the
5632          * idr
5633          */
5634         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5635                                         patched_cb_handle << PAGE_SHIFT);
5636
5637         return rc;
5638 }
5639
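/*
 * CS parsing with the MMU disabled: validate the user CB to learn the
 * patched size, allocate a kernel-owned CB of that size and patch the
 * LIN_DMA packets into it. On failure, the userptr list built during
 * validation is released.
 */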
5640 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5641                 struct hl_cs_parser *parser)
5642 {
5643         u64 patched_cb_handle;
5644         int rc;
5645
5646         rc = gaudi_validate_cb(hdev, parser, false);
5647
5648         if (rc)
5649                 goto free_userptr;
5650
5651         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5652                                 parser->patched_cb_size, false, false,
5653                                 &patched_cb_handle);
5654         if (rc) {
5655                 dev_err(hdev->dev,
5656                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5657                 goto free_userptr;
5658         }
5659
5660         patched_cb_handle >>= PAGE_SHIFT;
5661         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5662                                 (u32) patched_cb_handle);
5663         /* hl_cb_get should never fail here */
5664         if (!parser->patched_cb) {
5665                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5666                                 (u32) patched_cb_handle);
5667                 rc = -EFAULT;
5668                 goto out;
5669         }
5670
5671         rc = gaudi_patch_cb(hdev, parser);
5672
5673         if (rc)
5674                 hl_cb_put(parser->patched_cb);
5675
5676 out:
5677         /*
5678          * Always call cb destroy here because we still have 1 reference
5679          * to it by calling cb_get earlier. After the job will be completed,
5680          * cb_put will release it, but here we want to remove it from the
5681          * idr
5682          */
5683         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5684                                 patched_cb_handle << PAGE_SHIFT);
5685
5686 free_userptr:
5687         if (rc)
5688                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5689         return rc;
5690 }
5691
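/*
 * Jobs on internal queues are not parsed packet by packet; only the CB
 * address range is checked against the SRAM, DRAM and PMMU user regions,
 * and submissions to NIC queues whose engine was never initialized are
 * rejected.
 */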
5692 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5693                                         struct hl_cs_parser *parser)
5694 {
5695         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5696         struct gaudi_device *gaudi = hdev->asic_specific;
        /* The NIC mask is computed only for NIC queues so that the shift
         * cannot go out of range for queues of other engines
         */
        if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
                        (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
                u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
                        ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));

                if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
                        dev_err(hdev->dev, "h/w queue %d is disabled\n",
                                        parser->hw_queue_id);
                        return -EINVAL;
                }
        }
5707
5708         /* For internal queue jobs just check if CB address is valid */
5709         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5710                                         parser->user_cb_size,
5711                                         asic_prop->sram_user_base_address,
5712                                         asic_prop->sram_end_address))
5713                 return 0;
5714
5715         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5716                                         parser->user_cb_size,
5717                                         asic_prop->dram_user_base_address,
5718                                         asic_prop->dram_end_address))
5719                 return 0;
5720
5721         /* PMMU and HPMMU addresses are equal, check only one of them */
5722         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5723                                         parser->user_cb_size,
5724                                         asic_prop->pmmu.start_addr,
5725                                         asic_prop->pmmu.end_addr))
5726                 return 0;
5727
5728         dev_err(hdev->dev,
5729                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5730                 parser->user_cb, parser->user_cb_size);
5731
5732         return -EFAULT;
5733 }
5734
5735 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5736 {
5737         struct gaudi_device *gaudi = hdev->asic_specific;
5738
5739         if (parser->queue_type == QUEUE_TYPE_INT)
5740                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5741
5742         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5743                 return gaudi_parse_cb_mmu(hdev, parser);
5744         else
5745                 return gaudi_parse_cb_no_mmu(hdev, parser);
5746 }
5747
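/*
 * Append the two MSG_PROT packets that terminate an external-queue CB:
 * the first writes the completion value to the CQ, the second writes to
 * the MSI registers to raise the interrupt (the per-vector register in
 * multi-MSI mode, the single MSI request register otherwise).
 */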
5748 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5749                                         void *kernel_address, u32 len,
5750                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
5751                                         bool eb)
5752 {
5753         struct gaudi_device *gaudi = hdev->asic_specific;
5754         struct packet_msg_prot *cq_pkt;
5755         u64 msi_addr;
5756         u32 tmp;
5757
5758         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5759
5760         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5761         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5762
5763         if (eb)
5764                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5765
5766         cq_pkt->ctl = cpu_to_le32(tmp);
5767         cq_pkt->value = cpu_to_le32(cq_val);
5768         cq_pkt->addr = cpu_to_le64(cq_addr);
5769
5770         cq_pkt++;
5771
5772         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5773         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5774         cq_pkt->ctl = cpu_to_le32(tmp);
5775         cq_pkt->value = cpu_to_le32(1);
5776
5777         if (gaudi->multi_msi_mode)
5778                 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5779         else
5780                 msi_addr = mmPCIE_CORE_MSI_REQ;
5781
5782         cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5783 }
5784
5785 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5786 {
5787         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5788 }
5789
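/*
 * Fill a device memory range with a value by building a single memset
 * LIN_DMA packet in a kernel CB and sending it through QMAN0 on the DMA 0
 * queue. DMA0's error cause is checked before and after the job, and
 * cleared while the device is still initializing.
 */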
5790 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5791                                         u32 size, u64 val)
5792 {
5793         struct packet_lin_dma *lin_dma_pkt;
5794         struct hl_cs_job *job;
5795         u32 cb_size, ctl, err_cause;
5796         struct hl_cb *cb;
5797         u64 id;
5798         int rc;
5799
5800         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5801         if (!cb)
5802                 return -EFAULT;
5803
5804         lin_dma_pkt = cb->kernel_address;
5805         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5806         cb_size = sizeof(*lin_dma_pkt);
5807
5808         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5809         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5810         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5811         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5812         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5813
5814         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5815         lin_dma_pkt->src_addr = cpu_to_le64(val);
5816         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5817         lin_dma_pkt->tsize = cpu_to_le32(size);
5818
5819         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5820         if (!job) {
5821                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5822                 rc = -ENOMEM;
5823                 goto release_cb;
5824         }
5825
5826         /* Verify DMA is OK */
5827         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5828         if (err_cause && !hdev->init_done) {
5829                 dev_dbg(hdev->dev,
5830                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5831                         err_cause);
5832                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5833         }
5834
5835         job->id = 0;
5836         job->user_cb = cb;
5837         atomic_inc(&job->user_cb->cs_cnt);
5838         job->user_cb_size = cb_size;
5839         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5840         job->patched_cb = job->user_cb;
5841         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5842
5843         hl_debugfs_add_job(hdev, job);
5844
5845         rc = gaudi_send_job_on_qman0(hdev, job);
5846         hl_debugfs_remove_job(hdev, job);
5847         kfree(job);
5848         atomic_dec(&cb->cs_cnt);
5849
5850         /* Verify DMA is OK */
5851         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5852         if (err_cause) {
5853                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5854                 rc = -EIO;
5855                 if (!hdev->init_done) {
5856                         dev_dbg(hdev->dev,
5857                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5858                                 err_cause);
5859                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5860                 }
5861         }
5862
5863 release_cb:
5864         id = cb->id;
5865         hl_cb_put(cb);
5866         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5867
5868         return rc;
5869 }
5870
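/*
 * Write the same value to num_regs consecutive registers by building a CB
 * with one MSG_LONG packet per register and sending it through QMAN0.
 * The CB, and therefore the number of registers per call, is capped at
 * 2MB.
 */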
5871 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5872                                         u32 num_regs, u32 val)
5873 {
5874         struct packet_msg_long *pkt;
5875         struct hl_cs_job *job;
5876         u32 cb_size, ctl;
5877         struct hl_cb *cb;
5878         int i, rc;
5879
5880         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5881
5882         if (cb_size > SZ_2M) {
5883                 dev_err(hdev->dev, "CB size must not exceed %u bytes\n", SZ_2M);
5884                 return -ENOMEM;
5885         }
5886
5887         cb = hl_cb_kernel_create(hdev, cb_size, false);
5888         if (!cb)
5889                 return -EFAULT;
5890
5891         pkt = cb->kernel_address;
5892
5893         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5894         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5895         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5896         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5897         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5898
5899         for (i = 0; i < num_regs ; i++, pkt++) {
5900                 pkt->ctl = cpu_to_le32(ctl);
5901                 pkt->value = cpu_to_le32(val);
5902                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5903         }
5904
5905         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5906         if (!job) {
5907                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5908                 rc = -ENOMEM;
5909                 goto release_cb;
5910         }
5911
5912         job->id = 0;
5913         job->user_cb = cb;
5914         atomic_inc(&job->user_cb->cs_cnt);
5915         job->user_cb_size = cb_size;
5916         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5917         job->patched_cb = job->user_cb;
5918         job->job_cb_size = cb_size;
5919
5920         hl_debugfs_add_job(hdev, job);
5921
5922         rc = gaudi_send_job_on_qman0(hdev, job);
5923         hl_debugfs_remove_job(hdev, job);
5924         kfree(job);
5925         atomic_dec(&cb->cs_cnt);
5926
5927 release_cb:
5928         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5929         hl_cb_put(cb);
5930
5931         return rc;
5932 }
5933
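/*
 * Zero the sync manager objects user space may have modified: all SOB and
 * monitor-status registers of the E_N, E_S and W_N blocks, and in the W_S
 * block only the entries from the first user-available index onwards.
 */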
5934 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5935 {
5936         u64 base_addr;
5937         u32 num_regs;
5938         int rc;
5939
5940         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5941         num_regs = NUM_OF_SOB_IN_BLOCK;
5942         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5943         if (rc) {
5944                 dev_err(hdev->dev, "failed resetting SM registers\n");
5945                 return rc;
5946         }
5947
5948         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5949         num_regs = NUM_OF_SOB_IN_BLOCK;
5950         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5951         if (rc) {
5952                 dev_err(hdev->dev, "failed resetting SM registers\n");
5953                 return rc;
5954         }
5955
5956         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5957         num_regs = NUM_OF_SOB_IN_BLOCK;
5958         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5959         if (rc) {
5960                 dev_err(hdev->dev, "failed resetting SM registers\n");
5961                 return rc;
5962         }
5963
5964         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5965         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5966         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5967         if (rc) {
5968                 dev_err(hdev->dev, "failed resetting SM registers\n");
5969                 return rc;
5970         }
5971
5972         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5973         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5974         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5975         if (rc) {
5976                 dev_err(hdev->dev, "failed resetting SM registers\n");
5977                 return rc;
5978         }
5979
5980         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5981         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5982         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5983         if (rc) {
5984                 dev_err(hdev->dev, "failed resetting SM registers\n");
5985                 return rc;
5986         }
5987
5988         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5989                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5990         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5991         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5992         if (rc) {
5993                 dev_err(hdev->dev, "failed resetting SM registers\n");
5994                 return rc;
5995         }
5996
5997         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5998                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5999         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6000         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6001         if (rc) {
6002                 dev_err(hdev->dev, "failed resetting SM registers\n");
6003                 return rc;
6004         }
6005
6006         return 0;
6007 }
6008
6009 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6010 {
6011         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6012                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6013         int i;
6014
6015         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6016                 u64 sob_addr = CFG_BASE +
6017                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6018                                 (i * sob_delta);
6019                 u32 dma_offset = i * DMA_CORE_OFFSET;
6020
6021                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6022                                 lower_32_bits(sob_addr));
6023                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6024                                 upper_32_bits(sob_addr));
6025                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6026
6027                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6028                  * modified by the user for SRAM reduction
6029                  */
6030                 if (i > 1)
6031                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6032                                                                 0x00000001);
6033         }
6034 }
6035
6036 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6037 {
6038         u32 qman_offset;
6039         int i;
6040
6041         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6042                 qman_offset = i * DMA_QMAN_OFFSET;
6043                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6044         }
6045
6046         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6047                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6048                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6049         }
6050
6051         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6052                 qman_offset = i * TPC_QMAN_OFFSET;
6053                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6054         }
6055
6056         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6057                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6058                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6059                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6060         }
6061 }
6062
6063 static int gaudi_restore_user_registers(struct hl_device *hdev)
6064 {
6065         int rc;
6066
6067         rc = gaudi_restore_sm_registers(hdev);
6068         if (rc)
6069                 return rc;
6070
6071         gaudi_restore_dma_registers(hdev);
6072         gaudi_restore_qm_registers(hdev);
6073
6074         return 0;
6075 }
6076
6077 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6078 {
6079         return 0;
6080 }
6081
6082 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6083 {
6084         struct asic_fixed_properties *prop = &hdev->asic_prop;
6085         struct gaudi_device *gaudi = hdev->asic_specific;
6086         u64 addr = prop->mmu_pgt_addr;
6087         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6088
6089         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6090                 return 0;
6091
6092         return gaudi_memset_device_memory(hdev, addr, size, 0);
6093 }
6094
6095 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6096 {
6097
6098 }
6099
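/*
 * debugfs access helpers: each one decodes the target address and routes
 * the access accordingly: the CFG space through RREG32/WREG32, SRAM
 * through its PCI BAR, DRAM/HBM by temporarily sliding the HBM BAR, and
 * host physical memory (user addresses only) directly through
 * phys_to_virt() when no IOMMU is present.
 */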
6100 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6101                         bool user_address, u32 *val)
6102 {
6103         struct asic_fixed_properties *prop = &hdev->asic_prop;
6104         u64 hbm_bar_addr, host_phys_end;
6105         int rc = 0;
6106
6107         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6108
6109         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6110
6111                 *val = RREG32(addr - CFG_BASE);
6112
6113         } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6114
6115                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6116
6117         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6118
6119                 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6120
6121                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6122
6123                 if (hbm_bar_addr != U64_MAX) {
6124                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6125                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6126                 }
6127
6128                 if (hbm_bar_addr == U64_MAX)
6129                         rc = -EIO;
6130
6131         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6132                         user_address && !iommu_present(&pci_bus_type)) {
6133
6134                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6135
6136         } else {
6137                 rc = -EFAULT;
6138         }
6139
6140         return rc;
6141 }
6142
6143 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6144                         bool user_address, u32 val)
6145 {
6146         struct asic_fixed_properties *prop = &hdev->asic_prop;
6147         u64 hbm_bar_addr, host_phys_end;
6148         int rc = 0;
6149
6150         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6151
6152         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6153
6154                 WREG32(addr - CFG_BASE, val);
6155
6156         } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6157
6158                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6159
6160         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6161
6162                 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6163
6164                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6165
6166                 if (hbm_bar_addr != U64_MAX) {
6167                         writel(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6168                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6169                 }
6170
6171                 if (hbm_bar_addr == U64_MAX)
6172                         rc = -EIO;
6173
6174         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6175                         user_address && !iommu_present(&pci_bus_type)) {
6176
6177                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6178
6179         } else {
6180                 rc = -EFAULT;
6181         }
6182
6183         return rc;
6184 }
6185
6186 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6187                                 bool user_address, u64 *val)
6188 {
6189         struct asic_fixed_properties *prop = &hdev->asic_prop;
6190         u64 hbm_bar_addr, host_phys_end;
6191         int rc = 0;
6192
6193         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6194
6195         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6196
6197                 u32 val_l = RREG32(addr - CFG_BASE);
6198                 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6199
6200                 *val = (((u64) val_h) << 32) | val_l;
6201
6202         } else if ((addr >= SRAM_BASE_ADDR) &&
6203                         (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6204
6205                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6206
6207         } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6208
6209                 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6210
6211                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6212
6213                 if (hbm_bar_addr != U64_MAX) {
6214                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6215                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6216                 }
6217
6218                 if (hbm_bar_addr == U64_MAX)
6219                         rc = -EIO;
6220
6221         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6222                         user_address && !iommu_present(&pci_bus_type)) {
6223
6224                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6225
6226         } else {
6227                 rc = -EFAULT;
6228         }
6229
6230         return rc;
6231 }
6232
6233 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6234                                 bool user_address, u64 val)
6235 {
6236         struct asic_fixed_properties *prop = &hdev->asic_prop;
6237         u64 hbm_bar_addr, host_phys_end;
6238         int rc = 0;
6239
6240         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6241
6242         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6243
6244                 WREG32(addr - CFG_BASE, lower_32_bits(val));
6245                 WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
6246
6247         } else if ((addr >= SRAM_BASE_ADDR) &&
6248                         (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6249
6250                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6251
6252         } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6253
6254                 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6255
6256                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6257
6258                 if (hbm_bar_addr != U64_MAX) {
6259                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6260                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6261                 }
6262
6263                 if (hbm_bar_addr == U64_MAX)
6264                         rc = -EIO;
6265
6266         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6267                         user_address && !iommu_present(&pci_bus_type)) {
6268
6269                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6270
6271         } else {
6272                 rc = -EFAULT;
6273         }
6274
6275         return rc;
6276 }
6277
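/*
 * Program a DMA core directly (bypassing its QMAN) for one linear transfer
 * from device address 'addr' to the host buffer at 'dma_addr', poll the
 * core status until it is no longer busy, and fail if an error cause was
 * latched.
 */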
6278 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6279                                         u32 size_to_dma, dma_addr_t dma_addr)
6280 {
6281         u32 err_cause, val;
6282         u64 dma_offset;
6283         int rc;
6284
6285         dma_offset = dma_id * DMA_CORE_OFFSET;
6286
6287         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6288         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6289         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6290         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6291         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6292         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6293                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6294
6295         rc = hl_poll_timeout(
6296                 hdev,
6297                 mmDMA0_CORE_STS0 + dma_offset,
6298                 val,
6299                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6300                 0,
6301                 1000000);
6302
6303         if (rc) {
6304                 dev_err(hdev->dev,
6305                         "DMA %d timed-out during reading of 0x%llx\n",
6306                         dma_id, addr);
6307                 return -EIO;
6308         }
6309
6310         /* Verify DMA is OK */
6311         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6312         if (err_cause) {
6313                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6314                 dev_dbg(hdev->dev,
6315                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6316                         err_cause);
6317                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6318
6319                 return -EIO;
6320         }
6321
6322         return 0;
6323 }
6324
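/*
 * Read an arbitrary device address range for debugfs by borrowing an idle
 * PCI DMA engine: CP fetching is stopped on its QMAN, the core protection
 * bit is set (apparently so the bounce buffer is reached without MMU
 * translation, see the TODO below), and the range is copied into blob_addr
 * through a 2MB bounce buffer, one chunk at a time.
 */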
6325 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6326                                 void *blob_addr)
6327 {
6328         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6329         u32 qm_glbl_sts0, qm_cgm_sts;
6330         u64 dma_offset, qm_offset;
6331         dma_addr_t dma_addr;
6332         void *kernel_addr;
6333         bool is_eng_idle;
6334         int rc = 0, dma_id;
6335
6336         kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6337                                                 hdev, SZ_2M,
6338                                                 &dma_addr,
6339                                                 GFP_KERNEL | __GFP_ZERO);
6340
6341         if (!kernel_addr)
6342                 return -ENOMEM;
6343
6344         hdev->asic_funcs->hw_queues_lock(hdev);
6345
6346         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6347         dma_offset = dma_id * DMA_CORE_OFFSET;
6348         qm_offset = dma_id * DMA_QMAN_OFFSET;
6349         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6350         qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6351         qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6352         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6353                       IS_DMA_IDLE(dma_core_sts0);
6354
6355         if (!is_eng_idle) {
6356                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6357                 dma_offset = dma_id * DMA_CORE_OFFSET;
6358                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6359                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6360                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6361                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6362                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6363                               IS_DMA_IDLE(dma_core_sts0);
6364
6365                 if (!is_eng_idle) {
6366                         dev_err_ratelimited(hdev->dev,
6367                                 "Can't read via DMA because it is BUSY\n");
6368                         rc = -EAGAIN;
6369                         goto out;
6370                 }
6371         }
6372
6373         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6374         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6375                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6376
6377         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6378          * using the compute ctx ASID, if exists. If not, use the kernel ctx
6379          * ASID
6380          */
6381         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6382
6383         /* Verify DMA is OK */
6384         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6385         if (err_cause) {
6386                 dev_dbg(hdev->dev,
6387                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6388                         err_cause);
6389                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6390         }
6391
6392         pos = 0;
6393         size_left = size;
6394         size_to_dma = SZ_2M;
6395
6396         while (size_left > 0) {
6397
6398                 if (size_left < SZ_2M)
6399                         size_to_dma = size_left;
6400
6401                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6402                                                 dma_addr);
6403                 if (rc)
6404                         break;
6405
6406                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6407
6408                 if (size_left <= SZ_2M)
6409                         break;
6410
6411                 pos += SZ_2M;
6412                 addr += SZ_2M;
6413                 size_left -= SZ_2M;
6414         }
6415
6416         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6417          * using the compute ctx ASID, if exists. If not, use the kernel ctx
6418          * ASID
6419          */
6420         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6421                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6422
6423         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6424
6425 out:
6426         hdev->asic_funcs->hw_queues_unlock(hdev);
6427
6428         hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6429                                                 dma_addr);
6430
6431         return rc;
6432 }
6433
6434 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6435 {
6436         struct gaudi_device *gaudi = hdev->asic_specific;
6437
6438         if (hdev->reset_info.hard_reset_pending)
6439                 return U64_MAX;
6440
6441         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6442                         (addr - gaudi->hbm_bar_cur_addr));
6443 }
6444
6445 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6446 {
6447         struct gaudi_device *gaudi = hdev->asic_specific;
6448
6449         if (hdev->reset_info.hard_reset_pending)
6450                 return;
6451
6452         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6453                         (addr - gaudi->hbm_bar_cur_addr));
6454 }
6455
6456 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6457 {
6458         /* mask to zero the MMBP and ASID bits */
6459         WREG32_AND(reg, ~0x7FF);
6460         WREG32_OR(reg, asid);
6461 }
6462
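/*
 * Bind the engines to the given ASID by programming it, with MMU bypass
 * cleared, into their non-secure properties and ARUSER/AWUSER registers,
 * so that their transactions are translated in that context's address
 * space. The ASID must fit the register field and the MMU must already be
 * initialized.
 */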
6463 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6464 {
6465         struct gaudi_device *gaudi = hdev->asic_specific;
6466
6467         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6468                 return;
6469
6470         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6471                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6472                 return;
6473         }
6474
6475         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6476         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6477         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6478         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6479         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6480
6481         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6482         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6483         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6484         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6485         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6486
6487         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6488         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6489         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6490         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6491         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6492
6493         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6494         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6495         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6496         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6497         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6498
6499         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6500         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6501         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6502         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6503         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6504
6505         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6506         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6507         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6508         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6509         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6510
6511         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6512         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6513         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6514         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6515         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6516
6517         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6518         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6519         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6520         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6521         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6522
6523         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6524         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6525         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6526         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6527         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6528         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6529         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6530         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6531
6532         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6533         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6534         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6535         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6536         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6537         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6538         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6539
6540         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6541         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6542         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6543         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6544         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6545         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6546         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6547
6548         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6549         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6550         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6551         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6552         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6553         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6554         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6555
6556         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6557         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6558         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6559         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6560         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6561         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6562         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6563
6564         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6565         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6566         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6567         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6568         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6569         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6570         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6571
6572         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6573         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6574         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6575         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6576         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6577         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6578         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6579
6580         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6581         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6582         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6583         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6584         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6585         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6586         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6587
6588         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6589         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6590         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6591         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6592         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6593         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6594         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6595
6596         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6597         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6598         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6599         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6600         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6601         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6602         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6603         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6604         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6605         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6606
6607         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6608         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6609         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6610         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6611         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6612         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6613         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6614         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6615         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6616         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6617         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6618         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6619
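        /*
         * NIC QMANs are programmed only for NIC ports that were successfully
         * initialized, as reflected by the HW_CAP_NICx bits.
         */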
6620         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6621                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6622                                 asid);
6623                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6624                                 asid);
6625                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6626                                 asid);
6627                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6628                                 asid);
6629                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6630                                 asid);
6631         }
6632
6633         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6634                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6635                                 asid);
6636                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6637                                 asid);
6638                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6639                                 asid);
6640                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6641                                 asid);
6642                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6643                                 asid);
6644         }
6645
6646         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6647                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6648                                 asid);
6649                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6650                                 asid);
6651                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6652                                 asid);
6653                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6654                                 asid);
6655                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6656                                 asid);
6657         }
6658
6659         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6660                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6661                                 asid);
6662                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6663                                 asid);
6664                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6665                                 asid);
6666                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6667                                 asid);
6668                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6669                                 asid);
6670         }
6671
6672         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6673                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6674                                 asid);
6675                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6676                                 asid);
6677                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6678                                 asid);
6679                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6680                                 asid);
6681                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6682                                 asid);
6683         }
6684
6685         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6686                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6687                                 asid);
6688                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6689                                 asid);
6690                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6691                                 asid);
6692                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6693                                 asid);
6694                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6695                                 asid);
6696         }
6697
6698         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6699                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6700                                 asid);
6701                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6702                                 asid);
6703                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6704                                 asid);
6705                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6706                                 asid);
6707                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6708                                 asid);
6709         }
6710
6711         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6712                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6713                                 asid);
6714                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6715                                 asid);
6716                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6717                                 asid);
6718                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6719                                 asid);
6720                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6721                                 asid);
6722         }
6723
6724         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6725                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6726                                 asid);
6727                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6728                                 asid);
6729                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6730                                 asid);
6731                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6732                                 asid);
6733                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6734                                 asid);
6735         }
6736
6737         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6738                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6739                                 asid);
6740                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6741                                 asid);
6742                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6743                                 asid);
6744                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6745                                 asid);
6746                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6747                                 asid);
6748         }
6749
6750         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6751         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6752 }
6753
6754 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6755                 struct hl_cs_job *job)
6756 {
6757         struct packet_msg_prot *fence_pkt;
6758         u32 *fence_ptr;
6759         dma_addr_t fence_dma_addr;
6760         struct hl_cb *cb;
6761         u32 tmp, timeout, dma_offset;
6762         int rc;
6763
6764         if (hdev->pldm)
6765                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6766         else
6767                 timeout = HL_DEVICE_TIMEOUT_USEC;
6768
6769         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6770                 dev_err_ratelimited(hdev->dev,
6771                         "Can't send driver job on QMAN0 because the device is not idle\n");
6772                 return -EBUSY;
6773         }
6774
6775         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6776                                                         &fence_dma_addr);
6777         if (!fence_ptr) {
6778                 dev_err(hdev->dev,
6779                         "Failed to allocate fence memory for QMAN0\n");
6780                 return -ENOMEM;
6781         }
6782
6783         cb = job->patched_cb;
6784
6785         fence_pkt = cb->kernel_address +
6786                         job->job_cb_size - sizeof(struct packet_msg_prot);
6787
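        /*
         * Build a MSG_PROT fence packet at the tail of the patched CB. When
         * QMAN0 processes it, GAUDI_QMAN0_FENCE_VAL is written to the fence
         * buffer, which is polled below to detect job completion.
         */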
6788         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6789         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6790         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6791
6792         fence_pkt->ctl = cpu_to_le32(tmp);
6793         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6794         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6795
6796         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6797
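        /*
         * Configure the PCI DMA core protection for the driver-generated job.
         * It is restored at free_fence_ptr, after the fence completes or
         * times out.
         */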
6798         WREG32(mmDMA0_CORE_PROT + dma_offset,
6799                         BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6800
6801         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6802                                         job->job_cb_size, cb->bus_address);
6803         if (rc) {
6804                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6805                 goto free_fence_ptr;
6806         }
6807
6808         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6809                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6810                                 timeout, true);
6811
6812         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6813
6814         if (rc == -ETIMEDOUT) {
6815                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6816                 goto free_fence_ptr;
6817         }
6818
6819 free_fence_ptr:
6820         WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6821
6822         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6823                                         fence_dma_addr);
6824         return rc;
6825 }
6826
6827 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6828 {
6829         if (event_type >= GAUDI_EVENT_SIZE)
6830                 goto event_not_supported;
6831
6832         if (!gaudi_irq_map_table[event_type].valid)
6833                 goto event_not_supported;
6834
6835         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6836
6837         return;
6838
6839 event_not_supported:
6840         snprintf(desc, size, "N/A");
6841 }
6842
6843 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6844                                                         bool is_write, s32 *engine_id_1,
6845                                                         s32 *engine_id_2)
6846 {
6847         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6848
6849         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6850                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6851
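        /*
         * Each DMA_IF is shared by a pair of DMA cores. Map the initiator
         * coordinates to the candidate pair, then use each core's ERR_CAUSE
         * register to decide which of the two (if determinable) triggered
         * the razwi.
         */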
6852         switch (x_y) {
6853         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6854         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6855                 dma_id[0] = 0;
6856                 dma_id[1] = 2;
6857                 break;
6858         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6859         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6860                 dma_id[0] = 1;
6861                 dma_id[1] = 3;
6862                 break;
6863         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6864         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6865                 dma_id[0] = 4;
6866                 dma_id[1] = 6;
6867                 break;
6868         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6869         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6870                 dma_id[0] = 5;
6871                 dma_id[1] = 7;
6872                 break;
6873         default:
6874                 goto unknown_initiator;
6875         }
6876
6877         for (i = 0 ; i < 2 ; i++) {
6878                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6879                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6880         }
6881
6882         switch (x_y) {
6883         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6884         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6885                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6886                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6887                         return "DMA0";
6888                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6889                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6890                         return "DMA2";
6891                 } else {
6892                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6893                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6894                         return "DMA0 or DMA2";
6895                 }
6896         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6897         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6898                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6899                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6900                         return "DMA1";
6901                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6902                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6903                         return "DMA3";
6904                 } else {
6905                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6906                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6907                         return "DMA1 or DMA3";
6908                 }
6909         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6910         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6911                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6912                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6913                         return "DMA4";
6914                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6915                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6916                         return "DMA6";
6917                 } else {
6918                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6919                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6920                         return "DMA4 or DMA6";
6921                 }
6922         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6923         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6924                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6925                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6926                         return "DMA5";
6927                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6928                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6929                         return "DMA7";
6930                 } else {
6931                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6932                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6933                         return "DMA5 or DMA7";
6934                 }
6935         }
6936
6937 unknown_initiator:
6938         return "unknown initiator";
6939 }
6940
6941 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6942                                                         u32 *engine_id_1, u32 *engine_id_2)
6943 {
6944         u32 val, x_y, axi_id;
6945
6946         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6947                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
6948         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6949                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6950         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6951                         RAZWI_INITIATOR_AXI_ID_SHIFT);
6952
6953         switch (x_y) {
6954         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6955                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6956                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6957                         return "TPC0";
6958                 }
6959                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6960                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6961                         return "NIC0";
6962                 }
6963                 break;
6964         case RAZWI_INITIATOR_ID_X_Y_TPC1:
6965                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6966                 return "TPC1";
6967         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6968         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6969                 *engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6970                 return "MME0";
6971         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6972         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6973                 *engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6974                 return "MME1";
6975         case RAZWI_INITIATOR_ID_X_Y_TPC2:
6976                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6977                 return "TPC2";
6978         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6979                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6980                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6981                         return "TPC3";
6982                 }
6983                 /* PCI, CPU and PSOC do not have an engine id */
6984                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6985                         return "PCI";
6986                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6987                         return "CPU";
6988                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6989                         return "PSOC";
6990                 break;
6991         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6992         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6993         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6994         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6995         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6996         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6997         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6998         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6999                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
7000                                 engine_id_1, engine_id_2);
7001         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7002                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
7003                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
7004                         return "TPC4";
7005                 }
7006                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
7007                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
7008                         return "NIC1";
7009                 }
7010                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
7011                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
7012                         return "NIC2";
7013                 }
7014                 break;
7015         case RAZWI_INITIATOR_ID_X_Y_TPC5:
7016                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
7017                 return "TPC5";
7018         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7019         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7020                 *engine_id_1 = GAUDI_ENGINE_ID_MME_2;
7021                 return "MME2";
7022         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7023         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7024                 *engine_id_1 = GAUDI_ENGINE_ID_MME_3;
7025                 return "MME3";
7026         case RAZWI_INITIATOR_ID_X_Y_TPC6:
7027                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
7028                 return "TPC6";
7029         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7030                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
7031                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
7032                         return "TPC7";
7033                 }
7034                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
7035                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
7036                         return "NIC4";
7037                 }
7038                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
7039                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
7040                         return "NIC5";
7041                 }
7042                 break;
7043         default:
7044                 break;
7045         }
7046
7047         dev_err(hdev->dev,
7048                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7049                 val,
7050                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7051                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7052                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7053                         RAZWI_INITIATOR_AXI_ID_MASK);
7054
7055         return "unknown initiator";
7056 }
7057
7058 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_id_1,
7059                                                 u32 *engine_id_2)
7060 {
7062         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7063                 dev_err_ratelimited(hdev->dev,
7064                         "RAZWI event caused by illegal write of %s\n",
7065                         gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
7066                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7067         }
7068
7069         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7070                 dev_err_ratelimited(hdev->dev,
7071                         "RAZWI event caused by illegal read of %s\n",
7072                         gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
7073                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7074         }
7075 }
7076
7077 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u8 *type)
7078 {
7079         struct gaudi_device *gaudi = hdev->asic_specific;
7080         u32 val;
7081
7082         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7083                 return;
7084
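        /*
         * The captured VA is split across two registers: bits 49:32 are in
         * the capture register itself and bits 31:0 in the companion _VA
         * register.
         */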
7085         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7086         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7087                 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7088                 *addr <<= 32;
7089                 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7090
7091                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
7092                 *type = HL_RAZWI_PAGE_FAULT;
7093
7094                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7095         }
7096
7097         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7098         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7099                 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7100                 *addr <<= 32;
7101                 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7102
7103                 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
7104                 *type = HL_RAZWI_MMU_ACCESS_ERROR;
7105
7106                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7107         }
7108 }
7109
7110 /*
7111  *  +-------------------+------------------------------------------------------+
7112  *  | Configuration Reg |                     Description                      |
7113  *  |      Address      |                                                      |
7114  *  +-------------------+------------------------------------------------------+
7115  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7116  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7117  *  |                   |0xF34 memory wrappers 63:32                           |
7118  *  |                   |0xF38 memory wrappers 95:64                           |
7119  *  |                   |0xF3C memory wrappers 127:96                          |
7120  *  +-------------------+------------------------------------------------------+
7121  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7122  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7123  *  |                   |0xF44 memory wrappers 63:32                           |
7124  *  |                   |0xF48 memory wrappers 95:64                           |
7125  *  |                   |0xF4C memory wrappers 127:96                          |
7126  *  +-------------------+------------------------------------------------------+
7127  */
7128 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7129                 struct ecc_info_extract_params *params, u64 *ecc_address,
7130                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7131 {
7132         u32 i, num_mem_regs, reg, err_bit;
7133         u64 err_addr, err_word = 0;
7134
7135         num_mem_regs = params->num_memories / 32 +
7136                         ((params->num_memories % 32) ? 1 : 0);
7137
7138         if (params->block_address >= CFG_BASE)
7139                 params->block_address -= CFG_BASE;
7140
7141         if (params->derr)
7142                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7143         else
7144                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7145
7146         /* Set invalid wrapper index */
7147         *memory_wrapper_idx = 0xFF;
7148
7149         /* Iterate through memory wrappers, a single bit must be set */
7150         for (i = 0 ; i < num_mem_regs ; i++) {
7151                 err_word = RREG32(err_addr + i * 4);
7153                 if (err_word) {
7154                         err_bit = __ffs(err_word);
7155                         *memory_wrapper_idx = err_bit + (32 * i);
7156                         break;
7157                 }
7158         }
7159
7160         if (*memory_wrapper_idx == 0xFF) {
7161                 dev_err(hdev->dev, "ECC error information cannot be found\n");
7162                 return -EINVAL;
7163         }
7164
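        /*
         * Select the failing memory wrapper so its latched error address
         * and syndrome can be read out below.
         */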
7165         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7166                         *memory_wrapper_idx);
7167
7168         *ecc_address =
7169                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7170         *ecc_syndrom =
7171                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7172
7173         /* Clear error indication */
7174         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7175         if (params->derr)
7176                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7177         else
7178                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7179
7180         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7181
7182         return 0;
7183 }
7184
7185 /*
7186  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7187  *
7188  * @idx: the current pi/ci value
7189  * @q_len: the queue length (power of 2)
7190  *
7191  * @return the cyclically decremented index
7192  */
7193 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7194 {
7195         u32 mask = q_len - 1;
7196
7197         /*
7198          * Modular decrement is equivalent to adding (q_len - 1); masking
7199          * with the LSBs then keeps the value in the range [0, q_len - 1].
7200          * E.g. for q_len = 8, idx 0 decrements to 7.
7201          */
7202         return (idx + q_len - 1) & mask;
7203 }
7204
7205 /**
7206  * gaudi_print_sw_config_stream_data - print SW config stream data
7207  *
7208  * @hdev: pointer to the habanalabs device structure
7209  * @stream: the QMAN's stream
7210  * @qman_base: base address of QMAN registers block
7211  */
7212 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7213                                                 u64 qman_base)
7214 {
7215         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7216         u32 cq_ptr_lo_off, size;
7217
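        /*
         * The per-stream stride and the CQ_PTR_HI/CQ_TSIZE offsets are
         * derived from the TPC0 QM block and applied relative to qman_base,
         * assuming all QMAN blocks share the same internal register layout.
         */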
7218         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7219
7220         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7221                                                 stream * cq_ptr_lo_off;
7222         cq_ptr_hi = cq_ptr_lo +
7223                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7224         cq_tsize = cq_ptr_lo +
7225                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7226
7227         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7228         size = RREG32(cq_tsize);
7229         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
7230                                                         stream, cq_ptr, size);
7231 }
7232
7233 /**
7234  * gaudi_print_last_pqes_on_err - print last PQEs on error
7235  *
7236  * @hdev: pointer to the habanalabs device structure
7237  * @qid_base: first QID of the QMAN (out of 4 streams)
7238  * @stream: the QMAN's stream
7239  * @qman_base: base address of QMAN registers block
7240  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7241  */
7242 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7243                                                 u32 stream, u64 qman_base,
7244                                                 bool pr_sw_conf)
7245 {
7246         u32 ci, qm_ci_stream_off, queue_len;
7247         struct hl_hw_queue *q;
7248         u64 pq_ci;
7249         int i;
7250
7251         q = &hdev->kernel_queues[qid_base + stream];
7252
7253         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7254         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7255                                                 stream * qm_ci_stream_off;
7256
7257         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7258                                         q->int_queue_len : HL_QUEUE_LENGTH;
7259
7260         hdev->asic_funcs->hw_queues_lock(hdev);
7261
7262         if (pr_sw_conf)
7263                 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7264
7265         ci = RREG32(pq_ci);
7266
7267         /* we should start printing from ci - 1 */
7268         ci = gaudi_queue_idx_dec(ci, queue_len);
7269
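        /*
         * Walk backwards from the last consumed PQE and print up to
         * PQ_FETCHER_CACHE_SIZE entries, stopping at the first zero-length
         * (uninitialized) entry.
         */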
7270         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7271                 struct hl_bd *bd;
7272                 u64 addr;
7273                 u32 len;
7274
7275                 bd = q->kernel_address;
7276                 bd += ci;
7277
7278                 len = le32_to_cpu(bd->len);
7279                 /* len 0 means an uninitialized entry - break */
7280                 if (!len)
7281                         break;
7282
7283                 addr = le64_to_cpu(bd->ptr);
7284
7285                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
7286                                                         stream, ci, addr, len);
7287
7288                 /* get previous ci, wrap if needed */
7289                 ci = gaudi_queue_idx_dec(ci, queue_len);
7290         }
7291
7292         hdev->asic_funcs->hw_queues_unlock(hdev);
7293 }
7294
7295 /**
7296  * print_qman_data_on_err - extract QMAN data on error
7297  *
7298  * @hdev: pointer to the habanalabs device structure
7299  * @qid_base: first QID of the QMAN (out of 4 streams)
7300  * @stream: the QMAN's stream
7301  * @qman_base: base address of QMAN registers block
7302  *
7303  * This function attempts to extract as much data as possible on a QMAN error.
7304  * For an upper CP, print the SW config stream data and the last 8 PQEs.
7305  * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7306  */
7307 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7308                                                 u32 stream, u64 qman_base)
7309 {
7310         u32 i;
7311
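        /*
         * stream == QMAN_STREAMS denotes the lower CP: dump the SW config
         * data once and then the last PQEs of all four upper-CP streams.
         */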
7312         if (stream != QMAN_STREAMS) {
7313                 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7314                                                                         true);
7315                 return;
7316         }
7317
7318         gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7319
7320         for (i = 0; i < QMAN_STREAMS; i++)
7321                 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7322                                                                         false);
7323 }
7324
7325 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7326                                           const char *qm_name,
7327                                           u64 qman_base,
7328                                           u32 qid_base)
7329 {
7330         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7331         u64 glbl_sts_addr, arb_err_addr;
7332         char reg_desc[32];
7333
7334         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7335         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7336
7337         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7338         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7339                 glbl_sts_clr_val = 0;
7340                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7341
7342                 if (!glbl_sts_val)
7343                         continue;
7344
7345                 if (i == QMAN_STREAMS)
7346                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7347                 else
7348                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7349
7350                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7351                         if (glbl_sts_val & BIT(j)) {
7352                                 dev_err_ratelimited(hdev->dev,
7353                                                 "%s %s. err cause: %s\n",
7354                                                 qm_name, reg_desc,
7355                                                 gaudi_qman_error_cause[j]);
7356                                 glbl_sts_clr_val |= BIT(j);
7357                         }
7358                 }
7359
7360                 /* Write 1 to clear errors */
7361                 if (!hdev->stop_on_err)
7362                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7363                 else
7364                         print_qman_data_on_err(hdev, qid_base, i, qman_base);
7365         }
7366
7367         arb_err_val = RREG32(arb_err_addr);
7368
7369         if (!arb_err_val)
7370                 return;
7371
7372         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7373                 if (arb_err_val & BIT(j)) {
7374                         dev_err_ratelimited(hdev->dev,
7375                                         "%s ARB_ERR. err cause: %s\n",
7376                                         qm_name,
7377                                         gaudi_qman_arb_error_cause[j]);
7378                 }
7379         }
7380 }
7381
7382 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7383                 struct hl_eq_sm_sei_data *sei_data)
7384 {
7385         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7386
7387         /* Flip the bits as the enum is ordered in the opposite way (e.g. SEI_0 maps to index 3) */
7388         index = (index ^ 0x3) & 0x3;
7389
7390         switch (sei_data->sei_cause) {
7391         case SM_SEI_SO_OVERFLOW:
7392                 dev_err_ratelimited(hdev->dev,
7393                         "%s SEI Error: SOB Group %u overflow/underflow",
7394                         gaudi_sync_manager_names[index],
7395                         le32_to_cpu(sei_data->sei_log));
7396                 break;
7397         case SM_SEI_LBW_4B_UNALIGNED:
7398                 dev_err_ratelimited(hdev->dev,
7399                         "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7400                         gaudi_sync_manager_names[index],
7401                         le32_to_cpu(sei_data->sei_log));
7402                 break;
7403         case SM_SEI_AXI_RESPONSE_ERR:
7404                 dev_err_ratelimited(hdev->dev,
7405                         "%s SEI Error: AXI ID %u response error",
7406                         gaudi_sync_manager_names[index],
7407                         le32_to_cpu(sei_data->sei_log));
7408                 break;
7409         default:
7410                 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7411                                 le32_to_cpu(sei_data->sei_log));
7412                 break;
7413         }
7414 }
7415
7416 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7417                 struct hl_eq_ecc_data *ecc_data)
7418 {
7419         struct ecc_info_extract_params params;
7420         u64 ecc_address = 0, ecc_syndrom = 0;
7421         u8 index, memory_wrapper_idx = 0;
7422         bool extract_info_from_fw;
7423         int rc;
7424
7425         if (hdev->asic_prop.fw_security_enabled) {
7426                 extract_info_from_fw = true;
7427                 goto extract_ecc_info;
7428         }
7429
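        /*
         * PCIE and DMA/MMU ECC events arrive with the error info already
         * filled in by the FW, while for TPC and MME blocks the info is
         * extracted directly from the block's ECC registers.
         */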
7430         switch (event_type) {
7431         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7432         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7433                 extract_info_from_fw = true;
7434                 break;
7435         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7436                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7437                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7438                 params.num_memories = 90;
7439                 params.derr = false;
7440                 extract_info_from_fw = false;
7441                 break;
7442         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7443                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7444                 params.block_address =
7445                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7446                 params.num_memories = 90;
7447                 params.derr = true;
7448                 extract_info_from_fw = false;
7449                 break;
7450         case GAUDI_EVENT_MME0_ACC_SERR:
7451         case GAUDI_EVENT_MME1_ACC_SERR:
7452         case GAUDI_EVENT_MME2_ACC_SERR:
7453         case GAUDI_EVENT_MME3_ACC_SERR:
7454                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7455                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7456                 params.num_memories = 128;
7457                 params.derr = false;
7458                 extract_info_from_fw = false;
7459                 break;
7460         case GAUDI_EVENT_MME0_ACC_DERR:
7461         case GAUDI_EVENT_MME1_ACC_DERR:
7462         case GAUDI_EVENT_MME2_ACC_DERR:
7463         case GAUDI_EVENT_MME3_ACC_DERR:
7464                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7465                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7466                 params.num_memories = 128;
7467                 params.derr = true;
7468                 extract_info_from_fw = false;
7469                 break;
7470         case GAUDI_EVENT_MME0_SBAB_SERR:
7471         case GAUDI_EVENT_MME1_SBAB_SERR:
7472         case GAUDI_EVENT_MME2_SBAB_SERR:
7473         case GAUDI_EVENT_MME3_SBAB_SERR:
7474                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7475                 params.block_address =
7476                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7477                 params.num_memories = 33;
7478                 params.derr = false;
7479                 extract_info_from_fw = false;
7480                 break;
7481         case GAUDI_EVENT_MME0_SBAB_DERR:
7482         case GAUDI_EVENT_MME1_SBAB_DERR:
7483         case GAUDI_EVENT_MME2_SBAB_DERR:
7484         case GAUDI_EVENT_MME3_SBAB_DERR:
7485                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7486                 params.block_address =
7487                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7488                 params.num_memories = 33;
7489                 params.derr = true;
7490                 extract_info_from_fw = false;
7491                 break;
7492         default:
7493                 return;
7494         }
7495
7496 extract_ecc_info:
7497         if (extract_info_from_fw) {
7498                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7499                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7500                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7501         } else {
7502                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7503                                 &ecc_syndrom, &memory_wrapper_idx);
7504                 if (rc)
7505                         return;
7506         }
7507
7508         dev_err(hdev->dev,
7509                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7510                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7511 }
7512
7513 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7514 {
7515         u64 qman_base;
7516         char desc[32];
7517         u32 qid_base;
7518         u8 index;
7519
7520         switch (event_type) {
7521         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7522                 index = event_type - GAUDI_EVENT_TPC0_QM;
7523                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7524                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7525                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7526                 break;
7527         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7528                 index = event_type - GAUDI_EVENT_MME0_QM;
7529                 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7530                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7531                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7532                 break;
7533         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7534                 index = event_type - GAUDI_EVENT_DMA0_QM;
7535                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7536                 /* skip GAUDI_QUEUE_ID_CPU_PQ, which sits between the DMA1 and DMA2 queues */
7537                 if (index > 1)
7538                         qid_base++;
7539                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7540                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7541                 break;
7542         case GAUDI_EVENT_NIC0_QM0:
7543                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7544                 qman_base = mmNIC0_QM0_BASE;
7545                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7546                 break;
7547         case GAUDI_EVENT_NIC0_QM1:
7548                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7549                 qman_base = mmNIC0_QM1_BASE;
7550                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7551                 break;
7552         case GAUDI_EVENT_NIC1_QM0:
7553                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7554                 qman_base = mmNIC1_QM0_BASE;
7555                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7556                 break;
7557         case GAUDI_EVENT_NIC1_QM1:
7558                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7559                 qman_base = mmNIC1_QM1_BASE;
7560                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7561                 break;
7562         case GAUDI_EVENT_NIC2_QM0:
7563                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7564                 qman_base = mmNIC2_QM0_BASE;
7565                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7566                 break;
7567         case GAUDI_EVENT_NIC2_QM1:
7568                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7569                 qman_base = mmNIC2_QM1_BASE;
7570                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7571                 break;
7572         case GAUDI_EVENT_NIC3_QM0:
7573                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7574                 qman_base = mmNIC3_QM0_BASE;
7575                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7576                 break;
7577         case GAUDI_EVENT_NIC3_QM1:
7578                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7579                 qman_base = mmNIC3_QM1_BASE;
7580                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7581                 break;
7582         case GAUDI_EVENT_NIC4_QM0:
7583                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7584                 qman_base = mmNIC4_QM0_BASE;
7585                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7586                 break;
7587         case GAUDI_EVENT_NIC4_QM1:
7588                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7589                 qman_base = mmNIC4_QM1_BASE;
7590                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7591                 break;
7592         default:
7593                 return;
7594         }
7595
7596         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7597 }
7598
7599 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7600                                         bool razwi)
7601 {
7602         u32 engine_id_1, engine_id_2;
7603         char desc[64] = "";
7604         u64 razwi_addr = 0;
7605         u8 razwi_type;
7606         int rc;
7607
7608         /*
7609          * Initialize the engine ids as invalid; they get a valid value only if
7610          * the razwi was initiated by an engine that has an engine id.
7611          * Initialize the razwi type to its default; it is changed only if the
7612          * razwi was caused by a page fault or an MMU access error.
7613          */
7614         engine_id_1 = U16_MAX;
7615         engine_id_2 = U16_MAX;
7616         razwi_type = U8_MAX;
7617
7618         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7619         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7620                 event_type, desc);
7621
7622         if (razwi) {
7623                 gaudi_print_and_get_razwi_info(hdev, &engine_id_1, &engine_id_2);
7624                 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
7625
7626                 /* In case it's the first razwi, save its parameters */
7627                 rc = atomic_cmpxchg(&hdev->last_error.razwi_write_disable, 0, 1);
7628                 if (!rc) {
7629                         hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime;
7630                         hdev->last_error.razwi_timestamp = ktime_get();
7631                         hdev->last_error.razwi_addr = razwi_addr;
7632                         hdev->last_error.razwi_engine_id_1 = engine_id_1;
7633                         hdev->last_error.razwi_engine_id_2 = engine_id_2;
7634                         /*
7635                          * If first engine id holds non valid value the razwi initiator
7636                          * does not have engine id
7637                          */
7638                         hdev->last_error.razwi_non_engine_initiator = (engine_id_1 == U16_MAX);
7639                         hdev->last_error.razwi_type = razwi_type;
7641                 }
7642         }
7643 }
7644
7645 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7646                                         struct cpucp_pkt_sync_err *sync_err)
7647 {
7648         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7649
7650         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7651                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7652 }
7653
7654 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7655                                         struct hl_eq_fw_alive *fw_alive)
7656 {
7657         dev_err(hdev->dev,
7658                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7659                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7660                 "Minor" : "Critical", fw_alive->process_id,
7661                 fw_alive->thread_id, fw_alive->uptime_seconds);
7662 }
7663
7664 static int gaudi_non_hard_reset_late_init(struct hl_device *hdev)
7665 {
7666         /* GAUDI doesn't support any reset except hard-reset */
7667         return -EPERM;
7668 }
7669
7670 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7671                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7672 {
7673         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7674         int rc = 0;
7675
7676         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7677                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7678                 if (!hbm_ecc_data) {
7679                         dev_err(hdev->dev, "No FW ECC data");
7680                         return 0;
7681                 }
7682
7683                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7684                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7685                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7686                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7687                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7688                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7689                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7690                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7691                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7692                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7693                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7694                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7695                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7696                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7697
7698                 dev_err(hdev->dev,
7699                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7700                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7701                 dev_err(hdev->dev,
7702                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7703                         device, ch, hbm_ecc_data->first_addr, type,
7704                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7705                         hbm_ecc_data->dec_cnt);
7706                 return 0;
7707         }
7708
7709         if (hdev->asic_prop.fw_security_enabled) {
7710                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7711                 return 0;
7712         }
7713
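             /*
              * No FW ECC report is available here, so read the HBM MC
              * registers directly. Each of the GAUDI_HBM_CHANNELS entries
              * covers two device channels ("pc" in the prints below):
              * interrupt status is read from offsets 0x06C/0x07C and the
              * ECC error info from 0x060/0x070.
              */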
7714         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7715         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7716                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7717                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7718                 if (val) {
7719                         rc = -EIO;
7720                         dev_err(hdev->dev,
7721                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7722                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7723                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7724                                 (val >> 4) & 0x1);
7725
7726                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7727                         dev_err(hdev->dev,
7728                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7729                                 device, ch * 2,
7730                                 RREG32(base + ch * 0x1000 + 0x064),
7731                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7732                                 (val2 & 0xFF0000) >> 16,
7733                                 (val2 & 0xFF000000) >> 24);
7734                 }
7735
7736                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7737                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7738                 if (val) {
7739                         rc = -EIO;
7740                         dev_err(hdev->dev,
7741                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7742                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7743                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7744                                 (val >> 4) & 0x1);
7745
7746                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7747                         dev_err(hdev->dev,
7748                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7749                                 device, ch * 2 + 1,
7750                                 RREG32(base + ch * 0x1000 + 0x074),
7751                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7752                                 (val2 & 0xFF0000) >> 16,
7753                                 (val2 & 0xFF000000) >> 24);
7754                 }
7755
7756                 /* Clear interrupts */
7757                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7758                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7759                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7760                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7761                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7762                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7763         }
7764
7765         val  = RREG32(base + 0x8F30);
7766         val2 = RREG32(base + 0x8F34);
7767         if (val | val2) {
7768                 rc = -EIO;
7769                 dev_err(hdev->dev,
7770                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7771                         device, val, val2);
7772         }
7773         val  = RREG32(base + 0x8F40);
7774         val2 = RREG32(base + 0x8F44);
7775         if (val | val2) {
7776                 rc = -EIO;
7777                 dev_err(hdev->dev,
7778                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7779                         device, val, val2);
7780         }
7781
7782         return rc;
7783 }
7784
7785 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7786 {
7787         switch (hbm_event_type) {
7788         case GAUDI_EVENT_HBM0_SPI_0:
7789         case GAUDI_EVENT_HBM0_SPI_1:
7790                 return 0;
7791         case GAUDI_EVENT_HBM1_SPI_0:
7792         case GAUDI_EVENT_HBM1_SPI_1:
7793                 return 1;
7794         case GAUDI_EVENT_HBM2_SPI_0:
7795         case GAUDI_EVENT_HBM2_SPI_1:
7796                 return 2;
7797         case GAUDI_EVENT_HBM3_SPI_0:
7798         case GAUDI_EVENT_HBM3_SPI_1:
7799                 return 3;
7800         default:
7801                 break;
7802         }
7803
7804         /* Should never happen */
7805         return 0;
7806 }
7807
7808 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7809                                         char *interrupt_name)
7810 {
7811         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7812         bool soft_reset_required = false;
7813
7814         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7815                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7816
7817         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7818                 if (tpc_interrupts_cause & BIT(i)) {
7819                         dev_err_ratelimited(hdev->dev,
7820                                         "TPC%d_%s interrupt cause: %s\n",
7821                                         tpc_id, interrupt_name,
7822                                         gaudi_tpc_interrupts_cause[i]);
7823                         /* If this is a QM error, we need to soft-reset */
7824                         if (i == 15)
7825                                 soft_reset_required = true;
7826                 }
7827
7828         /* Clear interrupts */
7829         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7830
7831         return soft_reset_required;
7832 }
7833
7834 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7835 {
7836         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7837 }
7838
7839 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7840 {
7841         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7842 }
7843
7844 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7845                                         u16 event_type)
7846 {
7847         ktime_t zero_time = ktime_set(0, 0);
7848
7849         mutex_lock(&hdev->clk_throttling.lock);
7850
7851         switch (event_type) {
7852         case GAUDI_EVENT_FIX_POWER_ENV_S:
7853                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7854                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7855                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7856                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7857                 dev_info_ratelimited(hdev->dev,
7858                         "Clock throttling due to power consumption\n");
7859                 break;
7860
7861         case GAUDI_EVENT_FIX_POWER_ENV_E:
7862                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7863                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7864                 dev_info_ratelimited(hdev->dev,
7865                         "Power envelope is safe, back to optimal clock\n");
7866                 break;
7867
7868         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7869                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7870                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7871                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7872                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7873                 dev_info_ratelimited(hdev->dev,
7874                         "Clock throttling due to overheating\n");
7875                 break;
7876
7877         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7878                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7879                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7880                 dev_info_ratelimited(hdev->dev,
7881                         "Thermal envelope is safe, back to optimal clock\n");
7882                 break;
7883
7884         default:
7885                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7886                         event_type);
7887                 break;
7888         }
7889
7890         mutex_unlock(&hdev->clk_throttling.lock);
7891 }
7892
7893 static void gaudi_handle_eqe(struct hl_device *hdev,
7894                                 struct hl_eq_entry *eq_entry)
7895 {
7896         struct gaudi_device *gaudi = hdev->asic_specific;
7897         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7898         u32 fw_fatal_err_flag = 0;
7899         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7900                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7901         bool reset_required;
7902         u8 cause;
7903         int rc;
7904
7905         if (event_type >= GAUDI_EVENT_SIZE) {
7906                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
7907                                 event_type, GAUDI_EVENT_SIZE - 1);
7908                 return;
7909         }
7910
7911         gaudi->events_stat[event_type]++;
7912         gaudi->events_stat_aggregate[event_type]++;
7913
7914         switch (event_type) {
7915         case GAUDI_EVENT_PCIE_CORE_DERR:
7916         case GAUDI_EVENT_PCIE_IF_DERR:
7917         case GAUDI_EVENT_PCIE_PHY_DERR:
7918         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7919         case GAUDI_EVENT_MME0_ACC_DERR:
7920         case GAUDI_EVENT_MME0_SBAB_DERR:
7921         case GAUDI_EVENT_MME1_ACC_DERR:
7922         case GAUDI_EVENT_MME1_SBAB_DERR:
7923         case GAUDI_EVENT_MME2_ACC_DERR:
7924         case GAUDI_EVENT_MME2_SBAB_DERR:
7925         case GAUDI_EVENT_MME3_ACC_DERR:
7926         case GAUDI_EVENT_MME3_SBAB_DERR:
7927         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7928                 fallthrough;
7929         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7930         case GAUDI_EVENT_PSOC_MEM_DERR:
7931         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7932         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7933         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7934         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7935         case GAUDI_EVENT_MMU_DERR:
7936         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7937                 gaudi_print_irq_info(hdev, event_type, true);
7938                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7939                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7940                 goto reset_device;
7941
7942         case GAUDI_EVENT_GIC500:
7943         case GAUDI_EVENT_AXI_ECC:
7944         case GAUDI_EVENT_L2_RAM_ECC:
7945         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7946                 gaudi_print_irq_info(hdev, event_type, false);
7947                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7948                 goto reset_device;
7949
7950         case GAUDI_EVENT_HBM0_SPI_0:
7951         case GAUDI_EVENT_HBM1_SPI_0:
7952         case GAUDI_EVENT_HBM2_SPI_0:
7953         case GAUDI_EVENT_HBM3_SPI_0:
7954                 gaudi_print_irq_info(hdev, event_type, false);
7955                 gaudi_hbm_read_interrupts(hdev,
7956                                 gaudi_hbm_event_to_dev(event_type),
7957                                 &eq_entry->hbm_ecc_data);
7958                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7959                 goto reset_device;
7960
7961         case GAUDI_EVENT_HBM0_SPI_1:
7962         case GAUDI_EVENT_HBM1_SPI_1:
7963         case GAUDI_EVENT_HBM2_SPI_1:
7964         case GAUDI_EVENT_HBM3_SPI_1:
7965                 gaudi_print_irq_info(hdev, event_type, false);
7966                 gaudi_hbm_read_interrupts(hdev,
7967                                 gaudi_hbm_event_to_dev(event_type),
7968                                 &eq_entry->hbm_ecc_data);
7969                 hl_fw_unmask_irq(hdev, event_type);
7970                 break;
7971
7972         case GAUDI_EVENT_TPC0_DEC:
7973         case GAUDI_EVENT_TPC1_DEC:
7974         case GAUDI_EVENT_TPC2_DEC:
7975         case GAUDI_EVENT_TPC3_DEC:
7976         case GAUDI_EVENT_TPC4_DEC:
7977         case GAUDI_EVENT_TPC5_DEC:
7978         case GAUDI_EVENT_TPC6_DEC:
7979         case GAUDI_EVENT_TPC7_DEC:
7980                 gaudi_print_irq_info(hdev, event_type, true);
7981                 reset_required = gaudi_tpc_read_interrupts(hdev,
7982                                         tpc_dec_event_to_tpc_id(event_type),
7983                                         "AXI_SLV_DEC_Error");
7984                 if (reset_required) {
7985                         dev_err(hdev->dev, "reset required due to %s\n",
7986                                 gaudi_irq_map_table[event_type].name);
7987
7988                         hl_device_reset(hdev, 0);
7989                 } else {
7990                         hl_fw_unmask_irq(hdev, event_type);
7991                 }
7992                 break;
7993
7994         case GAUDI_EVENT_TPC0_KRN_ERR:
7995         case GAUDI_EVENT_TPC1_KRN_ERR:
7996         case GAUDI_EVENT_TPC2_KRN_ERR:
7997         case GAUDI_EVENT_TPC3_KRN_ERR:
7998         case GAUDI_EVENT_TPC4_KRN_ERR:
7999         case GAUDI_EVENT_TPC5_KRN_ERR:
8000         case GAUDI_EVENT_TPC6_KRN_ERR:
8001         case GAUDI_EVENT_TPC7_KRN_ERR:
8002                 gaudi_print_irq_info(hdev, event_type, true);
8003                 reset_required = gaudi_tpc_read_interrupts(hdev,
8004                                         tpc_krn_event_to_tpc_id(event_type),
8005                                         "KRN_ERR");
8006                 if (reset_required) {
8007                         dev_err(hdev->dev, "reset required due to %s\n",
8008                                 gaudi_irq_map_table[event_type].name);
8009
8010                         hl_device_reset(hdev, 0);
8011                 } else {
8012                         hl_fw_unmask_irq(hdev, event_type);
8013                 }
8014                 break;
8015
8016         case GAUDI_EVENT_PCIE_CORE_SERR:
8017         case GAUDI_EVENT_PCIE_IF_SERR:
8018         case GAUDI_EVENT_PCIE_PHY_SERR:
8019         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8020         case GAUDI_EVENT_MME0_ACC_SERR:
8021         case GAUDI_EVENT_MME0_SBAB_SERR:
8022         case GAUDI_EVENT_MME1_ACC_SERR:
8023         case GAUDI_EVENT_MME1_SBAB_SERR:
8024         case GAUDI_EVENT_MME2_ACC_SERR:
8025         case GAUDI_EVENT_MME2_SBAB_SERR:
8026         case GAUDI_EVENT_MME3_ACC_SERR:
8027         case GAUDI_EVENT_MME3_SBAB_SERR:
8028         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8029         case GAUDI_EVENT_CPU_IF_ECC_SERR:
8030         case GAUDI_EVENT_PSOC_MEM_SERR:
8031         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8032         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8033         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8034         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8035                 fallthrough;
8036         case GAUDI_EVENT_MMU_SERR:
8037                 gaudi_print_irq_info(hdev, event_type, true);
8038                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8039                 hl_fw_unmask_irq(hdev, event_type);
8040                 break;
8041
8042         case GAUDI_EVENT_PCIE_DEC:
8043         case GAUDI_EVENT_MME0_WBC_RSP:
8044         case GAUDI_EVENT_MME0_SBAB0_RSP:
8045         case GAUDI_EVENT_MME1_WBC_RSP:
8046         case GAUDI_EVENT_MME1_SBAB0_RSP:
8047         case GAUDI_EVENT_MME2_WBC_RSP:
8048         case GAUDI_EVENT_MME2_SBAB0_RSP:
8049         case GAUDI_EVENT_MME3_WBC_RSP:
8050         case GAUDI_EVENT_MME3_SBAB0_RSP:
8051         case GAUDI_EVENT_CPU_AXI_SPLITTER:
8052         case GAUDI_EVENT_PSOC_AXI_DEC:
8053         case GAUDI_EVENT_PSOC_PRSTN_FALL:
8054         case GAUDI_EVENT_MMU_PAGE_FAULT:
8055         case GAUDI_EVENT_MMU_WR_PERM:
8056         case GAUDI_EVENT_RAZWI_OR_ADC:
8057         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8058         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8059         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8060                 fallthrough;
8061         case GAUDI_EVENT_NIC0_QM0:
8062         case GAUDI_EVENT_NIC0_QM1:
8063         case GAUDI_EVENT_NIC1_QM0:
8064         case GAUDI_EVENT_NIC1_QM1:
8065         case GAUDI_EVENT_NIC2_QM0:
8066         case GAUDI_EVENT_NIC2_QM1:
8067         case GAUDI_EVENT_NIC3_QM0:
8068         case GAUDI_EVENT_NIC3_QM1:
8069         case GAUDI_EVENT_NIC4_QM0:
8070         case GAUDI_EVENT_NIC4_QM1:
8071         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8072                 gaudi_print_irq_info(hdev, event_type, true);
8073                 gaudi_handle_qman_err(hdev, event_type);
8074                 hl_fw_unmask_irq(hdev, event_type);
8075                 break;
8076
8077         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8078                 gaudi_print_irq_info(hdev, event_type, true);
8079                 goto reset_device;
8080
8081         case GAUDI_EVENT_TPC0_BMON_SPMU:
8082         case GAUDI_EVENT_TPC1_BMON_SPMU:
8083         case GAUDI_EVENT_TPC2_BMON_SPMU:
8084         case GAUDI_EVENT_TPC3_BMON_SPMU:
8085         case GAUDI_EVENT_TPC4_BMON_SPMU:
8086         case GAUDI_EVENT_TPC5_BMON_SPMU:
8087         case GAUDI_EVENT_TPC6_BMON_SPMU:
8088         case GAUDI_EVENT_TPC7_BMON_SPMU:
8089         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8090                 gaudi_print_irq_info(hdev, event_type, false);
8091                 hl_fw_unmask_irq(hdev, event_type);
8092                 break;
8093
8094         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8095                 gaudi_print_irq_info(hdev, event_type, false);
8096                 gaudi_print_sm_sei_info(hdev, event_type,
8097                                         &eq_entry->sm_sei_data);
8098                 rc = hl_state_dump(hdev);
8099                 if (rc)
8100                         dev_err(hdev->dev,
8101                                 "Error during system state dump %d\n", rc);
8102                 hl_fw_unmask_irq(hdev, event_type);
8103                 break;
8104
8105         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8106                 gaudi_print_clk_change_info(hdev, event_type);
8107                 hl_fw_unmask_irq(hdev, event_type);
8108                 break;
8109
8110         case GAUDI_EVENT_PSOC_GPIO_U16_0:
8111                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8112                 dev_err(hdev->dev,
8113                         "Received high temp H/W interrupt %d (cause %d)\n",
8114                         event_type, cause);
8115                 break;
8116
8117         case GAUDI_EVENT_DEV_RESET_REQ:
8118                 gaudi_print_irq_info(hdev, event_type, false);
8119                 goto reset_device;
8120
8121         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8122                 gaudi_print_irq_info(hdev, event_type, false);
8123                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8124                 goto reset_device;
8125
8126         case GAUDI_EVENT_FW_ALIVE_S:
8127                 gaudi_print_irq_info(hdev, event_type, false);
8128                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8129                 goto reset_device;
8130
8131         default:
8132                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8133                                 event_type);
8134                 break;
8135         }
8136
8137         return;
8138
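             /*
              * On FW-secured devices issue a hard reset that bypasses the
              * reset request to FW; otherwise hard-reset only if the driver
              * is configured to do so on FW events, else just unmask the IRQ.
              */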
8139 reset_device:
8140         if (hdev->asic_prop.fw_security_enabled)
8141                 hl_device_reset(hdev, HL_DRV_RESET_HARD
8142                                         | HL_DRV_RESET_BYPASS_REQ_TO_FW
8143                                         | fw_fatal_err_flag);
8144         else if (hdev->hard_reset_on_fw_events)
8145                 hl_device_reset(hdev, HL_DRV_RESET_HARD | fw_fatal_err_flag);
8146         else
8147                 hl_fw_unmask_irq(hdev, event_type);
8148 }
8149
8150 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8151                                         u32 *size)
8152 {
8153         struct gaudi_device *gaudi = hdev->asic_specific;
8154
8155         if (aggregate) {
8156                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8157                 return gaudi->events_stat_aggregate;
8158         }
8159
8160         *size = (u32) sizeof(gaudi->events_stat);
8161         return gaudi->events_stat;
8162 }
8163
8164 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8165                                         u32 flags)
8166 {
8167         struct gaudi_device *gaudi = hdev->asic_specific;
8168         u32 status, timeout_usec;
8169         int rc;
8170
8171         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8172                 hdev->reset_info.hard_reset_pending)
8173                 return 0;
8174
8175         if (hdev->pldm)
8176                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8177         else
8178                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8179
8180         /* L0 & L1 invalidation */
8181         WREG32(mmSTLB_INV_PS, 3);
8182         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8183         WREG32(mmSTLB_INV_PS, 2);
8184
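             /* Poll until the STLB reports that the invalidation completed */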
8185         rc = hl_poll_timeout(
8186                 hdev,
8187                 mmSTLB_INV_PS,
8188                 status,
8189                 !status,
8190                 1000,
8191                 timeout_usec);
8192
8193         WREG32(mmSTLB_INV_SET, 0);
8194
8195         return rc;
8196 }
8197
8198 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8199                                                 bool is_hard, u32 flags,
8200                                                 u32 asid, u64 va, u64 size)
8201 {
8202         /* Treat as invalidate all because there is no range invalidation
8203          * in Gaudi
8204          */
8205         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8206 }
8207
8208 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8209                                         u32 asid, u64 phys_addr)
8210 {
8211         u32 status, timeout_usec;
8212         int rc;
8213
8214         if (hdev->pldm)
8215                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8216         else
8217                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8218
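             /*
              * Program the hop0 page-table physical address for this ASID and
              * kick the MMU. Bit 31 of MMU_BUSY is set to start the operation
              * and is polled below until the MMU clears it.
              */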
8219         WREG32(MMU_ASID, asid);
8220         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8221         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8222         WREG32(MMU_BUSY, 0x80000000);
8223
8224         rc = hl_poll_timeout(
8225                 hdev,
8226                 MMU_BUSY,
8227                 status,
8228                 !(status & 0x80000000),
8229                 1000,
8230                 timeout_usec);
8231
8232         if (rc) {
8233                 dev_err(hdev->dev,
8234                         "Timeout during MMU hop0 config of asid %d\n", asid);
8235                 return rc;
8236         }
8237
8238         return 0;
8239 }
8240
8241 static int gaudi_send_heartbeat(struct hl_device *hdev)
8242 {
8243         struct gaudi_device *gaudi = hdev->asic_specific;
8244
8245         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8246                 return 0;
8247
8248         return hl_fw_send_heartbeat(hdev);
8249 }
8250
8251 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8252 {
8253         struct gaudi_device *gaudi = hdev->asic_specific;
8254         struct asic_fixed_properties *prop = &hdev->asic_prop;
8255         int rc;
8256
8257         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8258                 return 0;
8259
8260         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8261                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8262                                         mmCPU_BOOT_ERR1);
8263         if (rc)
8264                 return rc;
8265
8266         if (!strlen(prop->cpucp_info.card_name))
8267                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8268                                 CARD_NAME_MAX_LEN);
8269
8270         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8271
8272         set_default_power_values(hdev);
8273
8274         hdev->max_power = prop->max_power_default;
8275
8276         return 0;
8277 }
8278
8279 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8280                                         u8 mask_len, struct seq_file *s)
8281 {
8282         struct gaudi_device *gaudi = hdev->asic_specific;
8283         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8284         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8285         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8286         unsigned long *mask = (unsigned long *)mask_arr;
8287         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8288         bool is_idle = true, is_eng_idle, is_slave;
8289         u64 offset;
8290         int i, dma_id, port;
8291
8292         if (s)
8293                 seq_puts(s,
8294                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8295                         "---  -------  ------------  ----------  -------------\n");
8296
8297         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8298                 dma_id = gaudi_dma_assignment[i];
8299                 offset = dma_id * DMA_QMAN_OFFSET;
8300
8301                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8302                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8303                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8304                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8305                                 IS_DMA_IDLE(dma_core_sts0);
8306                 is_idle &= is_eng_idle;
8307
8308                 if (mask && !is_eng_idle)
8309                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8310                 if (s)
8311                         seq_printf(s, fmt, dma_id,
8312                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8313                                 qm_cgm_sts, dma_core_sts0);
8314         }
8315
8316         if (s)
8317                 seq_puts(s,
8318                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8319                         "---  -------  ------------  ----------  ----------\n");
8320
8321         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8322                 offset = i * TPC_QMAN_OFFSET;
8323                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8324                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8325                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8326                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8327                                 IS_TPC_IDLE(tpc_cfg_sts);
8328                 is_idle &= is_eng_idle;
8329
8330                 if (mask && !is_eng_idle)
8331                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8332                 if (s)
8333                         seq_printf(s, fmt, i,
8334                                 is_eng_idle ? "Y" : "N",
8335                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8336         }
8337
8338         if (s)
8339                 seq_puts(s,
8340                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8341                         "---  -------  ------------  ----------  -----------\n");
8342
8343         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8344                 offset = i * MME_QMAN_OFFSET;
8345                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8346                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8347
8348                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8349                 is_slave = i % 2;
8350                 if (!is_slave) {
8351                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8352                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8353                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8354                 }
8355
8356                 is_idle &= is_eng_idle;
8357
8358                 if (mask && !is_eng_idle)
8359                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8360                 if (s) {
8361                         if (!is_slave)
8362                                 seq_printf(s, fmt, i,
8363                                         is_eng_idle ? "Y" : "N",
8364                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8365                         else
8366                                 seq_printf(s, mme_slave_fmt, i,
8367                                         is_eng_idle ? "Y" : "N", "-",
8368                                         "-", mme_arch_sts);
8369                 }
8370         }
8371
8372         if (s)
8373                 seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8374                                 "---  -------  ------------  ----------\n");
8375
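             /*
              * Each NIC macro hosts two ports; check only the ports whose
              * HW capability bit is set.
              */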
8376         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8377                 offset = i * NIC_MACRO_QMAN_OFFSET;
8378                 port = 2 * i;
8379                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8380                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8381                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8382                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8383                         is_idle &= is_eng_idle;
8384
8385                         if (mask && !is_eng_idle)
8386                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8387                         if (s)
8388                                 seq_printf(s, nic_fmt, port,
8389                                                 is_eng_idle ? "Y" : "N",
8390                                                 qm_glbl_sts0, qm_cgm_sts);
8391                 }
8392
8393                 port = 2 * i + 1;
8394                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8395                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8396                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8397                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8398                         is_idle &= is_eng_idle;
8399
8400                         if (mask && !is_eng_idle)
8401                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8402                         if (s)
8403                                 seq_printf(s, nic_fmt, port,
8404                                                 is_eng_idle ? "Y" : "N",
8405                                                 qm_glbl_sts0, qm_cgm_sts);
8406                 }
8407         }
8408
8409         if (s)
8410                 seq_puts(s, "\n");
8411
8412         return is_idle;
8413 }
8414
8415 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8416         __acquires(&gaudi->hw_queues_lock)
8417 {
8418         struct gaudi_device *gaudi = hdev->asic_specific;
8419
8420         spin_lock(&gaudi->hw_queues_lock);
8421 }
8422
8423 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8424         __releases(&gaudi->hw_queues_lock)
8425 {
8426         struct gaudi_device *gaudi = hdev->asic_specific;
8427
8428         spin_unlock(&gaudi->hw_queues_lock);
8429 }
8430
8431 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8432 {
8433         return hdev->pdev->device;
8434 }
8435
8436 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8437                                 size_t max_size)
8438 {
8439         struct gaudi_device *gaudi = hdev->asic_specific;
8440
8441         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8442                 return 0;
8443
8444         return hl_fw_get_eeprom_data(hdev, data, max_size);
8445 }
8446
8447 /*
8448  * this function should be used only during initialization and/or after reset,
8449  * when there are no active users.
8450  */
8451 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8452 {
8453         u64 kernel_timeout;
8454         u32 status, offset;
8455         int rc;
8456
8457         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8458
8459         if (hdev->pldm)
8460                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8461         else
8462                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8463
8464         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8465                         lower_32_bits(tpc_kernel));
8466         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8467                         upper_32_bits(tpc_kernel));
8468
8469         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8470                         lower_32_bits(tpc_kernel));
8471         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8472                         upper_32_bits(tpc_kernel));
8473         /* set a valid LUT pointer, content is of no significance */
8474         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8475                         lower_32_bits(tpc_kernel));
8476         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8477                         upper_32_bits(tpc_kernel));
8478
8479         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8480                         lower_32_bits(CFG_BASE +
8481                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8482
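             /* Invalidate the icache and prefetch 64KB of the new kernel */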
8483         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8484                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8485                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8486         /* wait a bit for the engine to start executing */
8487         usleep_range(1000, 1500);
8488
8489         /* wait until engine has finished executing */
8490         rc = hl_poll_timeout(
8491                 hdev,
8492                 mmTPC0_CFG_STATUS + offset,
8493                 status,
8494                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8495                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8496                 1000,
8497                 kernel_timeout);
8498
8499         if (rc) {
8500                 dev_err(hdev->dev,
8501                         "Timeout while waiting for TPC%d icache prefetch\n",
8502                         tpc_id);
8503                 return -EIO;
8504         }
8505
8506         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8507                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8508
8509         /* wait a bit for the engine to start executing */
8510         usleep_range(1000, 1500);
8511
8512         /* wait until engine has finished executing */
8513         rc = hl_poll_timeout(
8514                 hdev,
8515                 mmTPC0_CFG_STATUS + offset,
8516                 status,
8517                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8518                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8519                 1000,
8520                 kernel_timeout);
8521
8522         if (rc) {
8523                 dev_err(hdev->dev,
8524                         "Timeout while waiting for TPC%d vector pipe\n",
8525                         tpc_id);
8526                 return -EIO;
8527         }
8528
8529         rc = hl_poll_timeout(
8530                 hdev,
8531                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8532                 status,
8533                 (status == 0),
8534                 1000,
8535                 kernel_timeout);
8536
8537         if (rc) {
8538                 dev_err(hdev->dev,
8539                         "Timeout while waiting for TPC%d kernel to execute\n",
8540                         tpc_id);
8541                 return -EIO;
8542         }
8543
8544         return 0;
8545 }
8546
8547 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8548                 struct hl_ctx *ctx)
8549 {
8550         struct gaudi_device *gaudi = hdev->asic_specific;
8551         int min_alloc_order, rc, collective_cb_size;
8552
8553         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8554                 return 0;
8555
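             /*
              * Allocate a coherent host buffer for the internal CBs, manage
              * it with a gen_pool and map it into the device VA space.
              */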
8556         hdev->internal_cb_pool_virt_addr =
8557                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8558                                         HOST_SPACE_INTERNAL_CB_SZ,
8559                                         &hdev->internal_cb_pool_dma_addr,
8560                                         GFP_KERNEL | __GFP_ZERO);
8561
8562         if (!hdev->internal_cb_pool_virt_addr)
8563                 return -ENOMEM;
8564
8565         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8566                         sizeof(struct packet_fence);
8567         min_alloc_order = ilog2(collective_cb_size);
8568
8569         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8570         if (!hdev->internal_cb_pool) {
8571                 dev_err(hdev->dev,
8572                         "Failed to create internal CB pool\n");
8573                 rc = -ENOMEM;
8574                 goto free_internal_cb_pool;
8575         }
8576
8577         rc = gen_pool_add(hdev->internal_cb_pool,
8578                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8579                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8580         if (rc) {
8581                 dev_err(hdev->dev,
8582                         "Failed to add memory to internal CB pool\n");
8583                 rc = -EFAULT;
8584                 goto destroy_internal_cb_pool;
8585         }
8586
8587         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8588                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8589                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8590
8591         if (!hdev->internal_cb_va_base) {
8592                 rc = -ENOMEM;
8593                 goto destroy_internal_cb_pool;
8594         }
8595
8596         mutex_lock(&ctx->mmu_lock);
8597         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8598                         hdev->internal_cb_pool_dma_addr,
8599                         HOST_SPACE_INTERNAL_CB_SZ);
8600
8601         hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8602         mutex_unlock(&ctx->mmu_lock);
8603
8604         if (rc)
8605                 goto unreserve_internal_cb_pool;
8606
8607         return 0;
8608
8609 unreserve_internal_cb_pool:
8610         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8611                         HOST_SPACE_INTERNAL_CB_SZ);
8612 destroy_internal_cb_pool:
8613         gen_pool_destroy(hdev->internal_cb_pool);
8614 free_internal_cb_pool:
8615         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8616                         HOST_SPACE_INTERNAL_CB_SZ,
8617                         hdev->internal_cb_pool_virt_addr,
8618                         hdev->internal_cb_pool_dma_addr);
8619
8620         return rc;
8621 }
8622
8623 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8624                 struct hl_ctx *ctx)
8625 {
8626         struct gaudi_device *gaudi = hdev->asic_specific;
8627
8628         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8629                 return;
8630
8631         mutex_lock(&ctx->mmu_lock);
8632         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8633                         HOST_SPACE_INTERNAL_CB_SZ);
8634         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8635                         HOST_SPACE_INTERNAL_CB_SZ);
8636         hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8637         mutex_unlock(&ctx->mmu_lock);
8638
8639         gen_pool_destroy(hdev->internal_cb_pool);
8640
8641         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8642                         HOST_SPACE_INTERNAL_CB_SZ,
8643                         hdev->internal_cb_pool_virt_addr,
8644                         hdev->internal_cb_pool_dma_addr);
8645 }
8646
8647 static int gaudi_ctx_init(struct hl_ctx *ctx)
8648 {
8649         int rc;
8650
8651         if (ctx->asid == HL_KERNEL_ASID_ID)
8652                 return 0;
8653
8654         rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8655         if (rc)
8656                 return rc;
8657
8658         rc = gaudi_restore_user_registers(ctx->hdev);
8659         if (rc)
8660                 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8661
8662         return rc;
8663 }
8664
8665 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8666 {
8667         if (ctx->asid == HL_KERNEL_ASID_ID)
8668                 return;
8669
8670         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8671 }
8672
8673 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8674 {
8675         return gaudi_cq_assignment[cq_idx];
8676 }
8677
8678 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8679 {
8680         return sizeof(struct packet_msg_short) +
8681                         sizeof(struct packet_msg_prot) * 2;
8682 }
8683
8684 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8685 {
8686         return sizeof(struct packet_msg_short) * 4 +
8687                         sizeof(struct packet_fence) +
8688                         sizeof(struct packet_msg_prot) * 2;
8689 }
8690
8691 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8692 {
8693         return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8694 }
8695
8696 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8697                                 u32 size, bool eb)
8698 {
8699         struct hl_cb *cb = (struct hl_cb *) data;
8700         struct packet_msg_short *pkt;
8701         u32 value, ctl, pkt_size = sizeof(*pkt);
8702
8703         pkt = cb->kernel_address + size;
8704         memset(pkt, 0, pkt_size);
8705
8706         /* Inc by 1, Mode ADD */
8707         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8708         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8709
8710         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8711         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8712         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8713         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8714         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8715         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8716         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8717
8718         pkt->value = cpu_to_le32(value);
8719         pkt->ctl = cpu_to_le32(ctl);
8720
8721         return size + pkt_size;
8722 }
8723
8724 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8725                                         u16 addr)
8726 {
8727         u32 ctl, pkt_size = sizeof(*pkt);
8728
8729         memset(pkt, 0, pkt_size);
8730
8731         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8732         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8733         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8734         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8735         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8736         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* MB is set only on the last packet */
8737
8738         pkt->value = cpu_to_le32(value);
8739         pkt->ctl = cpu_to_le32(ctl);
8740
8741         return pkt_size;
8742 }
8743
8744 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8745                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8746                 u16 sob_val, u16 mon_id)
8747 {
8748         u64 monitor_base;
8749         u32 ctl, value, pkt_size = sizeof(*pkt);
8750         u16 msg_addr_offset;
8751         u8 mask;
8752
8753         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8754                 dev_err(hdev->dev,
8755                         "sob_base %u (mask %#x) is not valid\n",
8756                         sob_base, sob_mask);
8757                 return 0;
8758         }
8759
8760         /*
8761          * monitor_base should be the content of the base0 address registers,
8762          * so it will be added to the msg short offsets
8763          */
8764         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8765
8766         msg_addr_offset =
8767                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8768                                 monitor_base;
8769
8770         memset(pkt, 0, pkt_size);
8771
8772         /* Monitor config packet: bind the monitor to a sync object */
8773         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8774         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8775         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8776                         0); /* GREATER OR EQUAL */
8777         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8778
8779         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8780         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8781         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8782         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8783         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8784         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8785         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8786
8787         pkt->value = cpu_to_le32(value);
8788         pkt->ctl = cpu_to_le32(ctl);
8789
8790         return pkt_size;
8791 }
8792
8793 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8794 {
8795         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8796
8797         memset(pkt, 0, pkt_size);
8798
8799         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8800         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8801         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8802
8803         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8804         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8805         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8806         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8807
8808         pkt->cfg = cpu_to_le32(cfg);
8809         pkt->ctl = cpu_to_le32(ctl);
8810
8811         return pkt_size;
8812 }
8813
8814 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8815 {
8816         u32 offset, nic_index;
8817
8818         switch (queue_id) {
8819         case GAUDI_QUEUE_ID_DMA_0_0:
8820                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8821                 break;
8822         case GAUDI_QUEUE_ID_DMA_0_1:
8823                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8824                 break;
8825         case GAUDI_QUEUE_ID_DMA_0_2:
8826                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8827                 break;
8828         case GAUDI_QUEUE_ID_DMA_0_3:
8829                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8830                 break;
8831         case GAUDI_QUEUE_ID_DMA_1_0:
8832                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8833                 break;
8834         case GAUDI_QUEUE_ID_DMA_1_1:
8835                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8836                 break;
8837         case GAUDI_QUEUE_ID_DMA_1_2:
8838                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8839                 break;
8840         case GAUDI_QUEUE_ID_DMA_1_3:
8841                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8842                 break;
8843         case GAUDI_QUEUE_ID_DMA_5_0:
8844                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8845                 break;
8846         case GAUDI_QUEUE_ID_DMA_5_1:
8847                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8848                 break;
8849         case GAUDI_QUEUE_ID_DMA_5_2:
8850                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8851                 break;
8852         case GAUDI_QUEUE_ID_DMA_5_3:
8853                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8854                 break;
8855         case GAUDI_QUEUE_ID_TPC_7_0:
8856                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8857                 break;
8858         case GAUDI_QUEUE_ID_TPC_7_1:
8859                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8860                 break;
8861         case GAUDI_QUEUE_ID_TPC_7_2:
8862                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8863                 break;
8864         case GAUDI_QUEUE_ID_TPC_7_3:
8865                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8866                 break;
8867         case GAUDI_QUEUE_ID_NIC_0_0:
8868         case GAUDI_QUEUE_ID_NIC_1_0:
8869         case GAUDI_QUEUE_ID_NIC_2_0:
8870         case GAUDI_QUEUE_ID_NIC_3_0:
8871         case GAUDI_QUEUE_ID_NIC_4_0:
8872         case GAUDI_QUEUE_ID_NIC_5_0:
8873         case GAUDI_QUEUE_ID_NIC_6_0:
8874         case GAUDI_QUEUE_ID_NIC_7_0:
8875         case GAUDI_QUEUE_ID_NIC_8_0:
8876         case GAUDI_QUEUE_ID_NIC_9_0:
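                     /*
                      * Two NIC engines share each macro: the upper bits of the
                      * index select the macro and the LSB selects the engine
                      * (QM) within it.
                      */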
8877                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8878                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8879                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8880                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8881                 break;
8882         case GAUDI_QUEUE_ID_NIC_0_1:
8883         case GAUDI_QUEUE_ID_NIC_1_1:
8884         case GAUDI_QUEUE_ID_NIC_2_1:
8885         case GAUDI_QUEUE_ID_NIC_3_1:
8886         case GAUDI_QUEUE_ID_NIC_4_1:
8887         case GAUDI_QUEUE_ID_NIC_5_1:
8888         case GAUDI_QUEUE_ID_NIC_6_1:
8889         case GAUDI_QUEUE_ID_NIC_7_1:
8890         case GAUDI_QUEUE_ID_NIC_8_1:
8891         case GAUDI_QUEUE_ID_NIC_9_1:
8892                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8893                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8894                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8895                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8896                 break;
8897         case GAUDI_QUEUE_ID_NIC_0_2:
8898         case GAUDI_QUEUE_ID_NIC_1_2:
8899         case GAUDI_QUEUE_ID_NIC_2_2:
8900         case GAUDI_QUEUE_ID_NIC_3_2:
8901         case GAUDI_QUEUE_ID_NIC_4_2:
8902         case GAUDI_QUEUE_ID_NIC_5_2:
8903         case GAUDI_QUEUE_ID_NIC_6_2:
8904         case GAUDI_QUEUE_ID_NIC_7_2:
8905         case GAUDI_QUEUE_ID_NIC_8_2:
8906         case GAUDI_QUEUE_ID_NIC_9_2:
8907                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8908                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8909                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8910                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8911                 break;
8912         case GAUDI_QUEUE_ID_NIC_0_3:
8913         case GAUDI_QUEUE_ID_NIC_1_3:
8914         case GAUDI_QUEUE_ID_NIC_2_3:
8915         case GAUDI_QUEUE_ID_NIC_3_3:
8916         case GAUDI_QUEUE_ID_NIC_4_3:
8917         case GAUDI_QUEUE_ID_NIC_5_3:
8918         case GAUDI_QUEUE_ID_NIC_6_3:
8919         case GAUDI_QUEUE_ID_NIC_7_3:
8920         case GAUDI_QUEUE_ID_NIC_8_3:
8921         case GAUDI_QUEUE_ID_NIC_9_3:
8922                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8923                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8924                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8925                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8926                 break;
8927         default:
8928                 return -EINVAL;
8929         }
8930
8931         *addr = CFG_BASE + offset;
8932
8933         return 0;
8934 }
8935
8936 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8937 {
8938         u64 monitor_base;
8939         u32 size = 0;
8940         u16 msg_addr_offset;
8941
8942         /*
8943          * monitor_base should be the content of the base0 address registers,
8944          * so it will be added to the msg short offsets
8945          */
8946         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8947
8948         /* First monitor config packet: low address of the sync */
8949         msg_addr_offset =
8950                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8951                                 monitor_base;
8952
8953         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8954                                         msg_addr_offset);
8955
8956         /* Second monitor config packet: high address of the sync */
8957         msg_addr_offset =
8958                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8959                                 monitor_base;
8960
8961         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8962                                         msg_addr_offset);
8963
8964         /*
8965          * Third monitor config packet: the payload, i.e. what to write when the
8966          * sync triggers
8967          */
8968         msg_addr_offset =
8969                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8970                                 monitor_base;
8971
8972         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8973
8974         return size;
8975 }
8976
8977 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8978                                 struct hl_gen_wait_properties *prop)
8979 {
8980         struct hl_cb *cb = (struct hl_cb *) prop->data;
8981         void *buf = cb->kernel_address;
8982         u64 fence_addr = 0;
8983         u32 size = prop->size;
8984
8985         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8986                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8987                                 prop->q_idx);
8988                 return 0;
8989         }
8990
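             /*
              * Build the wait CB: three monitor setup packets, an arm-monitor
              * packet and a fence packet.
              */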
8991         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8992         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8993                         prop->sob_mask, prop->sob_val, prop->mon_id);
8994         size += gaudi_add_fence_pkt(buf + size);
8995
8996         return size;
8997 }
8998
8999 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
9000 {
9001         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
9002
9003         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
9004                 hw_sob->sob_id);
9005
9006         WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9007                         hw_sob->sob_id * 4, 0);
9008
9009         kref_init(&hw_sob->kref);
9010 }
9011
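/* Gaudi uses a fixed 48-bit DMA mask; the value is not taken from the F/W. */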
9012 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9013 {
9014         hdev->dma_mask = 48;
9015 }
9016
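/*
 * Read the 64-bit PSOC timestamp counter as two 32-bit halves (upper half
 * first). Note there is no retry if the lower half wraps between the reads.
 */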
9017 static u64 gaudi_get_device_time(struct hl_device *hdev)
9018 {
9019         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9020
9021         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9022 }
9023
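/* HW block mapping is not supported on Gaudi; both callbacks return -EPERM. */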
9024 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9025                                 u32 *block_size, u32 *block_id)
9026 {
9027         return -EPERM;
9028 }
9029
9030 static int gaudi_block_mmap(struct hl_device *hdev,
9031                                 struct vm_area_struct *vma,
9032                                 u32 block_id, u32 block_size)
9033 {
9034         return -EPERM;
9035 }
9036
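/*
 * Tell the F/W it may start sending events: write the INTS_REGISTER event ID
 * to the host interrupts register, taken either from the static GIC register
 * map or from the F/W dynamic registers descriptor.
 */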
9037 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9038 {
9039         struct cpu_dyn_regs *dyn_regs =
9040                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9041         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9042                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
9043                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
9044
9045         WREG32(irq_handler_offset,
9046                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
9047 }
9048
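/* Translate a PLL index from the uapi enumeration to the F/W enumeration. */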
9049 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9050 {
9051         switch (pll_idx) {
9052         case HL_GAUDI_CPU_PLL: return CPU_PLL;
9053         case HL_GAUDI_PCI_PLL: return PCI_PLL;
9054         case HL_GAUDI_NIC_PLL: return NIC_PLL;
9055         case HL_GAUDI_DMA_PLL: return DMA_PLL;
9056         case HL_GAUDI_MESH_PLL: return MESH_PLL;
9057         case HL_GAUDI_MME_PLL: return MME_PLL;
9058         case HL_GAUDI_TPC_PLL: return TPC_PLL;
9059         case HL_GAUDI_IF_PLL: return IF_PLL;
9060         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9061         case HL_GAUDI_HBM_PLL: return HBM_PLL;
9062         default: return -EINVAL;
9063         }
9064 }
9065
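/*
 * Add one engine's sync object to the sync-to-engine hash map. Register
 * values of 0 or 0xffffffff mean no SOB is configured and are silently
 * skipped.
 */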
9066 static int gaudi_add_sync_to_engine_map_entry(
9067         struct hl_sync_to_engine_map *map, u32 reg_value,
9068         enum hl_sync_engine_type engine_type, u32 engine_id)
9069 {
9070         struct hl_sync_to_engine_map_entry *entry;
9071
9072         /* The register value represents a partial address of the sync
9073          * object and is used as a unique identifier, so the CFG base
9074          * bits must be subtracted from the value first.
9075          */
9076         if (reg_value == 0 || reg_value == 0xffffffff)
9077                 return 0;
9078         reg_value -= (u32)CFG_BASE;
9079
9080         /* create a new hash entry */
9081         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9082         if (!entry)
9083                 return -ENOMEM;
9084         entry->engine_type = engine_type;
9085         entry->engine_id = engine_id;
9086         entry->sync_id = reg_value;
9087         hash_add(map->tb, &entry->node, reg_value);
9088
9089         return 0;
9090 }
9091
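/*
 * Walk all TPC, MME and DMA engines and record, for each one, the sync
 * object it is currently configured to signal.
 */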
9092 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9093                                 struct hl_sync_to_engine_map *map)
9094 {
9095         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9096         int i, j, rc;
9097         u32 reg_value;
9098
9099         /* Iterate over TPC engines */
9100         for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9101
9102                 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9103                                         sds->props[SP_NEXT_TPC] * i);
9104
9105                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9106                                                         ENGINE_TPC, i);
9107                 if (rc)
9108                         goto free_sync_to_engine_map;
9109         }
9110
9111         /* Iterate over MME engines */
9112         for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9113                 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9114
9115                         reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9116                                                 sds->props[SP_NEXT_MME] * i +
9117                                                 j * sizeof(u32));
9118
9119                         rc = gaudi_add_sync_to_engine_map_entry(
9120                                 map, reg_value, ENGINE_MME,
9121                                 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9122                         if (rc)
9123                                 goto free_sync_to_engine_map;
9124                 }
9125         }
9126
9127         /* Iterate over DMA engines */
9128         for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9129                 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9130                                         sds->props[SP_DMA_QUEUES_OFFSET] * i);
9131                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9132                                                         ENGINE_DMA, i);
9133                 if (rc)
9134                         goto free_sync_to_engine_map;
9135         }
9136
9137         return 0;
9138
9139 free_sync_to_engine_map:
9140         hl_state_dump_free_sync_to_engine_map(map);
9141
9142         return rc;
9143 }
9144
9145 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9146 {
9147         return FIELD_GET(
9148                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9149                 mon->status);
9150 }
9151
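/*
 * Build a human readable, comma separated list of the sync object IDs a
 * monitor is armed on, derived from the group ID and the cleared bits in
 * the monitor's arm mask.
 */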
9152 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
9153 {
9154         const size_t max_write = 10;
9155         u32 gid, mask, sob;
9156         int i, offset;
9157
9158         /* Each cleared bit in the mask corresponds to sync object ID
9159          * (8 * group_id + bit position), so walk the mask and collect them.
9160          */
9161         gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9162                         mon->arm_data);
9163         mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9164                         mon->arm_data);
9165
9166         for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
9167                 max_write; mask >>= 1, i++) {
9168                 if (!(mask & 1)) {
9169                         sob = gid * MONITOR_MAX_SOBS + i;
9170
9171                         if (offset > 0)
9172                                 offset += snprintf(sobs + offset, max_write,
9173                                                         ", ");
9174
9175                         offset += snprintf(sobs + offset, max_write, "%u", sob);
9176                 }
9177         }
9178 }
9179
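/*
 * Format a single armed monitor into the state dump buffer: its ID and
 * optional name, the SOB group/mask/value it waits for, the payload it will
 * write, its pending status and the list of monitored sync objects.
 */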
9180 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9181                                 struct hl_device *hdev,
9182                                 struct hl_mon_state_dump *mon)
9183 {
9184         const char *name;
9185         char scratch_buf1[BIN_REG_STRING_SIZE],
9186                 scratch_buf2[BIN_REG_STRING_SIZE];
9187         char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9188
9189         name = hl_state_dump_get_monitor_name(hdev, mon);
9190         if (!name)
9191                 name = "";
9192
9193         gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9194
9195         return hl_snprintf_resize(
9196                 buf, size, offset,
9197                 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9198                 mon->id, name,
9199                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9200                                 mon->arm_data),
9201                 hl_format_as_binary(
9202                         scratch_buf1, sizeof(scratch_buf1),
9203                         FIELD_GET(
9204                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9205                                 mon->arm_data)),
9206                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9207                                 mon->arm_data),
9208                 mon->wr_data,
9209                 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9210                 hl_format_as_binary(
9211                         scratch_buf2, sizeof(scratch_buf2),
9212                         FIELD_GET(
9213                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9214                                 mon->status)),
9215                 monitored_sobs);
9216 }
9217
9218
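/*
 * Dump the fence state of one engine's QMAN: read the CP status and fence
 * counter registers for every stream and print only the streams that have a
 * fence wait in progress.
 */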
9219 static int gaudi_print_fences_single_engine(
9220         struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9221         enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9222         size_t *size, size_t *offset)
9223 {
9224         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9225         int rc = -ENOMEM, i;
9226         u32 *statuses, *fences;
9227
9228         statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9229                         sizeof(*statuses), GFP_KERNEL);
9230         if (!statuses)
9231                 goto out;
9232
9233         fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9234                                 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9235                          sizeof(*fences), GFP_KERNEL);
9236         if (!fences)
9237                 goto free_status;
9238
9239         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9240                 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9241
9242         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9243                                 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9244                 fences[i] = RREG32(base_offset + i * sizeof(u32));
9245
9246         /* The actual print */
9247         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9248                 u32 fence_id;
9249                 u64 fence_cnt, fence_rdata;
9250                 const char *engine_name;
9251
9252                 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9253                         statuses[i]))
9254                         continue;
9255
9256                 fence_id =
9257                         FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9258                 fence_cnt = base_offset + CFG_BASE +
9259                         sizeof(u32) *
9260                         (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9261                 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9262                                 sds->props[SP_FENCE0_RDATA_OFFSET];
9263                 engine_name = hl_sync_engine_to_string(engine_type);
9264
9265                 rc = hl_snprintf_resize(
9266                         buf, size, offset,
9267                         "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9268                         engine_name, engine_id,
9269                         i, fence_id,
9270                         fence_cnt, engine_name, engine_id, fence_id, i,
9271                         fence_rdata, engine_name, engine_id, fence_id, i,
9272                         fences[fence_id],
9273                         statuses[i]);
9274                 if (rc)
9275                         goto free_fences;
9276         }
9277
9278         rc = 0;
9279
9280 free_fences:
9281         kfree(fences);
9282 free_status:
9283         kfree(statuses);
9284 out:
9285         return rc;
9286 }
9287
9288
9289 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9290         .monitor_valid = gaudi_monitor_valid,
9291         .print_single_monitor = gaudi_print_single_monitor,
9292         .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9293         .print_fences_single_engine = gaudi_print_fences_single_engine,
9294 };
9295
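/*
 * Populate the state dump descriptor: the SOB and monitor ID-to-name hash
 * tables, the Gaudi property table, the sync manager names and the dump
 * callbacks above.
 */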
9296 static void gaudi_state_dump_init(struct hl_device *hdev)
9297 {
9298         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9299         int i;
9300
9301         for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9302                 hash_add(sds->so_id_to_str_tb,
9303                         &gaudi_so_id_to_str[i].node,
9304                         gaudi_so_id_to_str[i].id);
9305
9306         for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9307                 hash_add(sds->monitor_id_to_str_tb,
9308                         &gaudi_monitor_id_to_str[i].node,
9309                         gaudi_monitor_id_to_str[i].id);
9310
9311         sds->props = gaudi_state_dump_specs_props;
9312
9313         sds->sync_namager_names = gaudi_sync_manager_names;
9314
9315         sds->funcs = gaudi_state_dump_funcs;
9316 }
9317
9318 static u32 *gaudi_get_stream_master_qid_arr(void)
9319 {
9320         return gaudi_stream_master;
9321 }
9322
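/*
 * sysfs attribute (part of the VRM group below) exposing the Infineon
 * voltage controller version reported in the CPU-CP info.
 */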
9323 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9324 {
9325         struct hl_device *hdev = dev_get_drvdata(dev);
9326         struct cpucp_info *cpucp_info;
9327
9328         cpucp_info = &hdev->asic_prop.cpucp_info;
9329
9330         return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9331 }
9332
9333 static DEVICE_ATTR_RO(infineon_ver);
9334
9335 static struct attribute *gaudi_vrm_dev_attrs[] = {
9336         &dev_attr_infineon_ver.attr,
             NULL,
9337 };
9338
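/*
 * Attach the common clock attributes and the Gaudi-specific VRM attributes
 * to the sysfs attribute groups provided by the caller.
 */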
9339 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9340                                         struct attribute_group *dev_vrm_attr_grp)
9341 {
9342         hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9343         dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9344 }
9345
9346 static const struct hl_asic_funcs gaudi_funcs = {
9347         .early_init = gaudi_early_init,
9348         .early_fini = gaudi_early_fini,
9349         .late_init = gaudi_late_init,
9350         .late_fini = gaudi_late_fini,
9351         .sw_init = gaudi_sw_init,
9352         .sw_fini = gaudi_sw_fini,
9353         .hw_init = gaudi_hw_init,
9354         .hw_fini = gaudi_hw_fini,
9355         .halt_engines = gaudi_halt_engines,
9356         .suspend = gaudi_suspend,
9357         .resume = gaudi_resume,
9358         .mmap = gaudi_mmap,
9359         .ring_doorbell = gaudi_ring_doorbell,
9360         .pqe_write = gaudi_pqe_write,
9361         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9362         .asic_dma_free_coherent = gaudi_dma_free_coherent,
9363         .scrub_device_mem = gaudi_scrub_device_mem,
9364         .get_int_queue_base = gaudi_get_int_queue_base,
9365         .test_queues = gaudi_test_queues,
9366         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9367         .asic_dma_pool_free = gaudi_dma_pool_free,
9368         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9369         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9370         .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9371         .cs_parser = gaudi_cs_parser,
9372         .asic_dma_map_sg = gaudi_dma_map_sg,
9373         .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9374         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9375         .update_eq_ci = gaudi_update_eq_ci,
9376         .context_switch = gaudi_context_switch,
9377         .restore_phase_topology = gaudi_restore_phase_topology,
9378         .debugfs_read32 = gaudi_debugfs_read32,
9379         .debugfs_write32 = gaudi_debugfs_write32,
9380         .debugfs_read64 = gaudi_debugfs_read64,
9381         .debugfs_write64 = gaudi_debugfs_write64,
9382         .debugfs_read_dma = gaudi_debugfs_read_dma,
9383         .add_device_attr = gaudi_add_device_attr,
9384         .handle_eqe = gaudi_handle_eqe,
9385         .get_events_stat = gaudi_get_events_stat,
9386         .read_pte = gaudi_read_pte,
9387         .write_pte = gaudi_write_pte,
9388         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9389         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9390         .send_heartbeat = gaudi_send_heartbeat,
9391         .debug_coresight = gaudi_debug_coresight,
9392         .is_device_idle = gaudi_is_device_idle,
9393         .non_hard_reset_late_init = gaudi_non_hard_reset_late_init,
9394         .hw_queues_lock = gaudi_hw_queues_lock,
9395         .hw_queues_unlock = gaudi_hw_queues_unlock,
9396         .get_pci_id = gaudi_get_pci_id,
9397         .get_eeprom_data = gaudi_get_eeprom_data,
9398         .send_cpu_message = gaudi_send_cpu_message,
9399         .pci_bars_map = gaudi_pci_bars_map,
9400         .init_iatu = gaudi_init_iatu,
9401         .rreg = hl_rreg,
9402         .wreg = hl_wreg,
9403         .halt_coresight = gaudi_halt_coresight,
9404         .ctx_init = gaudi_ctx_init,
9405         .ctx_fini = gaudi_ctx_fini,
9406         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9407         .load_firmware_to_device = gaudi_load_firmware_to_device,
9408         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9409         .get_signal_cb_size = gaudi_get_signal_cb_size,
9410         .get_wait_cb_size = gaudi_get_wait_cb_size,
9411         .gen_signal_cb = gaudi_gen_signal_cb,
9412         .gen_wait_cb = gaudi_gen_wait_cb,
9413         .reset_sob = gaudi_reset_sob,
9414         .reset_sob_group = gaudi_reset_sob_group,
9415         .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9416         .get_device_time = gaudi_get_device_time,
9417         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9418         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9419         .scramble_addr = hl_mmu_scramble_addr,
9420         .descramble_addr = hl_mmu_descramble_addr,
9421         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9422         .get_hw_block_id = gaudi_get_hw_block_id,
9423         .hw_block_mmap = gaudi_block_mmap,
9424         .enable_events_from_fw = gaudi_enable_events_from_fw,
9425         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9426         .init_firmware_loader = gaudi_init_firmware_loader,
9427         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9428         .state_dump_init = gaudi_state_dump_init,
9429         .get_sob_addr = gaudi_get_sob_addr,
9430         .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9431         .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
9432 };
9433
9434 /**
9435  * gaudi_set_asic_funcs - set GAUDI function pointers
9436  *
9437  * @hdev: pointer to hl_device structure
9438  *
9439  */
9440 void gaudi_set_asic_funcs(struct hl_device *hdev)
9441 {
9442         hdev->asic_funcs = &gaudi_funcs;
9443 }