habanalabs: remove asic callback set_pll_profile()
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43  *                      because of TDMA (tensor DMA). Hence, WREG is never
44  *                      secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
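/*
 * Illustrative sketch only (not part of the driver): the secured-DMA flow
 * described above boils down to a sequence along these lines, where
 * device_is_idle() and gaudi_set_dma_ch_secured() are hypothetical names
 * used purely for illustration:
 *
 *      if (!device_is_idle(hdev))
 *              return -EBUSY;
 *      gaudi_set_dma_ch_secured(hdev, 0, true);
 *      rc = gaudi_send_job_on_qman0(hdev, job);
 *      gaudi_set_dma_ch_secured(hdev, 0, false);
 */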
61
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000         /* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN            20
86
87 #define GAUDI_CB_POOL_CB_CNT            512
88 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
97
98 #define GAUDI_ARB_WDT_TIMEOUT           0x1000000
99
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
101                 BIT(GAUDI_ENGINE_ID_MME_0) |\
102                 BIT(GAUDI_ENGINE_ID_MME_2) |\
103                 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
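/* i.e. the engines whose clock gating can be toggled through debugfs: the two
 * MME master engines (0 and 2; engines 1 and 3 are their slaves) and all
 * eight TPC engines.
 */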
104
105 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
106
107 #define GAUDI_PLL_MAX 10
108
109 #define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")
110
111 #define MONITOR_SOB_STRING_SIZE         256
112
113 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
114         GAUDI_QUEUE_ID_DMA_0_0,
115         GAUDI_QUEUE_ID_DMA_0_1,
116         GAUDI_QUEUE_ID_DMA_0_2,
117         GAUDI_QUEUE_ID_DMA_0_3,
118         GAUDI_QUEUE_ID_DMA_1_0,
119         GAUDI_QUEUE_ID_DMA_1_1,
120         GAUDI_QUEUE_ID_DMA_1_2,
121         GAUDI_QUEUE_ID_DMA_1_3
122 };
123
124 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
125                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
126                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
127                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
128                 "gaudi cpu eq"
129 };
130
131 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
132         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
133         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
134         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
135         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
136         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
137         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
138         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
139         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
140 };
141
142 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
143         [0] = GAUDI_QUEUE_ID_DMA_0_0,
144         [1] = GAUDI_QUEUE_ID_DMA_0_1,
145         [2] = GAUDI_QUEUE_ID_DMA_0_2,
146         [3] = GAUDI_QUEUE_ID_DMA_0_3,
147         [4] = GAUDI_QUEUE_ID_DMA_1_0,
148         [5] = GAUDI_QUEUE_ID_DMA_1_1,
149         [6] = GAUDI_QUEUE_ID_DMA_1_2,
150         [7] = GAUDI_QUEUE_ID_DMA_1_3,
151 };
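/* Completion queues 0-7 are mapped 1:1 onto the eight external PCI DMA queue
 * streams listed above.
 */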
152
153 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
154         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
155         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
156         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
157         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
158         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
159         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
160         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
161         [PACKET_FENCE]          = sizeof(struct packet_fence),
162         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
163         [PACKET_NOP]            = sizeof(struct packet_nop),
164         [PACKET_STOP]           = sizeof(struct packet_stop),
165         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
166         [PACKET_WAIT]           = sizeof(struct packet_wait),
167         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
168 };
169
170 static inline bool validate_packet_id(enum packet_id id)
171 {
172         switch (id) {
173         case PACKET_WREG_32:
174         case PACKET_WREG_BULK:
175         case PACKET_MSG_LONG:
176         case PACKET_MSG_SHORT:
177         case PACKET_CP_DMA:
178         case PACKET_REPEAT:
179         case PACKET_MSG_PROT:
180         case PACKET_FENCE:
181         case PACKET_LIN_DMA:
182         case PACKET_NOP:
183         case PACKET_STOP:
184         case PACKET_ARB_POINT:
185         case PACKET_WAIT:
186         case PACKET_LOAD_AND_EXE:
187                 return true;
188         default:
189                 return false;
190         }
191 }
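/* The size table and the validator above are used together when parsing user
 * command buffers, to walk the packets safely and reject unknown opcodes.
 */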
192
193 static const char * const
194 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
195         "tpc_address_exceed_slm",
196         "tpc_div_by_0",
197         "tpc_spu_mac_overflow",
198         "tpc_spu_addsub_overflow",
199         "tpc_spu_abs_overflow",
200         "tpc_spu_fp_dst_nan_inf",
201         "tpc_spu_fp_dst_denorm",
202         "tpc_vpu_mac_overflow",
203         "tpc_vpu_addsub_overflow",
204         "tpc_vpu_abs_overflow",
205         "tpc_vpu_fp_dst_nan_inf",
206         "tpc_vpu_fp_dst_denorm",
207         "tpc_assertions",
208         "tpc_illegal_instruction",
209         "tpc_pc_wrap_around",
210         "tpc_qm_sw_err",
211         "tpc_hbw_rresp_err",
212         "tpc_hbw_bresp_err",
213         "tpc_lbw_rresp_err",
214         "tpc_lbw_bresp_err"
215 };
216
217 static const char * const
218 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
219         "PQ AXI HBW error",
220         "CQ AXI HBW error",
221         "CP AXI HBW error",
222         "CP error due to undefined OPCODE",
223         "CP encountered STOP OPCODE",
224         "CP AXI LBW error",
225         "CP WRREG32 or WRBULK returned error",
226         "N/A",
227         "FENCE 0 inc over max value and clipped",
228         "FENCE 1 inc over max value and clipped",
229         "FENCE 2 inc over max value and clipped",
230         "FENCE 3 inc over max value and clipped",
231         "FENCE 0 dec under min value and clipped",
232         "FENCE 1 dec under min value and clipped",
233         "FENCE 2 dec under min value and clipped",
234         "FENCE 3 dec under min value and clipped"
235 };
236
237 static const char * const
238 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
239         "Choice push while full error",
240         "Choice Q watchdog error",
241         "MSG AXI LBW returned with error"
242 };
243
244 enum gaudi_sm_sei_cause {
245         GAUDI_SM_SEI_SO_OVERFLOW,
246         GAUDI_SM_SEI_LBW_4B_UNALIGNED,
247         GAUDI_SM_SEI_AXI_RESPONSE_ERR
248 };
249
250 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
251         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
252         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
253         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
254         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
255         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
256         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
257         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
258         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
259         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
349         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
350         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
351         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
352         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
353         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
354         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
355         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
356         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
357         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
358         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
359         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
360         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
361         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
362         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
363         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
364 };
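/* Summary of the table above: 8 external queues for the two PCI DMA engines
 * (4 streams each), one driver-only CPU queue, and internal queues (4 streams
 * each) for the HBM DMA engines (DMA 2-7), MME 0-1, TPC 0-7 and NIC 0-9.
 */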
365
366 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
367         { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
368         { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
369         { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
370         { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
371         { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
372         { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
373         { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
374         { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
375         { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
376         { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
377         { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
378         { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
379         { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
380         { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
381         { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
382         { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
383         { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
384         { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
385         { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
386         { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
387         { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
388         { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
389         { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
390         { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
391         { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
392         { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
393         { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
394 };
395
396 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
397         { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
398         { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
399         { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
400         { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
401         { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
402         { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
403         { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
404         { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
405         { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
406         { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
407         { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
408 };
409
410 static s64 gaudi_state_dump_specs_props[] = {
411         [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
412         [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
413         [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
414         [SP_MON_OBJ_WR_ADDR_LOW] =
415                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
416         [SP_MON_OBJ_WR_ADDR_HIGH] =
417                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
418         [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
419         [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
420         [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
421         [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
422         [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
423         [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
424         [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
425         [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
426         [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
427         [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
428         [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
429         [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
430         [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
431         [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
432         [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
433         [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
434         [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
435         [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
436         [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
437         [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
438         [SP_FENCE0_CNT_OFFSET] =
439                 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
440         [SP_FENCE0_RDATA_OFFSET] =
441                 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
442         [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
443         [SP_NUM_CORES] = 1,
444 };
445
446 /* The order here is opposite to the order of the indexing in the h/w.
447  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
448  */
449 static const char * const gaudi_sync_manager_names[] = {
450         "SYNC_MGR_E_N",
451         "SYNC_MGR_W_N",
452         "SYNC_MGR_E_S",
453         "SYNC_MGR_W_S",
454         NULL
455 };
456
457 struct ecc_info_extract_params {
458         u64 block_address;
459         u32 num_memories;
460         bool derr;
461 };
462
463 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
464                                                                 u64 phys_addr);
465 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
466                                         struct hl_cs_job *job);
467 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
468                                         u32 size, u64 val);
469 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
470                                         u32 num_regs, u32 val);
471 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
472                                 u32 tpc_id);
473 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
474 static int gaudi_cpucp_info_get(struct hl_device *hdev);
475 static void gaudi_disable_clock_gating(struct hl_device *hdev);
476 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
477 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
478                                 u32 size, bool eb);
479 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
480                                 struct hl_gen_wait_properties *prop);
481 static inline enum hl_collective_mode
482 get_collective_mode(struct hl_device *hdev, u32 queue_id)
483 {
484         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
485                 return HL_COLLECTIVE_MASTER;
486
487         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
488                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
489                 return HL_COLLECTIVE_SLAVE;
490
491         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
492                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
493                 return HL_COLLECTIVE_SLAVE;
494
495         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
496                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
497                 return HL_COLLECTIVE_SLAVE;
498
499         return HL_COLLECTIVE_NOT_SUPPORTED;
500 }
501
502 static inline void set_default_power_values(struct hl_device *hdev)
503 {
504         struct asic_fixed_properties *prop = &hdev->asic_prop;
505
506         if (hdev->card_type == cpucp_card_type_pmc) {
507                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
508
509                 if (prop->fw_security_enabled)
510                         prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
511                 else
512                         prop->dc_power_default = DC_POWER_DEFAULT_PMC;
513         } else {
514                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
515                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
516         }
517 }
518
519 static int gaudi_set_fixed_properties(struct hl_device *hdev)
520 {
521         struct asic_fixed_properties *prop = &hdev->asic_prop;
522         u32 num_sync_stream_queues = 0;
523         int i;
524
525         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
526         prop->hw_queues_props = kcalloc(prop->max_queues,
527                         sizeof(struct hw_queue_properties),
528                         GFP_KERNEL);
529
530         if (!prop->hw_queues_props)
531                 return -ENOMEM;
532
533         for (i = 0 ; i < prop->max_queues ; i++) {
534                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
535                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
536                         prop->hw_queues_props[i].driver_only = 0;
537                         prop->hw_queues_props[i].supports_sync_stream = 1;
538                         prop->hw_queues_props[i].cb_alloc_flags =
539                                 CB_ALLOC_KERNEL;
540                         num_sync_stream_queues++;
541                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
542                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
543                         prop->hw_queues_props[i].driver_only = 1;
544                         prop->hw_queues_props[i].supports_sync_stream = 0;
545                         prop->hw_queues_props[i].cb_alloc_flags =
546                                 CB_ALLOC_KERNEL;
547                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
548                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
549                         prop->hw_queues_props[i].driver_only = 0;
550                         prop->hw_queues_props[i].supports_sync_stream = 0;
551                         prop->hw_queues_props[i].cb_alloc_flags =
552                                 CB_ALLOC_USER;
553
554                 }
555                 prop->hw_queues_props[i].collective_mode =
556                                                 get_collective_mode(hdev, i);
557         }
558
559         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
560         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
561         prop->collective_first_sob = 0;
562         prop->collective_first_mon = 0;
563
564         /* 2 SOBs per internal queue stream are reserved for collective */
565         prop->sync_stream_first_sob =
566                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
567                         * QMAN_STREAMS * HL_RSVD_SOBS;
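        /*
         * Worked example (constants are assumed values, for illustration
         * only): with NUMBER_OF_SOBS_IN_GRP = 11, HL_MAX_SOBS_PER_MONITOR = 8,
         * QMAN_STREAMS = 4 and HL_RSVD_SOBS = 2, ALIGN(11, 8) = 16 and the
         * first sync-stream SOB index is 16 * 4 * 2 = 128.
         */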
568
569         /* 1 monitor per internal queue stream is reserved for collective
570          * 2 monitors per external queue stream are reserved for collective
571          */
572         prop->sync_stream_first_mon =
573                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
574                         (NUMBER_OF_EXT_HW_QUEUES * 2);
575
576         prop->dram_base_address = DRAM_PHYS_BASE;
577         prop->dram_size = GAUDI_HBM_SIZE_32GB;
578         prop->dram_end_address = prop->dram_base_address +
579                                         prop->dram_size;
580         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
581
582         prop->sram_base_address = SRAM_BASE_ADDR;
583         prop->sram_size = SRAM_SIZE;
584         prop->sram_end_address = prop->sram_base_address +
585                                         prop->sram_size;
586         prop->sram_user_base_address = prop->sram_base_address +
587                                         SRAM_USER_BASE_OFFSET;
588
589         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
590         if (hdev->pldm)
591                 prop->mmu_pgt_size = 0x800000; /* 8MB */
592         else
593                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
594         prop->mmu_pte_size = HL_PTE_SIZE;
595         prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
596         prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
597         prop->dram_page_size = PAGE_SIZE_2MB;
598         prop->dram_supports_virtual_memory = false;
599
600         prop->pmmu.hop0_shift = MMU_V1_1_HOP0_SHIFT;
601         prop->pmmu.hop1_shift = MMU_V1_1_HOP1_SHIFT;
602         prop->pmmu.hop2_shift = MMU_V1_1_HOP2_SHIFT;
603         prop->pmmu.hop3_shift = MMU_V1_1_HOP3_SHIFT;
604         prop->pmmu.hop4_shift = MMU_V1_1_HOP4_SHIFT;
605         prop->pmmu.hop0_mask = MMU_V1_1_HOP0_MASK;
606         prop->pmmu.hop1_mask = MMU_V1_1_HOP1_MASK;
607         prop->pmmu.hop2_mask = MMU_V1_1_HOP2_MASK;
608         prop->pmmu.hop3_mask = MMU_V1_1_HOP3_MASK;
609         prop->pmmu.hop4_mask = MMU_V1_1_HOP4_MASK;
610         prop->pmmu.start_addr = VA_HOST_SPACE_START;
611         prop->pmmu.end_addr =
612                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
613         prop->pmmu.page_size = PAGE_SIZE_4KB;
614         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
615         prop->pmmu.last_mask = LAST_MASK;
616
617         /* PMMU and HPMMU are the same except for the page size */
618         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
619         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
620
621         /* shifts and masks are the same in PMMU and DMMU */
622         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
623         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
624         prop->dmmu.end_addr = VA_HOST_SPACE_END;
625         prop->dmmu.page_size = PAGE_SIZE_2MB;
626
627         prop->cfg_size = CFG_SIZE;
628         prop->max_asid = MAX_ASID;
629         prop->num_of_events = GAUDI_EVENT_SIZE;
630         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
631
632         set_default_power_values(hdev);
633
634         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
635         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
636
637         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
638         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
639
640         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
641                                         CARD_NAME_MAX_LEN);
642
643         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
644
645         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
646                         prop->sync_stream_first_sob +
647                         (num_sync_stream_queues * HL_RSVD_SOBS);
648         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
649                         prop->sync_stream_first_mon +
650                         (num_sync_stream_queues * HL_RSVD_MONS);
651
652         prop->first_available_user_msix_interrupt = USHRT_MAX;
653
654         for (i = 0 ; i < HL_MAX_DCORES ; i++)
655                 prop->first_available_cq[i] = USHRT_MAX;
656
657         prop->fw_cpu_boot_dev_sts0_valid = false;
658         prop->fw_cpu_boot_dev_sts1_valid = false;
659         prop->hard_reset_done_by_fw = false;
660         prop->gic_interrupts_enable = true;
661
662         prop->server_type = HL_SERVER_TYPE_UNKNOWN;
663
664         prop->clk_pll_index = HL_GAUDI_MME_PLL;
665         prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
666
667         prop->use_get_power_for_reset_history = true;
668
669         return 0;
670 }
671
672 static int gaudi_pci_bars_map(struct hl_device *hdev)
673 {
674         static const char * const name[] = {"SRAM", "CFG", "HBM"};
675         bool is_wc[3] = {false, false, true};
676         int rc;
677
678         rc = hl_pci_bars_map(hdev, name, is_wc);
679         if (rc)
680                 return rc;
681
682         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
683                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
684
685         return 0;
686 }
687
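/*
 * Move the HBM BAR so it points at @addr (a device DRAM address) and return
 * the previous BAR base so the caller can restore it when done, or U64_MAX if
 * the BAR cannot be moved.
 */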
688 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
689 {
690         struct gaudi_device *gaudi = hdev->asic_specific;
691         struct hl_inbound_pci_region pci_region;
692         u64 old_addr = addr;
693         int rc;
694
695         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
696                 return old_addr;
697
698         if (hdev->asic_prop.iatu_done_by_fw)
699                 return U64_MAX;
700
701         /* Inbound Region 2 - Bar 4 - Point to HBM */
702         pci_region.mode = PCI_BAR_MATCH_MODE;
703         pci_region.bar = HBM_BAR_ID;
704         pci_region.addr = addr;
705         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
706         if (rc)
707                 return U64_MAX;
708
709         if (gaudi) {
710                 old_addr = gaudi->hbm_bar_cur_addr;
711                 gaudi->hbm_bar_cur_addr = addr;
712         }
713
714         return old_addr;
715 }
716
717 static int gaudi_init_iatu(struct hl_device *hdev)
718 {
719         struct hl_inbound_pci_region inbound_region;
720         struct hl_outbound_pci_region outbound_region;
721         int rc;
722
723         if (hdev->asic_prop.iatu_done_by_fw)
724                 return 0;
725
726         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
727         inbound_region.mode = PCI_BAR_MATCH_MODE;
728         inbound_region.bar = SRAM_BAR_ID;
729         inbound_region.addr = SRAM_BASE_ADDR;
730         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
731         if (rc)
732                 goto done;
733
734         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
735         inbound_region.mode = PCI_BAR_MATCH_MODE;
736         inbound_region.bar = CFG_BAR_ID;
737         inbound_region.addr = SPI_FLASH_BASE_ADDR;
738         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
739         if (rc)
740                 goto done;
741
742         /* Inbound Region 2 - Bar 4 - Point to HBM */
743         inbound_region.mode = PCI_BAR_MATCH_MODE;
744         inbound_region.bar = HBM_BAR_ID;
745         inbound_region.addr = DRAM_PHYS_BASE;
746         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
747         if (rc)
748                 goto done;
749
750         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
751
752         /* Outbound Region 0 - Point to Host */
753         outbound_region.addr = HOST_PHYS_BASE;
754         outbound_region.size = HOST_PHYS_SIZE;
755         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
756
757 done:
758         return rc;
759 }
760
761 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
762 {
763         return RREG32(mmHW_STATE);
764 }
765
766 static int gaudi_early_init(struct hl_device *hdev)
767 {
768         struct asic_fixed_properties *prop = &hdev->asic_prop;
769         struct pci_dev *pdev = hdev->pdev;
770         u32 fw_boot_status;
771         int rc;
772
773         rc = gaudi_set_fixed_properties(hdev);
774         if (rc) {
775                 dev_err(hdev->dev, "Failed setting fixed properties\n");
776                 return rc;
777         }
778
779         /* Check BAR sizes */
780         if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
781                 dev_err(hdev->dev,
782                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
783                         SRAM_BAR_ID,
784                         (unsigned long long) pci_resource_len(pdev,
785                                                         SRAM_BAR_ID),
786                         SRAM_BAR_SIZE);
787                 rc = -ENODEV;
788                 goto free_queue_props;
789         }
790
791         if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
792                 dev_err(hdev->dev,
793                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
794                         CFG_BAR_ID,
795                         (unsigned long long) pci_resource_len(pdev,
796                                                                 CFG_BAR_ID),
797                         CFG_BAR_SIZE);
798                 rc = -ENODEV;
799                 goto free_queue_props;
800         }
801
802         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
803         hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
804
805         /* If FW security is enabled at this point it means no access to ELBI */
806         if (hdev->asic_prop.fw_security_enabled) {
807                 hdev->asic_prop.iatu_done_by_fw = true;
808
809                 /*
810                  * The GIC security bit can ONLY be set by CPUCP, so at this
811                  * stage the decision can only be based on PCI ID security.
812                  */
813                 hdev->asic_prop.gic_interrupts_enable = false;
814                 goto pci_init;
815         }
816
817         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
818                                 &fw_boot_status);
819         if (rc)
820                 goto free_queue_props;
821
822         /* Check whether FW is configuring iATU */
823         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
824                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
825                 hdev->asic_prop.iatu_done_by_fw = true;
826
827 pci_init:
828         rc = hl_pci_init(hdev);
829         if (rc)
830                 goto free_queue_props;
831
832         /* Before continuing in the initialization, we need to read the preboot
833          * version to determine whether we run with security-enabled firmware
834          */
835         rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
836                                         mmCPU_BOOT_DEV_STS0,
837                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
838                                         mmCPU_BOOT_ERR1,
839                                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
840         if (rc) {
841                 if (hdev->reset_on_preboot_fail)
842                         hdev->asic_funcs->hw_fini(hdev, true, false);
843                 goto pci_fini;
844         }
845
846         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
847                 dev_info(hdev->dev,
848                         "H/W state is dirty, must reset before initializing\n");
849                 hdev->asic_funcs->hw_fini(hdev, true, false);
850         }
851
852         return 0;
853
854 pci_fini:
855         hl_pci_fini(hdev);
856 free_queue_props:
857         kfree(hdev->asic_prop.hw_queues_props);
858         return rc;
859 }
860
861 static int gaudi_early_fini(struct hl_device *hdev)
862 {
863         kfree(hdev->asic_prop.hw_queues_props);
864         hl_pci_fini(hdev);
865
866         return 0;
867 }
868
869 /**
870  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
871  *
872  * @hdev: pointer to hl_device structure
873  *
874  */
875 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
876 {
877         struct asic_fixed_properties *prop = &hdev->asic_prop;
878         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
879         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
880         int rc;
881
882         if (hdev->asic_prop.fw_security_enabled) {
883                 struct gaudi_device *gaudi = hdev->asic_specific;
884
885                 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
886                         return 0;
887
888                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
889
890                 if (rc)
891                         return rc;
892
893                 freq = pll_freq_arr[2];
894         } else {
895                 /* Backward compatibility */
896                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
897                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
898                 nr = RREG32(mmPSOC_CPU_PLL_NR);
899                 nf = RREG32(mmPSOC_CPU_PLL_NF);
900                 od = RREG32(mmPSOC_CPU_PLL_OD);
901
902                 if (div_sel == DIV_SEL_REF_CLK ||
903                                 div_sel == DIV_SEL_DIVIDED_REF) {
904                         if (div_sel == DIV_SEL_REF_CLK)
905                                 freq = PLL_REF_CLK;
906                         else
907                                 freq = PLL_REF_CLK / (div_fctr + 1);
908                 } else if (div_sel == DIV_SEL_PLL_CLK ||
909                         div_sel == DIV_SEL_DIVIDED_PLL) {
910                         pll_clk = PLL_REF_CLK * (nf + 1) /
911                                         ((nr + 1) * (od + 1));
912                         if (div_sel == DIV_SEL_PLL_CLK)
913                                 freq = pll_clk;
914                         else
915                                 freq = pll_clk / (div_fctr + 1);
916                 } else {
917                         dev_warn(hdev->dev,
918                                 "Received invalid div select value: %d",
919                                 div_sel);
920                         freq = 0;
921                 }
922         }
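        /*
         * Worked example for the register-based path above (values are
         * illustrative only): assuming PLL_REF_CLK is 50 MHz and nr = 0,
         * nf = 31, od = 1, div_fctr = 1, div_sel == DIV_SEL_DIVIDED_PLL, then
         * pll_clk = 50 * 32 / (1 * 2) = 800 MHz and freq = 800 / 2 = 400 MHz.
         */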
923
924         prop->psoc_timestamp_frequency = freq;
925         prop->psoc_pci_pll_nr = nr;
926         prop->psoc_pci_pll_nf = nf;
927         prop->psoc_pci_pll_od = od;
928         prop->psoc_pci_pll_div_factor = div_fctr;
929
930         return 0;
931 }
932
933 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
934                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
935 {
936         struct asic_fixed_properties *prop = &hdev->asic_prop;
937         struct packet_lin_dma *init_tpc_mem_pkt;
938         struct hl_cs_job *job;
939         struct hl_cb *cb;
940         u64 dst_addr;
941         u32 cb_size, ctl;
942         u8 tpc_id;
943         int rc;
944
945         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
946         if (!cb)
947                 return -EFAULT;
948
949         init_tpc_mem_pkt = cb->kernel_address;
950         cb_size = sizeof(*init_tpc_mem_pkt);
951         memset(init_tpc_mem_pkt, 0, cb_size);
952
953         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
954
955         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
956         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
957         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
958         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
959
960         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
961
962         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
963         dst_addr = (prop->sram_user_base_address &
964                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
965                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
966         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
967
968         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
969         if (!job) {
970                 dev_err(hdev->dev, "Failed to allocate a new job\n");
971                 rc = -ENOMEM;
972                 goto release_cb;
973         }
974
975         job->id = 0;
976         job->user_cb = cb;
977         atomic_inc(&job->user_cb->cs_cnt);
978         job->user_cb_size = cb_size;
979         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
980         job->patched_cb = job->user_cb;
981         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
982
983         hl_debugfs_add_job(hdev, job);
984
985         rc = gaudi_send_job_on_qman0(hdev, job);
986
987         if (rc)
988                 goto free_job;
989
990         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
991                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
992                 if (rc)
993                         break;
994         }
995
996 free_job:
997         hl_userptr_delete_list(hdev, &job->userptr_list);
998         hl_debugfs_remove_job(hdev, job);
999         kfree(job);
1000         atomic_dec(&cb->cs_cnt);
1001
1002 release_cb:
1003         hl_cb_put(cb);
1004         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1005
1006         return rc;
1007 }
1008
1009 /*
1010  * gaudi_init_tpc_mem() - Initialize TPC memories.
1011  * @hdev: Pointer to hl_device structure.
1012  *
1013  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1014  *
1015  * Return: 0 for success, negative value for error.
1016  */
1017 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1018 {
1019         const struct firmware *fw;
1020         size_t fw_size;
1021         void *cpu_addr;
1022         dma_addr_t dma_handle;
1023         int rc, count = 5;
1024
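        /* request_firmware() may return -EINTR if the call is interrupted by
         * a signal, so retry a few times before giving up.
         */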
1025 again:
1026         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1027         if (rc == -EINTR && count-- > 0) {
1028                 msleep(50);
1029                 goto again;
1030         }
1031
1032         if (rc) {
1033                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1034                                 GAUDI_TPC_FW_FILE);
1035                 goto out;
1036         }
1037
1038         fw_size = fw->size;
1039         cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1040                         &dma_handle, GFP_KERNEL | __GFP_ZERO);
1041         if (!cpu_addr) {
1042                 dev_err(hdev->dev,
1043                         "Failed to allocate %zu of dma memory for TPC kernel\n",
1044                         fw_size);
1045                 rc = -ENOMEM;
1046                 goto out;
1047         }
1048
1049         memcpy(cpu_addr, fw->data, fw_size);
1050
1051         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1052
1053         hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1054                         dma_handle);
1055
1056 out:
1057         release_firmware(fw);
1058         return rc;
1059 }
1060
1061 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1062 {
1063         struct gaudi_device *gaudi = hdev->asic_specific;
1064         struct gaudi_collective_properties *prop = &gaudi->collective_props;
1065         struct hl_hw_queue *q;
1066         u32 i, sob_id, sob_group_id, queue_id;
1067
1068         /* Iterate through SOB groups and assign a SOB for each slave queue */
1069         sob_group_id =
1070                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1071         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1072
1073         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1074         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1075                 q = &hdev->kernel_queues[queue_id + (4 * i)];
1076                 q->sync_stream_prop.collective_sob_id = sob_id + i;
1077         }
1078
1079         /* Both DMA5 and TPC7 use the same resources since only a single
1080          * engine needs to participate in the reduction process
1081          */
1082         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1083         q = &hdev->kernel_queues[queue_id];
1084         q->sync_stream_prop.collective_sob_id =
1085                         sob_id + NIC_NUMBER_OF_ENGINES;
1086
1087         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1088         q = &hdev->kernel_queues[queue_id];
1089         q->sync_stream_prop.collective_sob_id =
1090                         sob_id + NIC_NUMBER_OF_ENGINES;
1091 }
1092
1093 static void gaudi_sob_group_hw_reset(struct kref *ref)
1094 {
1095         struct gaudi_hw_sob_group *hw_sob_group =
1096                 container_of(ref, struct gaudi_hw_sob_group, kref);
1097         struct hl_device *hdev = hw_sob_group->hdev;
1098         int i;
1099
1100         for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1101                 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1102                         (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1103
1104         kref_init(&hw_sob_group->kref);
1105 }
1106
1107 static void gaudi_sob_group_reset_error(struct kref *ref)
1108 {
1109         struct gaudi_hw_sob_group *hw_sob_group =
1110                 container_of(ref, struct gaudi_hw_sob_group, kref);
1111         struct hl_device *hdev = hw_sob_group->hdev;
1112
1113         dev_crit(hdev->dev,
1114                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1115                 hw_sob_group->base_sob_id);
1116 }
1117
1118 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1119 {
1120         struct gaudi_collective_properties *prop;
1121         int i;
1122
1123         prop = &gaudi->collective_props;
1124
1125         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1126
1127         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1128                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1129                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1130                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1131         /* Set collective engine bit */
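        /* The loop above exits with i == NIC_NUMBER_OF_ENGINES, so the bit set
         * below is the one right after the last NIC bit and represents the
         * single DMA5/TPC7 reduction engine.
         */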
1132         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1133                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1134 }
1135
1136 static int gaudi_collective_init(struct hl_device *hdev)
1137 {
1138         u32 i, sob_id, reserved_sobs_per_group;
1139         struct gaudi_collective_properties *prop;
1140         struct gaudi_device *gaudi;
1141
1142         gaudi = hdev->asic_specific;
1143         prop = &gaudi->collective_props;
1144         sob_id = hdev->asic_prop.collective_first_sob;
1145
1146         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1147         reserved_sobs_per_group =
1148                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1149
1150         /* Init SOB groups */
1151         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1152                 prop->hw_sob_group[i].hdev = hdev;
1153                 prop->hw_sob_group[i].base_sob_id = sob_id;
1154                 sob_id += reserved_sobs_per_group;
1155                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1156         }
1157
1158         for (i = 0 ; i < QMAN_STREAMS; i++) {
1159                 prop->next_sob_group_val[i] = 1;
1160                 prop->curr_sob_group_idx[i] = 0;
1161                 gaudi_collective_map_sobs(hdev, i);
1162         }
1163
1164         gaudi_collective_mstr_sob_mask_set(gaudi);
1165
1166         return 0;
1167 }
1168
1169 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1170 {
1171         struct gaudi_device *gaudi = hdev->asic_specific;
1172         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1173
1174         kref_put(&cprop->hw_sob_group[sob_group].kref,
1175                                         gaudi_sob_group_hw_reset);
1176 }
1177
1178 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1179                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1180 {
1181         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1182         struct gaudi_collective_properties *cprop;
1183         struct hl_gen_wait_properties wait_prop;
1184         struct hl_sync_stream_properties *prop;
1185         struct gaudi_device *gaudi;
1186
1187         gaudi = hdev->asic_specific;
1188         cprop = &gaudi->collective_props;
1189         queue_id = job->hw_queue_id;
1190         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1191
1192         master_sob_base =
1193                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1194         master_monitor = prop->collective_mstr_mon_id[0];
1195
1196         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1197
1198         dev_dbg(hdev->dev,
1199                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1200                 master_sob_base, cprop->mstr_sob_mask[0],
1201                 cprop->next_sob_group_val[stream],
1202                 master_monitor, queue_id);
1203
1204         wait_prop.data = (void *) job->patched_cb;
1205         wait_prop.sob_base = master_sob_base;
1206         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1207         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1208         wait_prop.mon_id = master_monitor;
1209         wait_prop.q_idx = queue_id;
1210         wait_prop.size = cb_size;
1211         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1212
1213         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1214         master_monitor = prop->collective_mstr_mon_id[1];
1215
1216         dev_dbg(hdev->dev,
1217                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1218                 master_sob_base, cprop->mstr_sob_mask[1],
1219                 cprop->next_sob_group_val[stream],
1220                 master_monitor, queue_id);
1221
1222         wait_prop.sob_base = master_sob_base;
1223         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1224         wait_prop.mon_id = master_monitor;
1225         wait_prop.size = cb_size;
1226         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1227 }
1228
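/*
 * Each collective slave queue first waits on the signal SOB and then signals
 * its own collective SOB; the master queue waits on all of these collective
 * SOBs using the mask built in gaudi_collective_mstr_sob_mask_set().
 */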
1229 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1230                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1231 {
1232         struct hl_gen_wait_properties wait_prop;
1233         struct hl_sync_stream_properties *prop;
1234         u32 queue_id, cb_size = 0;
1235
1236         queue_id = job->hw_queue_id;
1237         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1238
1239         if (job->cs->encaps_signals) {
1240                 /* use the encaps signal handle stored earlier in the flow
1241                  * and set the SOB information from the encaps
1242                  * signals handle
1243                  */
1244                 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1245                                                 cs_cmpl);
1246
1247                 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1248                                 job->cs->sequence,
1249                                 cs_cmpl->hw_sob->sob_id,
1250                                 cs_cmpl->sob_val);
1251         }
1252
1253         /* Add to wait CBs using slave monitor */
1254         wait_prop.data = (void *) job->user_cb;
1255         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1256         wait_prop.sob_mask = 0x1;
1257         wait_prop.sob_val = cs_cmpl->sob_val;
1258         wait_prop.mon_id = prop->collective_slave_mon_id;
1259         wait_prop.q_idx = queue_id;
1260         wait_prop.size = cb_size;
1261
1262         dev_dbg(hdev->dev,
1263                 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1264                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1265                 prop->collective_slave_mon_id, queue_id);
1266
1267         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1268
1269         dev_dbg(hdev->dev,
1270                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1271                 prop->collective_sob_id, queue_id);
1272
1273         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1274                         prop->collective_sob_id, cb_size, false);
1275 }
1276
1277 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1278 {
1279         struct hl_cs_compl *signal_cs_cmpl =
1280                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1281         struct hl_cs_compl *cs_cmpl =
1282                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1283         struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1284         struct gaudi_collective_properties *cprop;
1285         u32 stream, queue_id, sob_group_offset;
1286         struct gaudi_device *gaudi;
1287         struct hl_device *hdev;
1288         struct hl_cs_job *job;
1289         struct hl_ctx *ctx;
1290
1291         ctx = cs->ctx;
1292         hdev = ctx->hdev;
1293         gaudi = hdev->asic_specific;
1294         cprop = &gaudi->collective_props;
1295
1296         if (cs->encaps_signals) {
1297                 cs_cmpl->hw_sob = handle->hw_sob;
1298                 /* at this checkpoint we only need the hw_sob pointer
1299                  * for the completion check before starting to go over the jobs
1300                  * of the master/slaves; the sob_value will be taken later on
1301                  * in gaudi_collective_slave_init_job, depending on each
1302                  * job's wait offset value.
1303                  */
1304                 cs_cmpl->sob_val = 0;
1305         } else {
1306                 /* copy the SOB id and value of the signal CS */
1307                 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1308                 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1309         }
1310
1311         /* Check again whether the signal CS has already completed.
1312          * If it has, don't send any wait CS since the hw_sob
1313          * could already be in reset. If the signal has not completed,
1314          * take a refcount on the hw_sob to prevent resetting the SOB
1315          * while the wait CS is not yet submitted.
1316          * Note that this check is protected by two locks:
1317          * the hw queue lock and the completion object lock.
1318          * The completion object lock also protects
1319          * the hw_sob reset handler function, while the hw queue lock
1320          * prevents the hw_sob refcount value, which is changed by the
1321          * signal/wait flows, from going out of sync.
1322          */
1323         spin_lock(&signal_cs_cmpl->lock);
1324
1325         if (completion_done(&cs->signal_fence->completion)) {
1326                 spin_unlock(&signal_cs_cmpl->lock);
1327                 return -EINVAL;
1328         }
1329         /* Increment kref since all slave queues are now waiting on it */
1330         kref_get(&cs_cmpl->hw_sob->kref);
1331
1332         spin_unlock(&signal_cs_cmpl->lock);
1333
1334         /* Calculate the stream from collective master queue (1st job) */
1335         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1336         stream = job->hw_queue_id % 4;
1337         sob_group_offset =
1338                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1339
1340         list_for_each_entry(job, &cs->job_list, cs_node) {
1341                 queue_id = job->hw_queue_id;
1342
1343                 if (hdev->kernel_queues[queue_id].collective_mode ==
1344                                 HL_COLLECTIVE_MASTER)
1345                         gaudi_collective_master_init_job(hdev, job, stream,
1346                                                 sob_group_offset);
1347                 else
1348                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1349         }
1350
1351         cs_cmpl->sob_group = sob_group_offset;
1352
1353         /* Handle sob group kref and wraparound */
1354         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1355         cprop->next_sob_group_val[stream]++;
1356
1357         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1358                 /*
1359                  * Decrement as we reached the max value.
1360                  * The release function won't be called here as we've
1361                  * just incremented the refcount.
1362                  */
1363                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1364                                 gaudi_sob_group_reset_error);
1365                 cprop->next_sob_group_val[stream] = 1;
1366                 /* only two SOBs are currently in use */
1367                 cprop->curr_sob_group_idx[stream] =
1368                         (cprop->curr_sob_group_idx[stream] + 1) &
1369                                                         (HL_RSVD_SOBS - 1);
1370
1371                 gaudi_collective_map_sobs(hdev, stream);
1372
1373                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1374                                 cprop->curr_sob_group_idx[stream], stream);
1375         }
1376
1377         mb();
1378         hl_fence_put(cs->signal_fence);
1379         cs->signal_fence = NULL;
1380
1381         return 0;
1382 }
1383
1384 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1385                 struct hl_ctx *ctx, struct hl_cs *cs,
1386                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1387                 u32 encaps_signal_offset)
1388 {
1389         struct hw_queue_properties *hw_queue_prop;
1390         struct hl_cs_counters_atomic *cntr;
1391         struct hl_cs_job *job;
1392         struct hl_cb *cb;
1393         u32 cb_size;
1394         bool patched_cb;
1395
1396         cntr = &hdev->aggregated_cs_counters;
1397
1398         if (mode == HL_COLLECTIVE_MASTER) {
1399                 /* The CB of the collective master queue contains:
1400                  * 4 msg short packets for monitor 1 configuration
1401                  * 1 fence packet
1402                  * 4 msg short packets for monitor 2 configuration
1403                  * 1 fence packet
1404                  * 2 msg prot packets for completion and MSI-X
1405                  */
1406                 cb_size = sizeof(struct packet_msg_short) * 8 +
1407                                 sizeof(struct packet_fence) * 2 +
1408                                 sizeof(struct packet_msg_prot) * 2;
1409                 patched_cb = true;
1410         } else {
1411                 /* The CB of a collective slave queue contains:
1412                  * 4 msg short packets for monitor configuration
1413                  * 1 fence packet
1414                  * 1 additional msg short packet for sob signal
1415                  */
1416                 cb_size = sizeof(struct packet_msg_short) * 5 +
1417                                 sizeof(struct packet_fence);
1418                 patched_cb = false;
1419         }
1420
1421         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1422         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1423         if (!job) {
1424                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1425                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1426                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1427                 return -ENOMEM;
1428         }
1429
1430         /* Allocate internal mapped CB for non patched CBs */
1431         cb = hl_cb_kernel_create(hdev, cb_size,
1432                         hdev->mmu_enable && !patched_cb);
1433         if (!cb) {
1434                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1435                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1436                 kfree(job);
1437                 return -EFAULT;
1438         }
1439
1440         job->id = 0;
1441         job->cs = cs;
1442         job->user_cb = cb;
1443         atomic_inc(&job->user_cb->cs_cnt);
1444         job->user_cb_size = cb_size;
1445         job->hw_queue_id = queue_id;
1446
1447         /* Since the collective wait CS is guaranteed to have only one
1448          * chunk, we can use this chunk to set the encapsulated signal
1449          * offset in the jobs.
1450          */
1451         if (cs->encaps_signals)
1452                 job->encaps_sig_wait_offset = encaps_signal_offset;
1453
1454         /*
1455          * No need for parsing, the user CB is the patched CB.
1456          * We call hl_cb_destroy() for two reasons: we don't need
1457          * the CB in the CB idr anymore, and we need to decrement its
1458          * refcount as it was incremented inside hl_cb_kernel_create().
1459          */
1460         if (patched_cb)
1461                 job->patched_cb = job->user_cb;
1462         else
1463                 job->patched_cb = NULL;
1464
1465         job->job_cb_size = job->user_cb_size;
1466         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1467
1468         /* increment refcount as for external queues we get completion */
1469         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1470                 cs_get(cs);
1471
1472         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1473
1474         list_add_tail(&job->cs_node, &cs->job_list);
1475
1476         hl_debugfs_add_job(hdev, job);
1477
1478         return 0;
1479 }
1480
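/*
 * Create the jobs of a collective wait CS: one job on the collective master
 * queue and one job per collective slave queue - the enabled NIC engines
 * plus the reduction engine (DMA5 or TPC7). NIC engines that were not
 * initialized are skipped.
 */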
1481 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1482                 struct hl_ctx *ctx, struct hl_cs *cs,
1483                 u32 wait_queue_id, u32 collective_engine_id,
1484                 u32 encaps_signal_offset)
1485 {
1486         struct gaudi_device *gaudi = hdev->asic_specific;
1487         struct hw_queue_properties *hw_queue_prop;
1488         u32 queue_id, collective_queue, num_jobs;
1489         u32 stream, nic_queue, nic_idx = 0;
1490         bool skip;
1491         int i, rc = 0;
1492
1493         /* Verify wait queue id is configured as master */
1494         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1495         if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1496                 dev_err(hdev->dev,
1497                         "Queue %d is not configured as collective master\n",
1498                         wait_queue_id);
1499                 return -EINVAL;
1500         }
1501
1502         /* Verify engine id is supported */
1503         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1504                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1505                 dev_err(hdev->dev,
1506                         "Collective wait does not support engine %u\n",
1507                         collective_engine_id);
1508                 return -EINVAL;
1509         }
1510
1511         stream = wait_queue_id % 4;
1512
1513         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1514                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1515         else
1516                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1517
1518         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1519         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1520
1521         /* The first job goes to the collective master queue; it will wait for
1522          * the collective slave queues to finish execution.
1523          * The synchronization is done using two monitors:
1524          * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1525          * reduction engine (DMA5/TPC7).
1526          *
1527          * The rest of the jobs go to the collective slave queues, which will
1528          * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1529          */
1530         for (i = 0 ; i < num_jobs ; i++) {
1531                 if (i == 0) {
1532                         queue_id = wait_queue_id;
1533                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1534                                 HL_COLLECTIVE_MASTER, queue_id,
1535                                 wait_queue_id, encaps_signal_offset);
1536                 } else {
1537                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1538                                 if (gaudi->hw_cap_initialized &
1539                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1540                                         skip = false;
1541                                 else
1542                                         skip = true;
1543
1544                                 queue_id = nic_queue;
1545                                 nic_queue += 4;
1546                                 nic_idx++;
1547
1548                                 if (skip)
1549                                         continue;
1550                         } else {
1551                                 queue_id = collective_queue;
1552                         }
1553
1554                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1555                                 HL_COLLECTIVE_SLAVE, queue_id,
1556                                 wait_queue_id, encaps_signal_offset);
1557                 }
1558
1559                 if (rc)
1560                         return rc;
1561         }
1562
1563         return rc;
1564 }
1565
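/*
 * Late initialization flow: get cpucp info, mask out the NIC ports that are
 * unavailable on PCI cards, enable PCI access from the device CPU, scrub
 * SRAM and DRAM, fetch the PSOC frequency, clear the MMU page tables range,
 * initialize the TPC memories and the collective properties, prepare the
 * MMU for the single user ASID and set the PLL profile.
 */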
1566 static int gaudi_late_init(struct hl_device *hdev)
1567 {
1568         struct gaudi_device *gaudi = hdev->asic_specific;
1569         int rc;
1570
1571         rc = gaudi->cpucp_info_get(hdev);
1572         if (rc) {
1573                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1574                 return rc;
1575         }
1576
1577         if ((hdev->card_type == cpucp_card_type_pci) &&
1578                         (hdev->nic_ports_mask & 0x3)) {
1579                 dev_info(hdev->dev,
1580                         "PCI card detected, only 8 ports are enabled\n");
1581                 hdev->nic_ports_mask &= ~0x3;
1582
1583                 /* Stop and disable unused NIC QMANs */
1584                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1585                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1586                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1587
1588                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1589                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1590                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1591
1592                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1593                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1594
1595                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1596         }
1597
1598         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1599         if (rc) {
1600                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1601                 return rc;
1602         }
1603
1604         /* Scrub both SRAM and DRAM */
1605         rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
1606         if (rc)
1607                 goto disable_pci_access;
1608
1609         rc = gaudi_fetch_psoc_frequency(hdev);
1610         if (rc) {
1611                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1612                 goto disable_pci_access;
1613         }
1614
1615         rc = gaudi_mmu_clear_pgt_range(hdev);
1616         if (rc) {
1617                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1618                 goto disable_pci_access;
1619         }
1620
1621         rc = gaudi_init_tpc_mem(hdev);
1622         if (rc) {
1623                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1624                 goto disable_pci_access;
1625         }
1626
1627         rc = gaudi_collective_init(hdev);
1628         if (rc) {
1629                 dev_err(hdev->dev, "Failed to init collective\n");
1630                 goto disable_pci_access;
1631         }
1632
1633         /* We only support a single ASID for the user, so for the sake of optimization, just
1634          * initialize the ASID one time during device initialization with the fixed value of 1
1635          */
1636         gaudi_mmu_prepare(hdev, 1);
1637
1638         hl_fw_set_pll_profile(hdev);
1639
1640         return 0;
1641
1642 disable_pci_access:
1643         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1644
1645         return rc;
1646 }
1647
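/* Free the hwmon channel info array built for this device */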
1648 static void gaudi_late_fini(struct hl_device *hdev)
1649 {
1650         const struct hwmon_channel_info **channel_info_arr;
1651         int i = 0;
1652
1653         if (!hdev->hl_chip_info->info)
1654                 return;
1655
1656         channel_info_arr = hdev->hl_chip_info->info;
1657
1658         while (channel_info_arr[i]) {
1659                 kfree(channel_info_arr[i]->config);
1660                 kfree(channel_info_arr[i]);
1661                 i++;
1662         }
1663
1664         kfree(channel_info_arr);
1665
1666         hdev->hl_chip_info->info = NULL;
1667 }
1668
1669 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1670 {
1671         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1672         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1673         int i, j, rc = 0;
1674
1675         /*
1676          * The device CPU works with 40-bit addresses, while bit 39 must be set
1677          * to '1' when accessing the host.
1678          * Bits 49:39 of the full host address are saved for a later
1679          * configuration of the HW to perform extension to 50 bits.
1680          * Because there is a single HW register that holds the extension bits,
1681          * these bits must be identical across the entire allocated range.
1682          */
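        /*
         * Illustration only: if an allocation happened to start at host
         * address 0x7f_ffff_f000 and extend past 0x80_0000_0000, bits 49:39
         * of its start and end addresses would differ, so that attempt is
         * discarded and the allocation is retried. GAUDI_CPU_PCI_MSB_ADDR()
         * is used below to compare these extension bits of the start and
         * end addresses.
         */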
1683
1684         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1685                 virt_addr_arr[i] =
1686                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1687                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1688                                                 &dma_addr_arr[i],
1689                                                 GFP_KERNEL | __GFP_ZERO);
1690                 if (!virt_addr_arr[i]) {
1691                         rc = -ENOMEM;
1692                         goto free_dma_mem_arr;
1693                 }
1694
1695                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1696                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1697                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1698                         break;
1699         }
1700
1701         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1702                 dev_err(hdev->dev,
1703                         "MSBs of CPU accessible DMA memory are not identical across the allocated range\n");
1704                 rc = -EFAULT;
1705                 goto free_dma_mem_arr;
1706         }
1707
1708         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1709         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1710         hdev->cpu_pci_msb_addr =
1711                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1712
1713         if (!hdev->asic_prop.fw_security_enabled)
1714                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1715
1716 free_dma_mem_arr:
1717         for (j = 0 ; j < i ; j++)
1718                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1719                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1720                                                 virt_addr_arr[j],
1721                                                 dma_addr_arr[j]);
1722
1723         return rc;
1724 }
1725
1726 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1727 {
1728         struct gaudi_device *gaudi = hdev->asic_specific;
1729         struct gaudi_internal_qman_info *q;
1730         u32 i;
1731
1732         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1733                 q = &gaudi->internal_qmans[i];
1734                 if (!q->pq_kernel_addr)
1735                         continue;
1736                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1737                                                         q->pq_kernel_addr,
1738                                                         q->pq_dma_addr);
1739         }
1740 }
1741
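/*
 * Allocate a PQ buffer for every internal (on-device) queue. The PQ size
 * depends on the engine type: HBM DMA, MME, TPC or NIC.
 */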
1742 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1743 {
1744         struct gaudi_device *gaudi = hdev->asic_specific;
1745         struct gaudi_internal_qman_info *q;
1746         int rc, i;
1747
1748         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1749                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1750                         continue;
1751
1752                 q = &gaudi->internal_qmans[i];
1753
1754                 switch (i) {
1755                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1756                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1757                         break;
1758                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1759                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1760                         break;
1761                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1762                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1763                         break;
1764                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1765                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1766                         break;
1767                 default:
1768                         dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1769                         rc = -EINVAL;
1770                         goto free_internal_qmans_pq_mem;
1771                 }
1772
1773                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1774                                                 hdev, q->pq_size,
1775                                                 &q->pq_dma_addr,
1776                                                 GFP_KERNEL | __GFP_ZERO);
1777                 if (!q->pq_kernel_addr) {
1778                         rc = -ENOMEM;
1779                         goto free_internal_qmans_pq_mem;
1780                 }
1781         }
1782
1783         return 0;
1784
1785 free_internal_qmans_pq_mem:
1786         gaudi_free_internal_qmans_pq_mem(hdev);
1787         return rc;
1788 }
1789
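/*
 * Describe the device address ranges that are exposed through the PCI BARs
 * (CFG, SRAM, DRAM and SP SRAM), including each range's offset within its
 * BAR.
 */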
1790 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1791 {
1792         struct asic_fixed_properties *prop = &hdev->asic_prop;
1793         struct pci_mem_region *region;
1794
1795         /* CFG */
1796         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1797         region->region_base = CFG_BASE;
1798         region->region_size = CFG_SIZE;
1799         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1800         region->bar_size = CFG_BAR_SIZE;
1801         region->bar_id = CFG_BAR_ID;
1802         region->used = 1;
1803
1804         /* SRAM */
1805         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1806         region->region_base = SRAM_BASE_ADDR;
1807         region->region_size = SRAM_SIZE;
1808         region->offset_in_bar = 0;
1809         region->bar_size = SRAM_BAR_SIZE;
1810         region->bar_id = SRAM_BAR_ID;
1811         region->used = 1;
1812
1813         /* DRAM */
1814         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1815         region->region_base = DRAM_PHYS_BASE;
1816         region->region_size = hdev->asic_prop.dram_size;
1817         region->offset_in_bar = 0;
1818         region->bar_size = prop->dram_pci_bar_size;
1819         region->bar_id = HBM_BAR_ID;
1820         region->used = 1;
1821
1822         /* SP SRAM */
1823         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1824         region->region_base = PSOC_SCRATCHPAD_ADDR;
1825         region->region_size = PSOC_SCRATCHPAD_SIZE;
1826         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1827         region->bar_size = CFG_BAR_SIZE;
1828         region->bar_id = CFG_BAR_ID;
1829         region->used = 1;
1830 }
1831
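/*
 * S/W initialization: allocate the ASIC-specific structure, build the event
 * ID map from the IRQ mapping table, create the DMA pool for small
 * allocations, allocate the CPU accessible DMA region and its pool, and
 * allocate the PQ buffers of the internal queues.
 */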
1832 static int gaudi_sw_init(struct hl_device *hdev)
1833 {
1834         struct gaudi_device *gaudi;
1835         u32 i, event_id = 0;
1836         int rc;
1837
1838         /* Allocate device structure */
1839         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1840         if (!gaudi)
1841                 return -ENOMEM;
1842
1843         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1844                 if (gaudi_irq_map_table[i].valid) {
1845                         if (event_id == GAUDI_EVENT_SIZE) {
1846                                 dev_err(hdev->dev,
1847                                         "Event array exceeds the limit of %u events\n",
1848                                         GAUDI_EVENT_SIZE);
1849                                 rc = -EINVAL;
1850                                 goto free_gaudi_device;
1851                         }
1852
1853                         gaudi->events[event_id++] =
1854                                         gaudi_irq_map_table[i].fc_id;
1855                 }
1856         }
1857
1858         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1859
1860         hdev->asic_specific = gaudi;
1861
1862         /* Create DMA pool for small allocations */
1863         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1864                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1865         if (!hdev->dma_pool) {
1866                 dev_err(hdev->dev, "failed to create DMA pool\n");
1867                 rc = -ENOMEM;
1868                 goto free_gaudi_device;
1869         }
1870
1871         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1872         if (rc)
1873                 goto free_dma_pool;
1874
1875         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1876         if (!hdev->cpu_accessible_dma_pool) {
1877                 dev_err(hdev->dev,
1878                         "Failed to create CPU accessible DMA pool\n");
1879                 rc = -ENOMEM;
1880                 goto free_cpu_dma_mem;
1881         }
1882
1883         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1884                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1885                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1886         if (rc) {
1887                 dev_err(hdev->dev,
1888                         "Failed to add memory to CPU accessible DMA pool\n");
1889                 rc = -EFAULT;
1890                 goto free_cpu_accessible_dma_pool;
1891         }
1892
1893         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1894         if (rc)
1895                 goto free_cpu_accessible_dma_pool;
1896
1897         spin_lock_init(&gaudi->hw_queues_lock);
1898
1899         hdev->supports_sync_stream = true;
1900         hdev->supports_coresight = true;
1901         hdev->supports_staged_submission = true;
1902         hdev->supports_wait_for_multi_cs = true;
1903
1904         hdev->asic_funcs->set_pci_memory_regions(hdev);
1905         hdev->stream_master_qid_arr =
1906                                 hdev->asic_funcs->get_stream_master_qid_arr();
1907         hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1908
1909         return 0;
1910
1911 free_cpu_accessible_dma_pool:
1912         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1913 free_cpu_dma_mem:
1914         if (!hdev->asic_prop.fw_security_enabled)
1915                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1916                                         hdev->cpu_pci_msb_addr);
1917         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1918                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1919                         hdev->cpu_accessible_dma_mem,
1920                         hdev->cpu_accessible_dma_address);
1921 free_dma_pool:
1922         dma_pool_destroy(hdev->dma_pool);
1923 free_gaudi_device:
1924         kfree(gaudi);
1925         return rc;
1926 }
1927
1928 static int gaudi_sw_fini(struct hl_device *hdev)
1929 {
1930         struct gaudi_device *gaudi = hdev->asic_specific;
1931
1932         gaudi_free_internal_qmans_pq_mem(hdev);
1933
1934         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1935
1936         if (!hdev->asic_prop.fw_security_enabled)
1937                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1938                                         hdev->cpu_pci_msb_addr);
1939
1940         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1941                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1942                         hdev->cpu_accessible_dma_mem,
1943                         hdev->cpu_accessible_dma_address);
1944
1945         dma_pool_destroy(hdev->dma_pool);
1946
1947         kfree(gaudi);
1948
1949         return 0;
1950 }
1951
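/*
 * Single MSI mode handler: one vector serves all completion queues and the
 * event queue, so every CQ handler and the EQ handler are invoked on each
 * interrupt.
 */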
1952 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1953 {
1954         struct hl_device *hdev = arg;
1955         int i;
1956
1957         if (hdev->disabled)
1958                 return IRQ_HANDLED;
1959
1960         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1961                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1962
1963         hl_irq_handler_eq(irq, &hdev->event_queue);
1964
1965         return IRQ_HANDLED;
1966 }
1967
1968 /*
1969  * For backward compatibility, new MSI interrupts should be set after the
1970  * existing CPU and NIC interrupts.
1971  */
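/*
 * Put differently (assuming the current vector layout): interrupt indices
 * below GAUDI_EVENT_QUEUE_MSI_IDX map 1:1 to the completion queue vectors,
 * the CPU event queue uses vector GAUDI_EVENT_QUEUE_MSI_IDX, and any higher
 * index is shifted up by NIC_NUMBER_OF_ENGINES + 1 so it lands after the
 * existing CPU and NIC interrupts.
 */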
1972 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1973                                 bool cpu_eq)
1974 {
1975         int msi_vec;
1976
1977         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1978                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1979                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1980
1981         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1982                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1983
1984         return pci_irq_vector(hdev->pdev, msi_vec);
1985 }
1986
1987 static int gaudi_enable_msi_single(struct hl_device *hdev)
1988 {
1989         int rc, irq;
1990
1991         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1992
1993         irq = gaudi_pci_irq_vector(hdev, 0, false);
1994         rc = request_irq(irq, gaudi_irq_handler_single, 0,
1995                         "gaudi single msi", hdev);
1996         if (rc)
1997                 dev_err(hdev->dev,
1998                         "Failed to request single MSI IRQ\n");
1999
2000         return rc;
2001 }
2002
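/*
 * Multi MSI mode: request a dedicated IRQ per completion queue plus a
 * separate IRQ for the event queue (vector GAUDI_EVENT_QUEUE_MSI_IDX).
 */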
2003 static int gaudi_enable_msi_multi(struct hl_device *hdev)
2004 {
2005         int cq_cnt = hdev->asic_prop.completion_queues_count;
2006         int rc, i, irq_cnt_init, irq;
2007
2008         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2009                 irq = gaudi_pci_irq_vector(hdev, i, false);
2010                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
2011                                 &hdev->completion_queue[i]);
2012                 if (rc) {
2013                         dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2014                         goto free_irqs;
2015                 }
2016         }
2017
2018         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2019         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2020                                 &hdev->event_queue);
2021         if (rc) {
2022                 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2023                 goto free_irqs;
2024         }
2025
2026         return 0;
2027
2028 free_irqs:
2029         for (i = 0 ; i < irq_cnt_init ; i++)
2030                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
2031                                 &hdev->completion_queue[i]);
2032         return rc;
2033 }
2034
2035 static int gaudi_enable_msi(struct hl_device *hdev)
2036 {
2037         struct gaudi_device *gaudi = hdev->asic_specific;
2038         int rc;
2039
2040         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2041                 return 0;
2042
2043         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2044         if (rc < 0) {
2045                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2046                 return rc;
2047         }
2048
2049         if (rc < NUMBER_OF_INTERRUPTS) {
2050                 gaudi->multi_msi_mode = false;
2051                 rc = gaudi_enable_msi_single(hdev);
2052         } else {
2053                 gaudi->multi_msi_mode = true;
2054                 rc = gaudi_enable_msi_multi(hdev);
2055         }
2056
2057         if (rc)
2058                 goto free_pci_irq_vectors;
2059
2060         gaudi->hw_cap_initialized |= HW_CAP_MSI;
2061
2062         return 0;
2063
2064 free_pci_irq_vectors:
2065         pci_free_irq_vectors(hdev->pdev);
2066         return rc;
2067 }
2068
2069 static void gaudi_sync_irqs(struct hl_device *hdev)
2070 {
2071         struct gaudi_device *gaudi = hdev->asic_specific;
2072         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2073
2074         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2075                 return;
2076
2077         /* Wait for all pending IRQs to be finished */
2078         if (gaudi->multi_msi_mode) {
2079                 for (i = 0 ; i < cq_cnt ; i++)
2080                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2081
2082                 synchronize_irq(gaudi_pci_irq_vector(hdev,
2083                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
2084                                                 true));
2085         } else {
2086                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2087         }
2088 }
2089
2090 static void gaudi_disable_msi(struct hl_device *hdev)
2091 {
2092         struct gaudi_device *gaudi = hdev->asic_specific;
2093         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2094
2095         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2096                 return;
2097
2098         gaudi_sync_irqs(hdev);
2099
2100         if (gaudi->multi_msi_mode) {
2101                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2102                                                 true);
2103                 free_irq(irq, &hdev->event_queue);
2104
2105                 for (i = 0 ; i < cq_cnt ; i++) {
2106                         irq = gaudi_pci_irq_vector(hdev, i, false);
2107                         free_irq(irq, &hdev->completion_queue[i]);
2108                 }
2109         } else {
2110                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2111         }
2112
2113         pci_free_irq_vectors(hdev->pdev);
2114
2115         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2116 }
2117
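/*
 * Enable SRAM scrambling in all NIF/SIF routers and DMA interfaces, unless
 * F/W security is enabled, the F/W already enabled it, it was already done
 * in this boot, or SRAM scrambling is disabled for this device.
 */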
2118 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2119 {
2120         struct gaudi_device *gaudi = hdev->asic_specific;
2121
2122         if (hdev->asic_prop.fw_security_enabled)
2123                 return;
2124
2125         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2126                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2127                 return;
2128
2129         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2130                 return;
2131
2132         if (!hdev->sram_scrambler_enable)
2133                 return;
2134
2135         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2136                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2137         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2138                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2139         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2140                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2141         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2142                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2143         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2144                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2145         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2146                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2147         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2148                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2149         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2150                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2151
2152         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2153                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2154         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2155                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2156         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2157                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2158         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2159                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2160         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2161                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2162         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2163                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2164         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2165                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2166         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2167                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2168
2169         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2170                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2171         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2172                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2173         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2174                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2175         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2176                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2177         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2178                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2179         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2180                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2181         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2182                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2183         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2184                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2185
2186         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2187 }
2188
2189 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2190 {
2191         struct gaudi_device *gaudi = hdev->asic_specific;
2192
2193         if (hdev->asic_prop.fw_security_enabled)
2194                 return;
2195
2196         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2197                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2198                 return;
2199
2200         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2201                 return;
2202
2203         if (!hdev->dram_scrambler_enable)
2204                 return;
2205
2206         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2207                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2208         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2209                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2210         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2211                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2212         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2213                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2214         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2215                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2216         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2217                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2218         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2219                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2220         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2221                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2222
2223         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2224                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2225         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2226                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2227         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2228                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2229         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2230                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2231         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2232                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2233         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2234                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2235         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2236                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2237         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2238                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2239
2240         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2241                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2242         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2243                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2244         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2245                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2246         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2247                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2248         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2249                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2250         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2251                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2252         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2253                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2254         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2255                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2256
2257         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2258 }
2259
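/*
 * Configure the end-to-end (E2E) credits of the routers and DMA interfaces:
 * program the HBM and PCI read/write credit sizes, program the NL_HBM_*
 * registers when HBM scrambling is disabled, and then enable E2E on all
 * interfaces. Skipped when F/W security is on or when the F/W already
 * configured the E2E credits.
 */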
2260 static void gaudi_init_e2e(struct hl_device *hdev)
2261 {
2262         if (hdev->asic_prop.fw_security_enabled)
2263                 return;
2264
2265         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2266                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2267                 return;
2268
2269         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2270         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2271         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2272         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2273
2274         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2275         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2276         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2277         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2278
2279         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2280         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2281         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2282         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2283
2284         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2285         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2286         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2287         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2288
2289         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2290         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2291         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2292         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2293
2294         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2295         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2296         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2297         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2298
2299         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2300         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2301         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2302         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2303
2304         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2305         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2306         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2307         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2308
2309         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2310         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2311         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2312         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2313
2314         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2315         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2316         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2317         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2318
2319         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2320         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2321         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2322         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2323
2324         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2325         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2326         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2327         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2328
2329         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2330         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2331         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2332         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2333
2334         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2335         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2336         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2337         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2338
2339         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2340         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2341         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2342         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2343
2344         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2345         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2346         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2347         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2348
2349         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2350         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2351         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2352         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2353
2354         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2355         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2356         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2357         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2358
2359         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2360         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2361         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2362         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2363
2364         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2365         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2366         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2367         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2368
2369         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2370         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2371         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2372         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2373
2374         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2375         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2376         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2377         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2378
2379         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2380         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2381         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2382         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2383
2384         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2385         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2386         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2387         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2388
2389         if (!hdev->dram_scrambler_enable) {
2390                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2391                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2392                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2393                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2394
2395                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2396                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2397                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2398                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2399
2400                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2401                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2402                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2403                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2404
2405                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2406                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2407                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2408                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2409
2410                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2411                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2412                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2413                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2414
2415                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2416                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2417                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2418                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2419
2420                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2421                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2422                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2423                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2424
2425                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2426                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2427                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2428                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2429
2430                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2431                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2432                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2433                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2434
2435                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2436                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2437                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2438                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2439
2440                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2441                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2442                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2443                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2444
2445                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2446                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2447                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2448                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2449
2450                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2451                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2452                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2453                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2454
2455                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2456                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2457                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2458                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2459
2460                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2461                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2462                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2463                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2464
2465                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2466                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2467                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2468                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2469
2470                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2471                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2472                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2473                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2474
2475                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2476                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2477                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2478                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2479
2480                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2481                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2482                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2483                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2484
2485                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2486                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2487                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2488                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2489
2490                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2491                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2492                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2493                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2494
2495                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2496                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2497                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2498                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2499
2500                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2501                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2502                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2503                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2504
2505                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2506                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2507                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2508                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2509         }
2510
2511         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2512                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2513         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2514                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2515
2516         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2517                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2518         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2519                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2520
2521         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2522                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2523         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2524                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2525
2526         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2527                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2528         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2529                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2530
2531         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2532                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2533         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2534                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2535
2536         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2537                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2538         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2539                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2540
2541         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2542                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2543         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2544                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2545
2546         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2547                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2548         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2549                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2550
2551         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2552                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2553         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2554                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2555
2556         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2557                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2558         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2559                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2560
2561         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2562                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2563         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2564                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2565
2566         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2567                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2568         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2569                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2570
2571         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2572                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2573         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2574                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2575
2576         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2577                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2578         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2579                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2580
2581         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2582                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2583         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2584                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2585
2586         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2587                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2588         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2589                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2590
2591         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2592                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2593         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2594                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2595
2596         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2597                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2598         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2599                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2600
2601         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2602                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2603         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2604                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2605
2606         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2607                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2608         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2609                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2610
2611         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2612                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2613         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2614                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2615
2616         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2617                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2618         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2619                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2620
2621         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2622                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2623         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2624                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2625
2626         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2627                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2628         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2629                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2630 }
2631
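/*
 * gaudi_init_hbm_cred() - program the HBM read/write credit counters.
 *
 * Skipped when firmware security is enabled, or when the boot firmware
 * reports (via CPU_BOOT_DEV_STS0_HBM_CRED_EN) that it has already set up
 * the credits. Otherwise, write/read credit counts are programmed for the
 * four DMA_IF blocks and read/write credits are enabled on both HBM
 * channels.
 */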
2632 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2633 {
2634         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2635
2636         if (hdev->asic_prop.fw_security_enabled)
2637                 return;
2638
2639         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2640                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2641                 return;
2642
2643         hbm0_wr = 0x33333333;
2644         hbm0_rd = 0x77777777;
2645         hbm1_wr = 0x55555555;
2646         hbm1_rd = 0xDDDDDDDD;
2647
2648         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2649         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2650         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2651         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2652
2653         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2654         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2655         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2656         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2657
2658         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2659         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2660         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2661         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2662
2663         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2664         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2665         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2666         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2667
2668         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2669                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2670                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2671         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2672                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2673                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2674         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2675                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2676                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2677         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2678                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2679                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2680
2681         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2682                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2683                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2684         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2685                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2686                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2687         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2688                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2689                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2690         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2691                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2692                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2693 }
2694
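/*
 * gaudi_init_golden_registers() - one-time "golden" register configuration.
 *
 * Sets up the E2E and HBM credits, masks the TPC arithmetic interrupts,
 * programs the TPC instruction-cache fetch line number, clears the first
 * 128 bytes of SRAM for Tensor DMA and sets the MME EUS rollup count.
 */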
2695 static void gaudi_init_golden_registers(struct hl_device *hdev)
2696 {
2697         u32 tpc_offset;
2698         int tpc_id, i;
2699
2700         gaudi_init_e2e(hdev);
2701         gaudi_init_hbm_cred(hdev);
2702
2703         for (tpc_id = 0, tpc_offset = 0;
2704                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2705                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2706                 /* Mask all arithmetic interrupts from TPC */
2707                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2708                 /* Set 16 cache lines */
2709                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2710                                 ICACHE_FETCH_LINE_NUM, 2);
2711         }
2712
2713         /* Make sure the first 128 bytes in SRAM are 0 for Tensor DMA */
2714         for (i = 0 ; i < 128 ; i += 8)
2715                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2716
2717         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2718         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2719         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2720         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2721 }
2722
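/*
 * gaudi_init_pci_dma_qman() - configure one stream of a PCI DMA QMAN.
 *
 * Per stream: PQ base/size/indices, LDMA offsets, and CP message base
 * addresses pointing at the sync manager monitor payload and sync objects.
 * The per-QMAN part (RAZWI error reporting towards the GIC or the
 * firmware-provided IRQ control register, ARB watchdog and protection
 * bits) is configured only when stream 0 is initialized.
 */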
2723 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2724                                         int qman_id, dma_addr_t qman_pq_addr)
2725 {
2726         struct cpu_dyn_regs *dyn_regs =
2727                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2728         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2729         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2730         u32 q_off, dma_qm_offset;
2731         u32 dma_qm_err_cfg, irq_handler_offset;
2732
2733         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2734
2735         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2736                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2737         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2738                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2739         so_base_en_lo = lower_32_bits(CFG_BASE +
2740                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2741         so_base_en_hi = upper_32_bits(CFG_BASE +
2742                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2743         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2744                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2745         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2746                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2747         so_base_ws_lo = lower_32_bits(CFG_BASE +
2748                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2749         so_base_ws_hi = upper_32_bits(CFG_BASE +
2750                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2751
2752         q_off = dma_qm_offset + qman_id * 4;
2753
2754         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2755         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2756
2757         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2758         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2759         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2760
2761         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2762         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2763                                                         QMAN_LDMA_SRC_OFFSET);
2764         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2765                                                         QMAN_LDMA_DST_OFFSET);
2766
2767         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2768         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2769         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2770         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2771         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2772         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2773         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2774         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2775
2776         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2777
2778         /* The following configuration is needed only once per QMAN */
2779         if (qman_id == 0) {
2780                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2781                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2782                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2783
2784                 /* Configure RAZWI IRQ */
2785                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2786                 if (hdev->stop_on_err)
2787                         dma_qm_err_cfg |=
2788                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2789
2790                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2791
2792                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2793                         lower_32_bits(CFG_BASE + irq_handler_offset));
2794                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2795                         upper_32_bits(CFG_BASE + irq_handler_offset));
2796
2797                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2798                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2799                                                                         dma_id);
2800
2801                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2802                                 QM_ARB_ERR_MSG_EN_MASK);
2803
2804                 /* Increase ARB WDT to support streams architecture */
2805                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2806                                 GAUDI_ARB_WDT_TIMEOUT);
2807
2808                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2809                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2810
2811                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2812         }
2813 }
2814
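/*
 * gaudi_init_dma_core() - configure and enable one DMA core.
 *
 * Opens the read outstanding/size limits, applies the H3-2116 LBW
 * workaround, routes error messages to the IRQ handler (GIC or
 * firmware-provided register), puts the secured channel in MMU bypass
 * mode and enables the core.
 */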
2815 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2816 {
2817         struct cpu_dyn_regs *dyn_regs =
2818                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2819         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2820         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2821         u32 irq_handler_offset;
2822
2823         /* Set to maximum possible according to physical size */
2824         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2825         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2826
2827         /* WA for H/W bug H3-2116 */
2828         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2829
2830         /* The STOP_ON bit means the operation is not completed in case of RAZWI */
2831         if (hdev->stop_on_err)
2832                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2833
2834         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2835
2836         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2837                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2838                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2839
2840         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2841                 lower_32_bits(CFG_BASE + irq_handler_offset));
2842         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2843                 upper_32_bits(CFG_BASE + irq_handler_offset));
2844
2845         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2846                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2847         WREG32(mmDMA0_CORE_PROT + dma_offset,
2848                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2849         /* If the channel is secured, it should be in MMU bypass mode */
2850         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2851                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2852         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2853 }
2854
2855 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2856                                 u32 enable_mask)
2857 {
2858         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2859
2860         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2861 }
2862
2863 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2864 {
2865         struct gaudi_device *gaudi = hdev->asic_specific;
2866         struct hl_hw_queue *q;
2867         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2868
2869         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2870                 return;
2871
2872         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2873                 dma_id = gaudi_dma_assignment[i];
2874                 /*
2875                  * For queues after the CPU Q, we need to add 1 to get the
2876                  * correct queue index. In addition, we need to skip the CPU
2877                  * EQ and the NIC IRQs in order to get the correct MSI vector.
2878                  */
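                /*
                 * Illustrative example: given that DMA0, DMA1 and DMA5 are the
                 * PCI DMA channels (see gaudi_stop_pci_dma_qmans()), stream 0
                 * of DMA5 uses q_idx = 4 * 5 + 0 + 1 = 21, where the extra 1
                 * skips over the CPU queue.
                 */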
2879                 if (dma_id > 1) {
2880                         cpu_skip = 1;
2881                         nic_skip = NIC_NUMBER_OF_ENGINES;
2882                 } else {
2883                         cpu_skip = 0;
2884                         nic_skip = 0;
2885                 }
2886
2887                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2888                         q_idx = 4 * dma_id + j + cpu_skip;
2889                         q = &hdev->kernel_queues[q_idx];
2890                         q->cq_id = cq_id++;
2891                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2892                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2893                                                 q->bus_address);
2894                 }
2895
2896                 gaudi_init_dma_core(hdev, dma_id);
2897
2898                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2899         }
2900
2901         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2902 }
2903
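/*
 * gaudi_init_hbm_dma_qman() - configure one stream of an HBM DMA QMAN.
 *
 * Streams 0-3 get a PQ (base/size/indices) and the CPDMA offsets. Stream 4
 * is the lower CP; it gets the LDMA offsets and also carries the per-QMAN
 * RAZWI error reporting, ARB watchdog and protection setup. The QMAN
 * assigned to DMA5 additionally gets CP_MSG_BASE 2/3 (west-south sync
 * manager) for the sync stream collective.
 */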
2904 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2905                                         int qman_id, u64 qman_base_addr)
2906 {
2907         struct cpu_dyn_regs *dyn_regs =
2908                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2909         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2910         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2911         u32 dma_qm_err_cfg, irq_handler_offset;
2912         u32 q_off, dma_qm_offset;
2913
2914         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2915
2916         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2917                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2918         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2919                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2920         so_base_en_lo = lower_32_bits(CFG_BASE +
2921                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2922         so_base_en_hi = upper_32_bits(CFG_BASE +
2923                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2924         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2925                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2926         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2927                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2928         so_base_ws_lo = lower_32_bits(CFG_BASE +
2929                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2930         so_base_ws_hi = upper_32_bits(CFG_BASE +
2931                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2932
2933         q_off = dma_qm_offset + qman_id * 4;
2934
2935         if (qman_id < 4) {
2936                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2937                                         lower_32_bits(qman_base_addr));
2938                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2939                                         upper_32_bits(qman_base_addr));
2940
2941                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2942                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2943                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2944
2945                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2946                                                         QMAN_CPDMA_SIZE_OFFSET);
2947                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2948                                                         QMAN_CPDMA_SRC_OFFSET);
2949                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2950                                                         QMAN_CPDMA_DST_OFFSET);
2951         } else {
2952                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2953                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2954                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2955
2956                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2957                                                         QMAN_LDMA_SIZE_OFFSET);
2958                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2959                                                         QMAN_LDMA_SRC_OFFSET);
2960                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2961                                                         QMAN_LDMA_DST_OFFSET);
2962
2963                 /* Configure RAZWI IRQ */
2964                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2965                 if (hdev->stop_on_err)
2966                         dma_qm_err_cfg |=
2967                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2968
2969                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2970
2971                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2972                         lower_32_bits(CFG_BASE + irq_handler_offset));
2973                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2974                         upper_32_bits(CFG_BASE + irq_handler_offset));
2975
2976                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2977                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2978                                                                         dma_id);
2979
2980                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2981                                 QM_ARB_ERR_MSG_EN_MASK);
2982
2983                 /* Increase ARB WDT to support streams architecture */
2984                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2985                                 GAUDI_ARB_WDT_TIMEOUT);
2986
2987                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2988                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2989                                 QMAN_INTERNAL_MAKE_TRUSTED);
2990         }
2991
2992         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2993         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2994         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2995         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2996
2997         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2998         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2999                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3000                                 mtr_base_ws_lo);
3001                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3002                                 mtr_base_ws_hi);
3003                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3004                                 so_base_ws_lo);
3005                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3006                                 so_base_ws_hi);
3007         }
3008 }
3009
3010 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
3011 {
3012         struct gaudi_device *gaudi = hdev->asic_specific;
3013         struct gaudi_internal_qman_info *q;
3014         u64 qman_base_addr;
3015         int i, j, dma_id, internal_q_index;
3016
3017         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
3018                 return;
3019
3020         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
3021                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
3022
3023                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
3024                          /*
3025                           * Add the CPU queue in order to get the correct queue
3026                           * number, as all internal queues are placed after it
3027                           */
3028                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
3029
3030                         q = &gaudi->internal_qmans[internal_q_index];
3031                         qman_base_addr = (u64) q->pq_dma_addr;
3032                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
3033                                                 qman_base_addr);
3034                 }
3035
3036                 /* Initializing lower CP for HBM DMA QMAN */
3037                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
3038
3039                 gaudi_init_dma_core(hdev, dma_id);
3040
3041                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
3042         }
3043
3044         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
3045 }
3046
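/*
 * gaudi_init_mme_qman() - configure one stream of an MME QMAN.
 *
 * Same split as the internal DMA QMANs: streams 0-3 get the PQ
 * configuration and the CPDMA offsets, while stream 4 (the lower CP) gets
 * the LDMA offsets plus the RAZWI error reporting, ARB watchdog and
 * protection setup.
 */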
3047 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
3048                                         int qman_id, u64 qman_base_addr)
3049 {
3050         struct cpu_dyn_regs *dyn_regs =
3051                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3052         u32 mtr_base_lo, mtr_base_hi;
3053         u32 so_base_lo, so_base_hi;
3054         u32 irq_handler_offset;
3055         u32 q_off, mme_id;
3056         u32 mme_qm_err_cfg;
3057
3058         mtr_base_lo = lower_32_bits(CFG_BASE +
3059                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3060         mtr_base_hi = upper_32_bits(CFG_BASE +
3061                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3062         so_base_lo = lower_32_bits(CFG_BASE +
3063                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3064         so_base_hi = upper_32_bits(CFG_BASE +
3065                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3066
3067         q_off = mme_offset + qman_id * 4;
3068
3069         if (qman_id < 4) {
3070                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3071                                         lower_32_bits(qman_base_addr));
3072                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3073                                         upper_32_bits(qman_base_addr));
3074
3075                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3076                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3077                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3078
3079                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3080                                                         QMAN_CPDMA_SIZE_OFFSET);
3081                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3082                                                         QMAN_CPDMA_SRC_OFFSET);
3083                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3084                                                         QMAN_CPDMA_DST_OFFSET);
3085         } else {
3086                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3087                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3088                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3089
3090                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3091                                                         QMAN_LDMA_SIZE_OFFSET);
3092                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3093                                                         QMAN_LDMA_SRC_OFFSET);
3094                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3095                                                         QMAN_LDMA_DST_OFFSET);
3096
3097                 /* Configure RAZWI IRQ */
3098                 mme_id = mme_offset /
3099                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3100
3101                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3102                 if (hdev->stop_on_err)
3103                         mme_qm_err_cfg |=
3104                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3105
3106                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3107
3108                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3109                         lower_32_bits(CFG_BASE + irq_handler_offset));
3110                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3111                         upper_32_bits(CFG_BASE + irq_handler_offset));
3112
3113                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3114                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3115                                                                         mme_id);
3116
3117                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3118                                 QM_ARB_ERR_MSG_EN_MASK);
3119
3120                 /* Increase ARB WDT to support streams architecture */
3121                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3122                                 GAUDI_ARB_WDT_TIMEOUT);
3123
3124                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3125                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3126                                 QMAN_INTERNAL_MAKE_TRUSTED);
3127         }
3128
3129         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3130         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3131         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3132         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3133 }
3134
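/*
 * gaudi_init_mme_qmans() - initialize the MME QMANs (MME0 and MME2).
 *
 * The first four streams are programmed with the MME2 offset and the rest
 * with offset 0 (MME0), matching the queue-ID to engine mapping described
 * in the comment below. The lower CP (stream 4) is then initialized for
 * both QMANs, and both are enabled.
 */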
3135 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3136 {
3137         struct gaudi_device *gaudi = hdev->asic_specific;
3138         struct gaudi_internal_qman_info *q;
3139         u64 qman_base_addr;
3140         u32 mme_offset;
3141         int i, internal_q_index;
3142
3143         if (gaudi->hw_cap_initialized & HW_CAP_MME)
3144                 return;
3145
3146         /*
3147          * Map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3148          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE).
3149          */
3150
3151         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3152
3153         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3154                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3155                 q = &gaudi->internal_qmans[internal_q_index];
3156                 qman_base_addr = (u64) q->pq_dma_addr;
3157                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3158                                         qman_base_addr);
3159                 if (i == 3)
3160                         mme_offset = 0;
3161         }
3162
3163         /* Initializing lower CP for MME QMANs */
3164         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3165         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3166         gaudi_init_mme_qman(hdev, 0, 4, 0);
3167
3168         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3169         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3170
3171         gaudi->hw_cap_initialized |= HW_CAP_MME;
3172 }
3173
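/*
 * gaudi_init_tpc_qman() - configure one stream of a TPC QMAN.
 *
 * The TPC index is derived from the register offset. Streams 0-3 get the
 * PQ configuration and the CPDMA offsets; stream 4 (the lower CP) gets the
 * LDMA offsets and the per-QMAN RAZWI error reporting, ARB watchdog and
 * protection setup. The TPC used for the sync stream collective also gets
 * CP_MSG_BASE 2/3 pointing at the west-south sync manager.
 */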
3174 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3175                                 int qman_id, u64 qman_base_addr)
3176 {
3177         struct cpu_dyn_regs *dyn_regs =
3178                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3179         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3180         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3181         u32 tpc_qm_err_cfg, irq_handler_offset;
3182         u32 q_off, tpc_id;
3183
3184         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3185                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3186         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3187                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3188         so_base_en_lo = lower_32_bits(CFG_BASE +
3189                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3190         so_base_en_hi = upper_32_bits(CFG_BASE +
3191                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3192         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3193                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3194         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3195                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3196         so_base_ws_lo = lower_32_bits(CFG_BASE +
3197                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3198         so_base_ws_hi = upper_32_bits(CFG_BASE +
3199                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3200
3201         q_off = tpc_offset + qman_id * 4;
3202
3203         tpc_id = tpc_offset /
3204                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3205
3206         if (qman_id < 4) {
3207                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3208                                         lower_32_bits(qman_base_addr));
3209                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3210                                         upper_32_bits(qman_base_addr));
3211
3212                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3213                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3214                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3215
3216                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3217                                                         QMAN_CPDMA_SIZE_OFFSET);
3218                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3219                                                         QMAN_CPDMA_SRC_OFFSET);
3220                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3221                                                         QMAN_CPDMA_DST_OFFSET);
3222         } else {
3223                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3224                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3225                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3226
3227                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3228                                                         QMAN_LDMA_SIZE_OFFSET);
3229                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3230                                                         QMAN_LDMA_SRC_OFFSET);
3231                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3232                                                         QMAN_LDMA_DST_OFFSET);
3233
3234                 /* Configure RAZWI IRQ */
3235                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3236                 if (hdev->stop_on_err)
3237                         tpc_qm_err_cfg |=
3238                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3239
3240                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3241
3242                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3243                         lower_32_bits(CFG_BASE + irq_handler_offset));
3244                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3245                         upper_32_bits(CFG_BASE + irq_handler_offset));
3246
3247                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3248                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3249                                                                         tpc_id);
3250
3251                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3252                                 QM_ARB_ERR_MSG_EN_MASK);
3253
3254                 /* Increase ARB WDT to support streams architecture */
3255                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3256                                 GAUDI_ARB_WDT_TIMEOUT);
3257
3258                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3259                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3260                                 QMAN_INTERNAL_MAKE_TRUSTED);
3261         }
3262
3263         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3264         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3265         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3266         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3267
3268         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3269         if (tpc_id == 6) {
3270                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3271                                 mtr_base_ws_lo);
3272                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3273                                 mtr_base_ws_hi);
3274                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3275                                 so_base_ws_lo);
3276                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3277                                 so_base_ws_hi);
3278         }
3279 }
3280
3281 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3282 {
3283         struct gaudi_device *gaudi = hdev->asic_specific;
3284         struct gaudi_internal_qman_info *q;
3285         u64 qman_base_addr;
3286         u32 so_base_hi, tpc_offset = 0;
3287         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3288                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3289         int i, tpc_id, internal_q_index;
3290
3291         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3292                 return;
3293
3294         so_base_hi = upper_32_bits(CFG_BASE +
3295                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3296
3297         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3298                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3299                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3300                                                 tpc_id * QMAN_STREAMS + i;
3301                         q = &gaudi->internal_qmans[internal_q_index];
3302                         qman_base_addr = (u64) q->pq_dma_addr;
3303                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3304                                                 qman_base_addr);
3305
3306                         if (i == 3) {
3307                                 /* Initializing lower CP for TPC QMAN */
3308                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3309
3310                                 /* Enable the QMAN and TPC channel */
3311                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3312                                                 QMAN_TPC_ENABLE);
3313                         }
3314                 }
3315
3316                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3317                                 so_base_hi);
3318
3319                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3320
3321                 gaudi->hw_cap_initialized |=
3322                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3323         }
3324 }
3325
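/*
 * gaudi_init_nic_qman() - configure one stream of a NIC QMAN.
 *
 * Every stream gets a PQ, the LDMA offsets and CP_MSG_BASE 0-3, including
 * the west-south sync manager bases used by the sync stream collective.
 * The per-QMAN RAZWI error reporting, ARB watchdog and protection setup is
 * done only when stream 0 is initialized.
 */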
3326 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3327                                 int qman_id, u64 qman_base_addr, int nic_id)
3328 {
3329         struct cpu_dyn_regs *dyn_regs =
3330                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3331         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3332         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3333         u32 nic_qm_err_cfg, irq_handler_offset;
3334         u32 q_off;
3335
3336         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3337                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3338         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3339                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3340         so_base_en_lo = lower_32_bits(CFG_BASE +
3341                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3342         so_base_en_hi = upper_32_bits(CFG_BASE +
3343                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3344         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3345                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3346         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3347                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3348         so_base_ws_lo = lower_32_bits(CFG_BASE +
3349                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3350         so_base_ws_hi = upper_32_bits(CFG_BASE +
3351                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3352
3353         q_off = nic_offset + qman_id * 4;
3354
3355         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3356         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3357
3358         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3359         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3360         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3361
3362         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3363                                                         QMAN_LDMA_SIZE_OFFSET);
3364         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3365                                                         QMAN_LDMA_SRC_OFFSET);
3366         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3367                                                         QMAN_LDMA_DST_OFFSET);
3368
3369         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3370         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3371         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3372         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3373
3374         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3375         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3376         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3377         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3378         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3379
3380         if (qman_id == 0) {
3381                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3382                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3383                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3384
3385                 /* Configure RAZWI IRQ */
3386                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3387                 if (hdev->stop_on_err)
3388                         nic_qm_err_cfg |=
3389                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3390
3391                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3392
3393                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3394                         lower_32_bits(CFG_BASE + irq_handler_offset));
3395                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3396                         upper_32_bits(CFG_BASE + irq_handler_offset));
3397
3398                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3399                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3400                                                                         nic_id);
3401
3402                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3403                                 QM_ARB_ERR_MSG_EN_MASK);
3404
3405                 /* Increase ARB WDT to support streams architecture */
3406                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3407                                 GAUDI_ARB_WDT_TIMEOUT);
3408
3409                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3410                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3411                                 QMAN_INTERNAL_MAKE_TRUSTED);
3412         }
3413 }
3414
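/*
 * gaudi_init_nic_qmans() - initialize the QMANs of all enabled NIC ports.
 *
 * Each NIC macro hosts two QMANs, so the register offset advances by one
 * QMAN delta per port; after every odd port it is rewound by two QMAN
 * deltas and advanced by one NIC macro delta. Illustrative example: port 0
 * uses offset 0, port 1 uses the QM0->QM1 delta, and port 2 starts at the
 * NIC0->NIC1 delta. Ports masked out by nic_ports_mask still advance the
 * offset so the mapping stays aligned.
 */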
3415 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3416 {
3417         struct gaudi_device *gaudi = hdev->asic_specific;
3418         struct gaudi_internal_qman_info *q;
3419         u64 qman_base_addr;
3420         u32 nic_offset = 0;
3421         u32 nic_delta_between_qmans =
3422                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3423         u32 nic_delta_between_nics =
3424                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3425         int i, nic_id, internal_q_index;
3426
3427         if (!hdev->nic_ports_mask)
3428                 return;
3429
3430         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3431                 return;
3432
3433         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3434
3435         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3436                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3437                         nic_offset += nic_delta_between_qmans;
3438                         if (nic_id & 1) {
3439                                 nic_offset -= (nic_delta_between_qmans * 2);
3440                                 nic_offset += nic_delta_between_nics;
3441                         }
3442                         continue;
3443                 }
3444
3445                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3446                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3447                                                 nic_id * QMAN_STREAMS + i;
3448                         q = &gaudi->internal_qmans[internal_q_index];
3449                         qman_base_addr = (u64) q->pq_dma_addr;
3450                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3451                                                 qman_base_addr, nic_id);
3452                 }
3453
3454                 /* Enable the QMAN */
3455                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3456
3457                 nic_offset += nic_delta_between_qmans;
3458                 if (nic_id & 1) {
3459                         nic_offset -= (nic_delta_between_qmans * 2);
3460                         nic_offset += nic_delta_between_nics;
3461                 }
3462
3463                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3464         }
3465 }
3466
3467 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3468 {
3469         struct gaudi_device *gaudi = hdev->asic_specific;
3470
3471         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3472                 return;
3473
3474         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3475         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3476         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3477 }
3478
3479 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3480 {
3481         struct gaudi_device *gaudi = hdev->asic_specific;
3482
3483         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3484                 return;
3485
3486         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3487         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3488         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3489         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3490         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3491 }
3492
3493 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3494 {
3495         struct gaudi_device *gaudi = hdev->asic_specific;
3496
3497         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3498                 return;
3499
3500         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3501         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3502 }
3503
3504 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3505 {
3506         struct gaudi_device *gaudi = hdev->asic_specific;
3507         u32 tpc_offset = 0;
3508         int tpc_id;
3509
3510         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3511                 return;
3512
3513         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3514                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3515                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3516         }
3517 }
3518
3519 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3520 {
3521         struct gaudi_device *gaudi = hdev->asic_specific;
3522         u32 nic_mask, nic_offset = 0;
3523         u32 nic_delta_between_qmans =
3524                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3525         u32 nic_delta_between_nics =
3526                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3527         int nic_id;
3528
3529         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3530                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3531
3532                 if (gaudi->hw_cap_initialized & nic_mask)
3533                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3534
3535                 nic_offset += nic_delta_between_qmans;
3536                 if (nic_id & 1) {
3537                         nic_offset -= (nic_delta_between_qmans * 2);
3538                         nic_offset += nic_delta_between_nics;
3539                 }
3540         }
3541 }
3542
3543 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3544 {
3545         struct gaudi_device *gaudi = hdev->asic_specific;
3546
3547         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3548                 return;
3549
3550         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3551         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3552         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3553         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3554 }
3555
3556 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3557 {
3558         struct gaudi_device *gaudi = hdev->asic_specific;
3559
3560         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3561                 return;
3562
3563         /* Stop CPs of HBM DMA QMANs */
3564
3565         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3566         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3567         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3568         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3569         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3570 }
3571
3572 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3573 {
3574         struct gaudi_device *gaudi = hdev->asic_specific;
3575
3576         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3577                 return;
3578
3579         /* Stop CPs of MME QMANs */
3580         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3581         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3582 }
3583
3584 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3585 {
3586         struct gaudi_device *gaudi = hdev->asic_specific;
3587
3588         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3589                 return;
3590
3591         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3592         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3593         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3594         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3595         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3596         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3597         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3598         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3599 }
3600
3601 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3602 {
3603         struct gaudi_device *gaudi = hdev->asic_specific;
3604
3605         /* Stop upper CPs of QMANs */
3606
3607         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3608                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3609                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3610                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3611                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3612
3613         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3614                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3615                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3616                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3617                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3618
3619         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3620                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3621                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3622                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3623                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3624
3625         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3626                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3627                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3628                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3629                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3630
3631         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3632                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3633                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3634                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3635                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3636
3637         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3638                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3639                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3640                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3641                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3642
3643         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3644                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3645                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3646                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3647                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3648
3649         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3650                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3651                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3652                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3653                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3654
3655         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3656                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3657                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3658                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3659                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3660
3661         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3662                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3663                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3664                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3665                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3666 }
3667
3668 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3669 {
3670         struct gaudi_device *gaudi = hdev->asic_specific;
3671
3672         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3673                 return;
3674
3675         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3676         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3677         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3678 }
3679
3680 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3681 {
3682         struct gaudi_device *gaudi = hdev->asic_specific;
3683
3684         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3685                 return;
3686
3687         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3688         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3689         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3690         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3691         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3692 }
3693
3694 static void gaudi_mme_stall(struct hl_device *hdev)
3695 {
3696         struct gaudi_device *gaudi = hdev->asic_specific;
3697
3698         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3699                 return;
3700
3701         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3702         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3703         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3704         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3705         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3706         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3707         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3708         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3709         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3710         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3711         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3712         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3713         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3714         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3715         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3716         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3717         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3718 }
3719
3720 static void gaudi_tpc_stall(struct hl_device *hdev)
3721 {
3722         struct gaudi_device *gaudi = hdev->asic_specific;
3723
3724         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3725                 return;
3726
3727         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3728         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3729         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3730         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3731         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3732         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3733         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3734         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3735 }
3736
3737 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3738 {
3739         u32 qman_offset;
3740         int i;
3741
3742         if (hdev->asic_prop.fw_security_enabled)
3743                 return;
3744
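        /* Disable clock gating by zeroing the CGM configuration of all the
         * DMA, MME and TPC QMANs
         */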
3745         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3746                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3747                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3748
3749                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3750         }
3751
3752         WREG32(mmMME0_QM_CGM_CFG, 0);
3753         WREG32(mmMME0_QM_CGM_CFG1, 0);
3754         WREG32(mmMME2_QM_CGM_CFG, 0);
3755         WREG32(mmMME2_QM_CGM_CFG1, 0);
3756
3757         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3758                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3759                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3760
3761                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3762         }
3763 }
3764
3765 static void gaudi_enable_timestamp(struct hl_device *hdev)
3766 {
3767         /* Disable the timestamp counter */
3768         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3769
3770         /* Zero the lower/upper parts of the 64-bit counter */
3771         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3772         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3773
3774         /* Enable the counter */
3775         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3776 }
3777
3778 static void gaudi_disable_timestamp(struct hl_device *hdev)
3779 {
3780         /* Disable the timestamp counter */
3781         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3782 }
3783
3784 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3785 {
3786         u32 wait_timeout_ms;
3787
3788         dev_info(hdev->dev,
3789                 "Halting compute engines and disabling interrupts\n");
3790
3791         if (hdev->pldm)
3792                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3793         else
3794                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3795
3796         if (fw_reset)
3797                 goto skip_engines;
3798
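        /* First stop the QMANs from issuing new work, then stall the engines
         * themselves and finally disable the QMANs, sleeping in between to
         * let in-flight work drain
         */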
3799         gaudi_stop_nic_qmans(hdev);
3800         gaudi_stop_mme_qmans(hdev);
3801         gaudi_stop_tpc_qmans(hdev);
3802         gaudi_stop_hbm_dma_qmans(hdev);
3803         gaudi_stop_pci_dma_qmans(hdev);
3804
3805         msleep(wait_timeout_ms);
3806
3807         gaudi_pci_dma_stall(hdev);
3808         gaudi_hbm_dma_stall(hdev);
3809         gaudi_tpc_stall(hdev);
3810         gaudi_mme_stall(hdev);
3811
3812         msleep(wait_timeout_ms);
3813
3814         gaudi_disable_nic_qmans(hdev);
3815         gaudi_disable_mme_qmans(hdev);
3816         gaudi_disable_tpc_qmans(hdev);
3817         gaudi_disable_hbm_dma_qmans(hdev);
3818         gaudi_disable_pci_dma_qmans(hdev);
3819
3820         gaudi_disable_timestamp(hdev);
3821
3822 skip_engines:
3823         gaudi_disable_msi(hdev);
3824 }
3825
3826 static int gaudi_mmu_init(struct hl_device *hdev)
3827 {
3828         struct asic_fixed_properties *prop = &hdev->asic_prop;
3829         struct gaudi_device *gaudi = hdev->asic_specific;
3830         u64 hop0_addr;
3831         int rc, i;
3832
3833         if (!hdev->mmu_enable)
3834                 return 0;
3835
3836         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3837                 return 0;
3838
3839         for (i = 0 ; i < prop->max_asid ; i++) {
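        /* Program the hop0 page-table base address of each ASID, taken from
         * the MMU page-tables area
         */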
3840                 hop0_addr = prop->mmu_pgt_addr +
3841                                 (i * prop->mmu_hop_table_size);
3842
3843                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3844                 if (rc) {
3845                         dev_err(hdev->dev,
3846                                 "failed to set hop0 addr for asid %d\n", i);
3847                         goto err;
3848                 }
3849         }
3850
3851         /* init MMU cache management page */
3852         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3853         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3854
3855         /* mem cache invalidation */
3856         WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3857
3858         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3859
3860         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3861         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3862
3863         WREG32(mmSTLB_HOP_CONFIGURATION,
3864                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3865
3866         /*
3867          * The H/W expects the first PI after init to be 1. After wraparound
3868          * we'll write 0.
3869          */
3870         gaudi->mmu_cache_inv_pi = 1;
3871
3872         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3873
3874         return 0;
3875
3876 err:
3877         return rc;
3878 }
3879
3880 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3881 {
3882         void __iomem *dst;
3883
3884         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3885
3886         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3887 }
3888
3889 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3890 {
3891         void __iomem *dst;
3892
3893         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3894
3895         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3896 }
3897
3898 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3899 {
3900         struct dynamic_fw_load_mgr *dynamic_loader;
3901         struct cpu_dyn_regs *dyn_regs;
3902
3903         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3904
3905         /*
3906          * Here we update the initial values of a few specific dynamic regs
3907          * (before reading the first descriptor from FW, those values have to
3908          * be hard-coded). In later stages of the protocol those values will
3909          * be updated automatically by reading the FW descriptor, so the data
3910          * there will always be up-to-date.
3911          */
3912         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3913         dyn_regs->kmd_msg_to_cpu =
3914                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3915         dyn_regs->cpu_cmd_status_to_host =
3916                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3917
3918         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3919 }
3920
3921 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3922 {
3923         struct static_fw_load_mgr *static_loader;
3924
3925         static_loader = &hdev->fw_loader.static_loader;
3926
3927         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3928         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3929         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3930         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3931         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3932         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3933         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3934         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3935         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3936         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3937         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3938         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3939         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3940                         GAUDI_PLDM_RESET_WAIT_MSEC :
3941                         GAUDI_CPU_RESET_WAIT_MSEC;
3942 }
3943
3944 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3945 {
3946         struct asic_fixed_properties *prop = &hdev->asic_prop;
3947         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3948
3949         /* fill common fields */
3950         fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3951         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3952         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3953         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3954         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3955         fw_loader->skip_bmc = !hdev->bmc_enable;
3956         fw_loader->sram_bar_id = SRAM_BAR_ID;
3957         fw_loader->dram_bar_id = HBM_BAR_ID;
3958
3959         if (prop->dynamic_fw_load)
3960                 gaudi_init_dynamic_firmware_loader(hdev);
3961         else
3962                 gaudi_init_static_firmware_loader(hdev);
3963 }
3964
3965 static int gaudi_init_cpu(struct hl_device *hdev)
3966 {
3967         struct gaudi_device *gaudi = hdev->asic_specific;
3968         int rc;
3969
3970         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3971                 return 0;
3972
3973         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3974                 return 0;
3975
3976         /*
3977          * The device CPU works with 40-bit addresses.
3978          * This register sets the extension to 50 bits.
3979          */
3980         if (!hdev->asic_prop.fw_security_enabled)
3981                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3982
3983         rc = hl_fw_init_cpu(hdev);
3984
3985         if (rc)
3986                 return rc;
3987
3988         gaudi->hw_cap_initialized |= HW_CAP_CPU;
3989
3990         return 0;
3991 }
3992
3993 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3994 {
3995         struct cpu_dyn_regs *dyn_regs =
3996                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3997         struct asic_fixed_properties *prop = &hdev->asic_prop;
3998         struct gaudi_device *gaudi = hdev->asic_specific;
3999         u32 status, irq_handler_offset;
4000         struct hl_eq *eq;
4001         struct hl_hw_queue *cpu_pq =
4002                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4003         int err;
4004
4005         if (!hdev->cpu_queues_enable)
4006                 return 0;
4007
4008         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4009                 return 0;
4010
4011         eq = &hdev->event_queue;
4012
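        /* Let the device CPU know where the PQ, EQ and the CPU-accessible
         * memory region reside in host memory, along with their sizes
         */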
4013         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4014         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4015
4016         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4017         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4018
4019         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4020                         lower_32_bits(hdev->cpu_accessible_dma_address));
4021         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4022                         upper_32_bits(hdev->cpu_accessible_dma_address));
4023
4024         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4025         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4026         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4027
4028         /* Used for EQ CI */
4029         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4030
4031         WREG32(mmCPU_IF_PF_PQ_PI, 0);
4032
4033         if (gaudi->multi_msi_mode)
4034                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4035         else
4036                 WREG32(mmCPU_IF_QUEUE_INIT,
4037                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4038
4039         irq_handler_offset = prop->gic_interrupts_enable ?
4040                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4041                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4042
4043         WREG32(irq_handler_offset,
4044                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4045
4046         err = hl_poll_timeout(
4047                 hdev,
4048                 mmCPU_IF_QUEUE_INIT,
4049                 status,
4050                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4051                 1000,
4052                 cpu_timeout);
4053
4054         if (err) {
4055                 dev_err(hdev->dev,
4056                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4057                 return -EIO;
4058         }
4059
4060         /* update FW application security bits */
4061         if (prop->fw_cpu_boot_dev_sts0_valid)
4062                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4063         if (prop->fw_cpu_boot_dev_sts1_valid)
4064                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4065
4066         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4067         return 0;
4068 }
4069
4070 static void gaudi_pre_hw_init(struct hl_device *hdev)
4071 {
4072         /* Perform read from the device to make sure device is up */
4073         RREG32(mmHW_STATE);
4074
4075         if (!hdev->asic_prop.fw_security_enabled) {
4076                 /* Set the access through PCI bars (Linux driver only) as
4077                  * secured
4078                  */
4079                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4080                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4081                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4082
4083                 /* Perform a read to flush the pending writes and ensure the
4084                  * configuration was set in the device
4085                  */
4086                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4087         }
4088
4089         /*
4090          * Let's mark in the H/W that we have reached this point. We check
4091          * this value in the reset_before_init function to understand whether
4092          * we need to reset the chip before doing H/W init. This register is
4093          * cleared by the H/W upon H/W reset
4094          */
4095         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4096 }
4097
4098 static int gaudi_hw_init(struct hl_device *hdev)
4099 {
4100         struct gaudi_device *gaudi = hdev->asic_specific;
4101         int rc;
4102
4103         gaudi_pre_hw_init(hdev);
4104
4105         /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
4106          * So we set it here and if anyone tries to move it later to
4107          * a different address, there will be an error
4108          */
4109         if (hdev->asic_prop.iatu_done_by_fw)
4110                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4111
4112         /*
4113          * Before pushing u-boot/linux to the device, we need to set the HBM
4114          * bar to the base address of DRAM
4115          */
4116         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4117                 dev_err(hdev->dev,
4118                         "failed to map HBM bar to DRAM base address\n");
4119                 return -EIO;
4120         }
4121
4122         rc = gaudi_init_cpu(hdev);
4123         if (rc) {
4124                 dev_err(hdev->dev, "failed to initialize CPU\n");
4125                 return rc;
4126         }
4127
4128         /* In case the clock gating was enabled in preboot we need to disable
4129          * it here before touching the MME/TPC registers.
4130          */
4131         gaudi_disable_clock_gating(hdev);
4132
4133         /* SRAM scrambler must be initialized after CPU is running from HBM */
4134         gaudi_init_scrambler_sram(hdev);
4135
4136         /* This is here just in case we are working without CPU */
4137         gaudi_init_scrambler_hbm(hdev);
4138
4139         gaudi_init_golden_registers(hdev);
4140
4141         rc = gaudi_mmu_init(hdev);
4142         if (rc)
4143                 return rc;
4144
4145         gaudi_init_security(hdev);
4146
4147         gaudi_init_pci_dma_qmans(hdev);
4148
4149         gaudi_init_hbm_dma_qmans(hdev);
4150
4151         gaudi_init_mme_qmans(hdev);
4152
4153         gaudi_init_tpc_qmans(hdev);
4154
4155         gaudi_init_nic_qmans(hdev);
4156
4157         gaudi_enable_timestamp(hdev);
4158
4159         /* MSI must be enabled before CPU queues and NIC are initialized */
4160         rc = gaudi_enable_msi(hdev);
4161         if (rc)
4162                 goto disable_queues;
4163
4164         /* must be called after MSI was enabled */
4165         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4166         if (rc) {
4167                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4168                         rc);
4169                 goto disable_msi;
4170         }
4171
4172         /* Perform read from the device to flush all configuration */
4173         RREG32(mmHW_STATE);
4174
4175         return 0;
4176
4177 disable_msi:
4178         gaudi_disable_msi(hdev);
4179 disable_queues:
4180         gaudi_disable_mme_qmans(hdev);
4181         gaudi_disable_pci_dma_qmans(hdev);
4182
4183         return rc;
4184 }
4185
4186 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4187 {
4188         struct cpu_dyn_regs *dyn_regs =
4189                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4190         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4191         struct gaudi_device *gaudi = hdev->asic_specific;
4192         bool driver_performs_reset;
4193
4194         if (!hard_reset) {
4195                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4196                 return;
4197         }
4198
4199         if (hdev->pldm) {
4200                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4201                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4202         } else {
4203                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4204                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4205         }
4206
4207         if (fw_reset) {
4208                 dev_info(hdev->dev,
4209                         "Firmware performs HARD reset, going to wait %dms\n",
4210                         reset_timeout_ms);
4211
4212                 goto skip_reset;
4213         }
4214
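        /* The driver performs the reset itself only when FW security is
         * disabled and the hard reset is not done by the FW
         */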
4215         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4216                                         !hdev->asic_prop.hard_reset_done_by_fw);
4217
4218         /* Set device to handle FLR by H/W as we will put the device CPU to
4219          * halt mode
4220          */
4221         if (driver_performs_reset)
4222                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4223                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4224
4225         /* If Linux is loaded in the device CPU, we need to communicate with
4226          * it via the GIC. Otherwise, we need to use COMMS, or the MSG_TO_CPU
4227          * registers in case of old F/Ws
4228          */
4229         if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4230                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4231                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4232                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4233
4234                 WREG32(irq_handler_offset,
4235                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4236
4237                 /* This is a hail-mary attempt to revive the card in the small chance that the
4238                  * f/w has experienced a watchdog event, which caused it to return back to preboot.
4239                  * In that case, triggering reset through GIC won't help. We need to trigger the
4240                  * reset as if Linux wasn't loaded.
4241                  *
4242                  * We do it only if the reset cause was HB, because that would be the indication
4243                  * of such an event.
4244                  *
4245                  * In case watchdog hasn't expired but we still got HB, then this won't do any
4246                  * damage.
4247                  */
4248                 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4249                         if (hdev->asic_prop.hard_reset_done_by_fw)
4250                                 hl_fw_ask_hard_reset_without_linux(hdev);
4251                         else
4252                                 hl_fw_ask_halt_machine_without_linux(hdev);
4253                 }
4254         } else {
4255                 if (hdev->asic_prop.hard_reset_done_by_fw)
4256                         hl_fw_ask_hard_reset_without_linux(hdev);
4257                 else
4258                         hl_fw_ask_halt_machine_without_linux(hdev);
4259         }
4260
4261         if (driver_performs_reset) {
4262
4263                 /* Configure the reset registers. Must be done as early as
4264                  * possible in case we fail during H/W initialization
4265                  */
4266                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4267                                                 (CFG_RST_H_DMA_MASK |
4268                                                 CFG_RST_H_MME_MASK |
4269                                                 CFG_RST_H_SM_MASK |
4270                                                 CFG_RST_H_TPC_7_MASK));
4271
4272                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4273
4274                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4275                                                 (CFG_RST_H_HBM_MASK |
4276                                                 CFG_RST_H_TPC_7_MASK |
4277                                                 CFG_RST_H_NIC_MASK |
4278                                                 CFG_RST_H_SM_MASK |
4279                                                 CFG_RST_H_DMA_MASK |
4280                                                 CFG_RST_H_MME_MASK |
4281                                                 CFG_RST_H_CPU_MASK |
4282                                                 CFG_RST_H_MMU_MASK));
4283
4284                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4285                                                 (CFG_RST_L_IF_MASK |
4286                                                 CFG_RST_L_PSOC_MASK |
4287                                                 CFG_RST_L_TPC_MASK));
4288
4289                 msleep(cpu_timeout_ms);
4290
4291                 /* Tell ASIC not to re-initialize PCIe */
4292                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4293
4294                 /* Restart BTL/BLR upon hard-reset */
4295                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4296
4297                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4298                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4299
4300                 dev_info(hdev->dev,
4301                         "Issued HARD reset command, going to wait %dms\n",
4302                         reset_timeout_ms);
4303         } else {
4304                 dev_info(hdev->dev,
4305                         "Firmware performs HARD reset, going to wait %dms\n",
4306                         reset_timeout_ms);
4307         }
4308
4309 skip_reset:
4310         /*
4311          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4312          * itself is in reset. Need to wait until the reset is deasserted
4313          */
4314         msleep(reset_timeout_ms);
4315
4316         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4317         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4318                 dev_err(hdev->dev,
4319                         "Timeout while waiting for device to reset 0x%x\n",
4320                         status);
4321
4322         if (gaudi) {
4323                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4324                                                 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4325                                                 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4326                                                 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4327                                                 HW_CAP_HBM_SCRAMBLER);
4328
4329                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4330
4331                 hdev->device_cpu_is_halted = false;
4332         }
4333 }
4334
4335 static int gaudi_suspend(struct hl_device *hdev)
4336 {
4337         int rc;
4338
4339         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4340         if (rc)
4341                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4342
4343         return rc;
4344 }
4345
4346 static int gaudi_resume(struct hl_device *hdev)
4347 {
4348         return gaudi_init_iatu(hdev);
4349 }
4350
4351 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4352                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4353 {
4354         int rc;
4355
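        /* Mark the mapping as IO/PFN-mapped memory that must not be copied,
         * expanded or dumped
         */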
4356         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4357                         VM_DONTCOPY | VM_NORESERVE;
4358
4359         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4360                                 (dma_addr - HOST_PHYS_BASE), size);
4361         if (rc)
4362                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4363
4364         return rc;
4365 }
4366
4367 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4368 {
4369         struct cpu_dyn_regs *dyn_regs =
4370                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4371         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4372         struct gaudi_device *gaudi = hdev->asic_specific;
4373         bool invalid_queue = false;
4374         int dma_id;
4375
4376         switch (hw_queue_id) {
4377         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4378                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4379                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4380                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4381                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4382                 break;
4383
4384         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4385                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4386                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4387                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4388                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4389                 break;
4390
4391         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4392                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4393                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4394                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4395                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4396                 break;
4397
4398         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4399                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4400                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4401                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4402                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4403                 break;
4404
4405         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4406                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4407                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4408                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4409                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4410                 break;
4411
4412         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4413                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4414                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4415                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4416                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4417                 break;
4418
4419         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4420                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4421                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4422                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4423                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4424                 break;
4425
4426         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4427                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4428                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4429                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4430                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4431                 break;
4432
4433         case GAUDI_QUEUE_ID_CPU_PQ:
4434                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4435                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4436                 else
4437                         invalid_queue = true;
4438                 break;
4439
4440         case GAUDI_QUEUE_ID_MME_0_0:
4441                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4442                 break;
4443
4444         case GAUDI_QUEUE_ID_MME_0_1:
4445                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4446                 break;
4447
4448         case GAUDI_QUEUE_ID_MME_0_2:
4449                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4450                 break;
4451
4452         case GAUDI_QUEUE_ID_MME_0_3:
4453                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4454                 break;
4455
4456         case GAUDI_QUEUE_ID_MME_1_0:
4457                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4458                 break;
4459
4460         case GAUDI_QUEUE_ID_MME_1_1:
4461                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4462                 break;
4463
4464         case GAUDI_QUEUE_ID_MME_1_2:
4465                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4466                 break;
4467
4468         case GAUDI_QUEUE_ID_MME_1_3:
4469                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4470                 break;
4471
4472         case GAUDI_QUEUE_ID_TPC_0_0:
4473                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4474                 break;
4475
4476         case GAUDI_QUEUE_ID_TPC_0_1:
4477                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4478                 break;
4479
4480         case GAUDI_QUEUE_ID_TPC_0_2:
4481                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4482                 break;
4483
4484         case GAUDI_QUEUE_ID_TPC_0_3:
4485                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4486                 break;
4487
4488         case GAUDI_QUEUE_ID_TPC_1_0:
4489                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4490                 break;
4491
4492         case GAUDI_QUEUE_ID_TPC_1_1:
4493                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4494                 break;
4495
4496         case GAUDI_QUEUE_ID_TPC_1_2:
4497                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4498                 break;
4499
4500         case GAUDI_QUEUE_ID_TPC_1_3:
4501                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4502                 break;
4503
4504         case GAUDI_QUEUE_ID_TPC_2_0:
4505                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4506                 break;
4507
4508         case GAUDI_QUEUE_ID_TPC_2_1:
4509                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4510                 break;
4511
4512         case GAUDI_QUEUE_ID_TPC_2_2:
4513                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4514                 break;
4515
4516         case GAUDI_QUEUE_ID_TPC_2_3:
4517                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4518                 break;
4519
4520         case GAUDI_QUEUE_ID_TPC_3_0:
4521                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4522                 break;
4523
4524         case GAUDI_QUEUE_ID_TPC_3_1:
4525                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4526                 break;
4527
4528         case GAUDI_QUEUE_ID_TPC_3_2:
4529                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4530                 break;
4531
4532         case GAUDI_QUEUE_ID_TPC_3_3:
4533                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4534                 break;
4535
4536         case GAUDI_QUEUE_ID_TPC_4_0:
4537                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4538                 break;
4539
4540         case GAUDI_QUEUE_ID_TPC_4_1:
4541                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4542                 break;
4543
4544         case GAUDI_QUEUE_ID_TPC_4_2:
4545                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4546                 break;
4547
4548         case GAUDI_QUEUE_ID_TPC_4_3:
4549                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4550                 break;
4551
4552         case GAUDI_QUEUE_ID_TPC_5_0:
4553                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4554                 break;
4555
4556         case GAUDI_QUEUE_ID_TPC_5_1:
4557                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4558                 break;
4559
4560         case GAUDI_QUEUE_ID_TPC_5_2:
4561                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4562                 break;
4563
4564         case GAUDI_QUEUE_ID_TPC_5_3:
4565                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4566                 break;
4567
4568         case GAUDI_QUEUE_ID_TPC_6_0:
4569                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4570                 break;
4571
4572         case GAUDI_QUEUE_ID_TPC_6_1:
4573                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4574                 break;
4575
4576         case GAUDI_QUEUE_ID_TPC_6_2:
4577                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4578                 break;
4579
4580         case GAUDI_QUEUE_ID_TPC_6_3:
4581                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4582                 break;
4583
4584         case GAUDI_QUEUE_ID_TPC_7_0:
4585                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4586                 break;
4587
4588         case GAUDI_QUEUE_ID_TPC_7_1:
4589                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4590                 break;
4591
4592         case GAUDI_QUEUE_ID_TPC_7_2:
4593                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4594                 break;
4595
4596         case GAUDI_QUEUE_ID_TPC_7_3:
4597                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4598                 break;
4599
4600         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4601                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4602                         invalid_queue = true;
4603
4604                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4605                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4606                 break;
4607
4608         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4609                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4610                         invalid_queue = true;
4611
4612                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4613                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4614                 break;
4615
4616         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4617                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4618                         invalid_queue = true;
4619
4620                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4621                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4622                 break;
4623
4624         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4625                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4626                         invalid_queue = true;
4627
4628                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4629                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4630                 break;
4631
4632         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4633                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4634                         invalid_queue = true;
4635
4636                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4637                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4638                 break;
4639
4640         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4641                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4642                         invalid_queue = true;
4643
4644                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4645                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4646                 break;
4647
4648         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4649                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4650                         invalid_queue = true;
4651
4652                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4653                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4654                 break;
4655
4656         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4657                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4658                         invalid_queue = true;
4659
4660                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4661                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4662                 break;
4663
4664         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4665                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4666                         invalid_queue = true;
4667
4668                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4669                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4670                 break;
4671
4672         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4673                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4674                         invalid_queue = true;
4675
4676                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4677                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4678                 break;
4679
4680         default:
4681                 invalid_queue = true;
4682         }
4683
4684         if (invalid_queue) {
4685                 /* Should never get here */
4686                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4687                         hw_queue_id);
4688                 return;
4689         }
4690
4691         db_value = pi;
4692
4693         /* ring the doorbell */
4694         WREG32(db_reg_offset, db_value);
4695
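        /* For the CPU queue, also notify the device CPU that a new PI value
         * was written, either through the GIC or the dynamic IRQ register
         */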
4696         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4697                 /* make sure device CPU will read latest data from host */
4698                 mb();
4699
4700                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4701                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4702                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4703
4704                 WREG32(irq_handler_offset,
4705                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4706         }
4707 }
4708
4709 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4710                                 struct hl_bd *bd)
4711 {
4712         __le64 *pbd = (__le64 *) bd;
4713
4714         /* The QMANs are in host memory, so a simple copy suffices */
4715         pqe[0] = pbd[0];
4716         pqe[1] = pbd[1];
4717 }
4718
4719 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4720                                         dma_addr_t *dma_handle, gfp_t flags)
4721 {
4722         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4723                                                 dma_handle, flags);
4724
4725         /* Shift to the device's base physical address of host memory */
4726         if (kernel_addr)
4727                 *dma_handle += HOST_PHYS_BASE;
4728
4729         return kernel_addr;
4730 }
4731
4732 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4733                 void *cpu_addr, dma_addr_t dma_handle)
4734 {
4735         /* Cancel the device's base physical address of host memory */
4736         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4737
4738         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4739 }
4740
4741 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4742 {
4743         struct asic_fixed_properties *prop = &hdev->asic_prop;
4744         u64  cur_addr = DRAM_BASE_ADDR_USER;
4745         u32 val;
4746         u32 chunk_size;
4747         int rc, dma_id;
4748
4749         while (cur_addr < prop->dram_end_address) {
4750                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4751                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4752
4753                         chunk_size =
4754                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4755
4756                         dev_dbg(hdev->dev,
4757                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4758                                 cur_addr, cur_addr + chunk_size);
4759
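                        /* Commit with the MEM_SET bit so the DMA core runs in
                         * memory-set mode, using the SRC_BASE registers as
                         * the fill pattern
                         */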
4760                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
4761                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
4762                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4763                                                 lower_32_bits(cur_addr));
4764                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4765                                                 upper_32_bits(cur_addr));
4766                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4767                                         chunk_size);
4768                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4769                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4770                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4771
4772                         cur_addr += chunk_size;
4773
4774                         if (cur_addr == prop->dram_end_address)
4775                                 break;
4776                 }
4777
4778                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4779                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4780
4781                         rc = hl_poll_timeout(
4782                                 hdev,
4783                                 mmDMA0_CORE_STS0 + dma_offset,
4784                                 val,
4785                                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4786                                 1000,
4787                                 HBM_SCRUBBING_TIMEOUT_US);
4788
4789                         if (rc) {
4790                                 dev_err(hdev->dev,
4791                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4792                                         dma_id);
4793                                 return -EIO;
4794                         }
4795                 }
4796         }
4797
4798         return 0;
4799 }
4800
4801 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4802 {
4803         struct asic_fixed_properties *prop = &hdev->asic_prop;
4804         int rc = 0;
4805         u64 val = 0;
4806
4807         if (!hdev->memory_scrub)
4808                 return 0;
4809
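        /* A zero addr and size means scrub the entire device memory: wait for
         * the device to be idle, then memset SRAM and HBM
         */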
4810         if (!addr && !size) {
4811                 /* Wait till device is idle */
4812                 rc = hl_poll_timeout(
4813                                 hdev,
4814                                 mmDMA0_CORE_STS0/* dummy */,
4815                                 val/* dummy */,
4816                                 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4817                                                 0, NULL)),
4818                                                 1000,
4819                                                 HBM_SCRUBBING_TIMEOUT_US);
4820                 if (rc) {
4821                         dev_err(hdev->dev, "waiting for idle timeout\n");
4822                         return -EIO;
4823                 }
4824
4825                 /* Scrub SRAM */
4826                 addr = prop->sram_user_base_address;
4827                 size = hdev->pldm ? 0x10000 :
4828                                 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4829                 val = 0x7777777777777777ull;
4830
4831                 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4832                 if (rc) {
4833                         dev_err(hdev->dev,
4834                                 "Failed to clear SRAM in mem scrub all\n");
4835                         return rc;
4836                 }
4837
4838                 /* Scrub HBM using all DMA channels in parallel */
4839                 rc = gaudi_hbm_scrubbing(hdev);
4840                 if (rc)
4841                         dev_err(hdev->dev,
4842                                 "Failed to clear HBM in mem scrub all\n");
4843         }
4844
4845         return rc;
4846 }
4847
4848 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4849                                 u32 queue_id, dma_addr_t *dma_handle,
4850                                 u16 *queue_len)
4851 {
4852         struct gaudi_device *gaudi = hdev->asic_specific;
4853         struct gaudi_internal_qman_info *q;
4854
4855         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4856                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4857                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4858                 return NULL;
4859         }
4860
4861         q = &gaudi->internal_qmans[queue_id];
4862         *dma_handle = q->pq_dma_addr;
4863         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4864
4865         return q->pq_kernel_addr;
4866 }
4867
4868 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4869                                 u16 len, u32 timeout, u64 *result)
4870 {
4871         struct gaudi_device *gaudi = hdev->asic_specific;
4872
4873         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4874                 if (result)
4875                         *result = 0;
4876                 return 0;
4877         }
4878
4879         if (!timeout)
4880                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4881
4882         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4883                                                 timeout, result);
4884 }
4885
4886 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4887 {
4888         struct packet_msg_prot *fence_pkt;
4889         dma_addr_t pkt_dma_addr;
4890         u32 fence_val, tmp, timeout_usec;
4891         dma_addr_t fence_dma_addr;
4892         u32 *fence_ptr;
4893         int rc;
4894
4895         if (hdev->pldm)
4896                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4897         else
4898                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4899
4900         fence_val = GAUDI_QMAN0_FENCE_VAL;
4901
4902         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4903                                                         &fence_dma_addr);
4904         if (!fence_ptr) {
4905                 dev_err(hdev->dev,
4906                         "Failed to allocate memory for H/W queue %d testing\n",
4907                         hw_queue_id);
4908                 return -ENOMEM;
4909         }
4910
4911         *fence_ptr = 0;
4912
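        /* Build a MSG_PROT packet that writes the fence value into the fence
         * buffer; polling that buffer below verifies the queue processed it
         */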
4913         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4914                                         sizeof(struct packet_msg_prot),
4915                                         GFP_KERNEL, &pkt_dma_addr);
4916         if (!fence_pkt) {
4917                 dev_err(hdev->dev,
4918                         "Failed to allocate packet for H/W queue %d testing\n",
4919                         hw_queue_id);
4920                 rc = -ENOMEM;
4921                 goto free_fence_ptr;
4922         }
4923
4924         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4925         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4926         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4927
4928         fence_pkt->ctl = cpu_to_le32(tmp);
4929         fence_pkt->value = cpu_to_le32(fence_val);
4930         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4931
4932         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4933                                         sizeof(struct packet_msg_prot),
4934                                         pkt_dma_addr);
4935         if (rc) {
4936                 dev_err(hdev->dev,
4937                         "Failed to send fence packet to H/W queue %d\n",
4938                         hw_queue_id);
4939                 goto free_pkt;
4940         }
4941
4942         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4943                                         1000, timeout_usec, true);
4944
4945         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4946
4947         if (rc == -ETIMEDOUT) {
4948                 dev_err(hdev->dev,
4949                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4950                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4951                 rc = -EIO;
4952         }
4953
4954 free_pkt:
4955         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4956                                         pkt_dma_addr);
4957 free_fence_ptr:
4958         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4959                                         fence_dma_addr);
4960         return rc;
4961 }
4962
4963 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4964 {
4965         struct gaudi_device *gaudi = hdev->asic_specific;
4966
4967         /*
4968          * Check the capability here, as send_cpu_message() won't update the
4969          * result value if the capability isn't set
4970          */
4971         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4972                 return 0;
4973
4974         return hl_fw_test_cpu_queue(hdev);
4975 }
4976
4977 static int gaudi_test_queues(struct hl_device *hdev)
4978 {
4979         int i, rc, ret_val = 0;
4980
4981         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4982                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4983                         rc = gaudi_test_queue(hdev, i);
4984                         if (rc)
4985                                 ret_val = -EINVAL;
4986                 }
4987         }
4988
4989         rc = gaudi_test_cpu_queue(hdev);
4990         if (rc)
4991                 ret_val = -EINVAL;
4992
4993         return ret_val;
4994 }
4995
4996 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4997                 gfp_t mem_flags, dma_addr_t *dma_handle)
4998 {
4999         void *kernel_addr;
5000
5001         if (size > GAUDI_DMA_POOL_BLK_SIZE)
5002                 return NULL;
5003
5004         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5005
5006         /* Shift to the device's base physical address of host memory */
5007         if (kernel_addr)
5008                 *dma_handle += HOST_PHYS_BASE;
5009
5010         return kernel_addr;
5011 }
5012
5013 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
5014                         dma_addr_t dma_addr)
5015 {
5016         /* Cancel the device's base physical address of host memory */
5017         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5018
5019         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5020 }
5021
5022 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
5023                                         size_t size, dma_addr_t *dma_handle)
5024 {
5025         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5026 }
5027
5028 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
5029                                                 size_t size, void *vaddr)
5030 {
5031         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5032 }
5033
5034 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5035                         int nents, enum dma_data_direction dir)
5036 {
5037         struct scatterlist *sg;
5038         int i;
5039
5040         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5041                 return -ENOMEM;
5042
5043         /* Shift to the device's base physical address of host memory */
5044         for_each_sg(sgl, sg, nents, i)
5045                 sg->dma_address += HOST_PHYS_BASE;
5046
5047         return 0;
5048 }
5049
5050 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5051                         int nents, enum dma_data_direction dir)
5052 {
5053         struct scatterlist *sg;
5054         int i;
5055
5056         /* Cancel the device's base physical address of host memory */
5057         for_each_sg(sgl, sg, nents, i)
5058                 sg->dma_address -= HOST_PHYS_BASE;
5059
5060         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5061 }
5062
5063 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5064                                         struct sg_table *sgt)
5065 {
5066         struct scatterlist *sg, *sg_next_iter;
5067         u32 count, dma_desc_cnt;
5068         u64 len, len_next;
5069         dma_addr_t addr, addr_next;
5070
5071         dma_desc_cnt = 0;
5072
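        /* Count how many LIN_DMA packets are needed to cover the SG list;
         * physically contiguous entries are merged as long as the combined
         * length doesn't exceed DMA_MAX_TRANSFER_SIZE
         */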
5073         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5074
5075                 len = sg_dma_len(sg);
5076                 addr = sg_dma_address(sg);
5077
5078                 if (len == 0)
5079                         break;
5080
5081                 while ((count + 1) < sgt->nents) {
5082                         sg_next_iter = sg_next(sg);
5083                         len_next = sg_dma_len(sg_next_iter);
5084                         addr_next = sg_dma_address(sg_next_iter);
5085
5086                         if (len_next == 0)
5087                                 break;
5088
5089                         if ((addr + len == addr_next) &&
5090                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5091                                 len += len_next;
5092                                 count++;
5093                                 sg = sg_next_iter;
5094                         } else {
5095                                 break;
5096                         }
5097                 }
5098
5099                 dma_desc_cnt++;
5100         }
5101
5102         return dma_desc_cnt * sizeof(struct packet_lin_dma);
5103 }
5104
5105 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5106                                 struct hl_cs_parser *parser,
5107                                 struct packet_lin_dma *user_dma_pkt,
5108                                 u64 addr, enum dma_data_direction dir)
5109 {
5110         struct hl_userptr *userptr;
5111         int rc;
5112
5113         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5114                         parser->job_userptr_list, &userptr))
5115                 goto already_pinned;
5116
5117         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5118         if (!userptr)
5119                 return -ENOMEM;
5120
5121         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5122                                 userptr);
5123         if (rc)
5124                 goto free_userptr;
5125
5126         list_add_tail(&userptr->job_node, parser->job_userptr_list);
5127
5128         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5129                                         userptr->sgt->nents, dir);
5130         if (rc) {
5131                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5132                 goto unpin_memory;
5133         }
5134
5135         userptr->dma_mapped = true;
5136         userptr->dir = dir;
5137
5138 already_pinned:
5139         parser->patched_cb_size +=
5140                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5141
5142         return 0;
5143
5144 unpin_memory:
5145         list_del(&userptr->job_node);
5146         hl_unpin_host_memory(hdev, userptr);
5147 free_userptr:
5148         kfree(userptr);
5149         return rc;
5150 }
5151
5152 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5153                                 struct hl_cs_parser *parser,
5154                                 struct packet_lin_dma *user_dma_pkt,
5155                                 bool src_in_host)
5156 {
5157         enum dma_data_direction dir;
5158         bool skip_host_mem_pin = false, user_memset;
5159         u64 addr;
5160         int rc = 0;
5161
5162         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5163                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5164                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5165
5166         if (src_in_host) {
5167                 if (user_memset)
5168                         skip_host_mem_pin = true;
5169
5170                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5171                 dir = DMA_TO_DEVICE;
5172                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5173         } else {
5174                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5175                 dir = DMA_FROM_DEVICE;
5176                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5177                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5178                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5179         }
5180
5181         if (skip_host_mem_pin)
5182                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5183         else
5184                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5185                                                 addr, dir);
5186
5187         return rc;
5188 }
5189
5190 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5191                                 struct hl_cs_parser *parser,
5192                                 struct packet_lin_dma *user_dma_pkt)
5193 {
5194         bool src_in_host = false;
5195         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5196                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5197                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5198
5199         dev_dbg(hdev->dev, "DMA packet details:\n");
5200         dev_dbg(hdev->dev, "source == 0x%llx\n",
5201                                 le64_to_cpu(user_dma_pkt->src_addr));
5202         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5203         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5204
5205         /*
5206          * Special handling for DMA with size 0. Bypass all validations
5207          * because no transactions will be done except for WR_COMP, which
5208          * is not a security issue
5209          */
5210         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5211                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5212                 return 0;
5213         }
5214
5215         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5216                 src_in_host = true;
5217
5218         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5219                                                 src_in_host);
5220 }
5221
5222 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5223                                         struct hl_cs_parser *parser,
5224                                         struct packet_load_and_exe *user_pkt)
5225 {
5226         u32 cfg;
5227
5228         cfg = le32_to_cpu(user_pkt->cfg);
5229
5230         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5231                 dev_err(hdev->dev,
5232                         "User not allowed to use Load and Execute\n");
5233                 return -EPERM;
5234         }
5235
5236         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5237
5238         return 0;
5239 }
5240
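/*
 * First pass over the user CB: verify every packet id and size, reject packets
 * that user space is not allowed to submit (MSG_PROT, CP_DMA, STOP, WREG_BULK
 * and restricted LOAD_AND_EXE), and compute the size of the patched CB that
 * will be built later.
 */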
5241 static int gaudi_validate_cb(struct hl_device *hdev,
5242                         struct hl_cs_parser *parser, bool is_mmu)
5243 {
5244         u32 cb_parsed_length = 0;
5245         int rc = 0;
5246
5247         parser->patched_cb_size = 0;
5248
5249         /* user_cb_size is greater than 0 so the loop will always be executed */
5250         while (cb_parsed_length < parser->user_cb_size) {
5251                 enum packet_id pkt_id;
5252                 u16 pkt_size;
5253                 struct gaudi_packet *user_pkt;
5254
5255                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5256
5257                 pkt_id = (enum packet_id) (
5258                                 (le64_to_cpu(user_pkt->header) &
5259                                 PACKET_HEADER_PACKET_ID_MASK) >>
5260                                         PACKET_HEADER_PACKET_ID_SHIFT);
5261
5262                 if (!validate_packet_id(pkt_id)) {
5263                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5264                         rc = -EINVAL;
5265                         break;
5266                 }
5267
5268                 pkt_size = gaudi_packet_sizes[pkt_id];
5269                 cb_parsed_length += pkt_size;
5270                 if (cb_parsed_length > parser->user_cb_size) {
5271                         dev_err(hdev->dev,
5272                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5273                         rc = -EINVAL;
5274                         break;
5275                 }
5276
5277                 switch (pkt_id) {
5278                 case PACKET_MSG_PROT:
5279                         dev_err(hdev->dev,
5280                                 "User not allowed to use MSG_PROT\n");
5281                         rc = -EPERM;
5282                         break;
5283
5284                 case PACKET_CP_DMA:
5285                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5286                         rc = -EPERM;
5287                         break;
5288
5289                 case PACKET_STOP:
5290                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5291                         rc = -EPERM;
5292                         break;
5293
5294                 case PACKET_WREG_BULK:
5295                         dev_err(hdev->dev,
5296                                 "User not allowed to use WREG_BULK\n");
5297                         rc = -EPERM;
5298                         break;
5299
5300                 case PACKET_LOAD_AND_EXE:
5301                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5302                                 (struct packet_load_and_exe *) user_pkt);
5303                         break;
5304
5305                 case PACKET_LIN_DMA:
5306                         parser->contains_dma_pkt = true;
5307                         if (is_mmu)
5308                                 parser->patched_cb_size += pkt_size;
5309                         else
5310                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5311                                         (struct packet_lin_dma *) user_pkt);
5312                         break;
5313
5314                 case PACKET_WREG_32:
5315                 case PACKET_MSG_LONG:
5316                 case PACKET_MSG_SHORT:
5317                 case PACKET_REPEAT:
5318                 case PACKET_FENCE:
5319                 case PACKET_NOP:
5320                 case PACKET_ARB_POINT:
5321                         parser->patched_cb_size += pkt_size;
5322                         break;
5323
5324                 default:
5325                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5326                                 pkt_id);
5327                         rc = -EINVAL;
5328                         break;
5329                 }
5330
5331                 if (rc)
5332                         break;
5333         }
5334
5335         /*
5336          * The new CB should have space at the end for two MSG_PROT packets:
5337          * 1. A packet that will act as a completion packet
5338          * 2. A packet that will generate an MSI-X interrupt
5339          */
5340         if (parser->completion)
5341                 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5342
5343         return rc;
5344 }
5345
5346 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5347                                 struct hl_cs_parser *parser,
5348                                 struct packet_lin_dma *user_dma_pkt,
5349                                 struct packet_lin_dma *new_dma_pkt,
5350                                 u32 *new_dma_pkt_size)
5351 {
5352         struct hl_userptr *userptr;
5353         struct scatterlist *sg, *sg_next_iter;
5354         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5355         u64 len, len_next;
5356         dma_addr_t dma_addr, dma_addr_next;
5357         u64 device_memory_addr, addr;
5358         enum dma_data_direction dir;
5359         struct sg_table *sgt;
5360         bool src_in_host = false;
5361         bool skip_host_mem_pin = false;
5362         bool user_memset;
5363
5364         ctl = le32_to_cpu(user_dma_pkt->ctl);
5365
5366         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5367                 src_in_host = true;
5368
5369         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5370                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5371
5372         if (src_in_host) {
5373                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5374                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5375                 dir = DMA_TO_DEVICE;
5376                 if (user_memset)
5377                         skip_host_mem_pin = true;
5378         } else {
5379                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5380                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5381                 dir = DMA_FROM_DEVICE;
5382         }
5383
5384         if ((!skip_host_mem_pin) &&
5385                 (!hl_userptr_is_pinned(hdev, addr,
5386                                         le32_to_cpu(user_dma_pkt->tsize),
5387                                         parser->job_userptr_list, &userptr))) {
5388                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5389                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5390                 return -EFAULT;
5391         }
5392
5393         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5394                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5395                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5396                 return 0;
5397         }
5398
5399         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5400
5401         sgt = userptr->sgt;
5402         dma_desc_cnt = 0;
5403
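        /*
         * Split the user transfer into one LIN_DMA packet per merged SG chunk
         * (same merging rule as gaudi_get_dma_desc_list_size()). The user's
         * engine-barrier setting is kept only on the first descriptor, and the
         * user's write-completion setting is restored only on the last one,
         * below.
         */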
5404         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5405                 len = sg_dma_len(sg);
5406                 dma_addr = sg_dma_address(sg);
5407
5408                 if (len == 0)
5409                         break;
5410
5411                 while ((count + 1) < sgt->nents) {
5412                         sg_next_iter = sg_next(sg);
5413                         len_next = sg_dma_len(sg_next_iter);
5414                         dma_addr_next = sg_dma_address(sg_next_iter);
5415
5416                         if (len_next == 0)
5417                                 break;
5418
5419                         if ((dma_addr + len == dma_addr_next) &&
5420                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5421                                 len += len_next;
5422                                 count++;
5423                                 sg = sg_next_iter;
5424                         } else {
5425                                 break;
5426                         }
5427                 }
5428
5429                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5430                 if (likely(dma_desc_cnt))
5431                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5432                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5433                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5434                 new_dma_pkt->tsize = cpu_to_le32(len);
5435
5436                 if (dir == DMA_TO_DEVICE) {
5437                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5438                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5439                 } else {
5440                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5441                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5442                 }
5443
5444                 if (!user_memset)
5445                         device_memory_addr += len;
5446                 dma_desc_cnt++;
5447                 new_dma_pkt++;
5448         }
5449
5450         if (!dma_desc_cnt) {
5451                 dev_err(hdev->dev,
5452                         "Error of 0 SG entries when patching DMA packet\n");
5453                 return -EFAULT;
5454         }
5455
5456         /* Fix the last dma packet - wrcomp must be as user set it */
5457         new_dma_pkt--;
5458         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5459
5460         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5461
5462         return 0;
5463 }
5464
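/*
 * Second pass (non-MMU path): copy the user packets into the kernel-owned
 * patched CB, expanding each LIN_DMA packet into the per-chunk packets that
 * use the DMA addresses of the pinned host memory.
 */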
5465 static int gaudi_patch_cb(struct hl_device *hdev,
5466                                 struct hl_cs_parser *parser)
5467 {
5468         u32 cb_parsed_length = 0;
5469         u32 cb_patched_cur_length = 0;
5470         int rc = 0;
5471
5472         /* user_cb_size is greater than 0 so the loop will always be executed */
5473         while (cb_parsed_length < parser->user_cb_size) {
5474                 enum packet_id pkt_id;
5475                 u16 pkt_size;
5476                 u32 new_pkt_size = 0;
5477                 struct gaudi_packet *user_pkt, *kernel_pkt;
5478
5479                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5480                 kernel_pkt = parser->patched_cb->kernel_address +
5481                                         cb_patched_cur_length;
5482
5483                 pkt_id = (enum packet_id) (
5484                                 (le64_to_cpu(user_pkt->header) &
5485                                 PACKET_HEADER_PACKET_ID_MASK) >>
5486                                         PACKET_HEADER_PACKET_ID_SHIFT);
5487
5488                 if (!validate_packet_id(pkt_id)) {
5489                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5490                         rc = -EINVAL;
5491                         break;
5492                 }
5493
5494                 pkt_size = gaudi_packet_sizes[pkt_id];
5495                 cb_parsed_length += pkt_size;
5496                 if (cb_parsed_length > parser->user_cb_size) {
5497                         dev_err(hdev->dev,
5498                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5499                         rc = -EINVAL;
5500                         break;
5501                 }
5502
5503                 switch (pkt_id) {
5504                 case PACKET_LIN_DMA:
5505                         rc = gaudi_patch_dma_packet(hdev, parser,
5506                                         (struct packet_lin_dma *) user_pkt,
5507                                         (struct packet_lin_dma *) kernel_pkt,
5508                                         &new_pkt_size);
5509                         cb_patched_cur_length += new_pkt_size;
5510                         break;
5511
5512                 case PACKET_MSG_PROT:
5513                         dev_err(hdev->dev,
5514                                 "User not allowed to use MSG_PROT\n");
5515                         rc = -EPERM;
5516                         break;
5517
5518                 case PACKET_CP_DMA:
5519                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5520                         rc = -EPERM;
5521                         break;
5522
5523                 case PACKET_STOP:
5524                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5525                         rc = -EPERM;
5526                         break;
5527
5528                 case PACKET_WREG_32:
5529                 case PACKET_WREG_BULK:
5530                 case PACKET_MSG_LONG:
5531                 case PACKET_MSG_SHORT:
5532                 case PACKET_REPEAT:
5533                 case PACKET_FENCE:
5534                 case PACKET_NOP:
5535                 case PACKET_ARB_POINT:
5536                 case PACKET_LOAD_AND_EXE:
5537                         memcpy(kernel_pkt, user_pkt, pkt_size);
5538                         cb_patched_cur_length += pkt_size;
5539                         break;
5540
5541                 default:
5542                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5543                                 pkt_id);
5544                         rc = -EINVAL;
5545                         break;
5546                 }
5547
5548                 if (rc)
5549                         break;
5550         }
5551
5552         return rc;
5553 }
5554
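/*
 * MMU path: user addresses are translated by the device MMU, so the user CB is
 * copied as-is into a kernel-owned CB and only validated; no address patching
 * is needed.
 */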
5555 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5556                 struct hl_cs_parser *parser)
5557 {
5558         u64 patched_cb_handle;
5559         u32 patched_cb_size;
5560         struct hl_cb *user_cb;
5561         int rc;
5562
5563         /*
5564          * The new CB should have space at the end for two MSG_PROT packets:
5565          * 1. A packet that will act as a completion packet
5566          * 2. A packet that will generate MSI interrupt
5567          */
5568         if (parser->completion)
5569                 parser->patched_cb_size = parser->user_cb_size +
5570                                 sizeof(struct packet_msg_prot) * 2;
5571         else
5572                 parser->patched_cb_size = parser->user_cb_size;
5573
5574         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5575                                 parser->patched_cb_size, false, false,
5576                                 &patched_cb_handle);
5577
5578         if (rc) {
5579                 dev_err(hdev->dev,
5580                         "Failed to allocate patched CB for DMA CS %d\n",
5581                         rc);
5582                 return rc;
5583         }
5584
5585         patched_cb_handle >>= PAGE_SHIFT;
5586         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5587                                 (u32) patched_cb_handle);
5588         /* hl_cb_get should never fail */
5589         if (!parser->patched_cb) {
5590                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5591                         (u32) patched_cb_handle);
5592                 rc = -EFAULT;
5593                 goto out;
5594         }
5595
5596         /*
5597          * The check that parser->user_cb_size <= parser->user_cb->size was done
5598          * in validate_queue_index().
5599          */
5600         memcpy(parser->patched_cb->kernel_address,
5601                 parser->user_cb->kernel_address,
5602                 parser->user_cb_size);
5603
5604         patched_cb_size = parser->patched_cb_size;
5605
5606         /* Validate patched CB instead of user CB */
5607         user_cb = parser->user_cb;
5608         parser->user_cb = parser->patched_cb;
5609         rc = gaudi_validate_cb(hdev, parser, true);
5610         parser->user_cb = user_cb;
5611
5612         if (rc) {
5613                 hl_cb_put(parser->patched_cb);
5614                 goto out;
5615         }
5616
5617         if (patched_cb_size != parser->patched_cb_size) {
5618                 dev_err(hdev->dev, "user CB size mismatch\n");
5619                 hl_cb_put(parser->patched_cb);
5620                 rc = -EINVAL;
5621                 goto out;
5622         }
5623
5624 out:
5625         /*
5626          * Always call cb destroy here because we still hold one reference
5627          * to it from the earlier cb_get. After the job is completed,
5628          * cb_put will release it, but here we want to remove it from the
5629          * idr
5630          */
5631         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5632                                         patched_cb_handle << PAGE_SHIFT);
5633
5634         return rc;
5635 }
5636
5637 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5638                 struct hl_cs_parser *parser)
5639 {
5640         u64 patched_cb_handle;
5641         int rc;
5642
5643         rc = gaudi_validate_cb(hdev, parser, false);
5644
5645         if (rc)
5646                 goto free_userptr;
5647
5648         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5649                                 parser->patched_cb_size, false, false,
5650                                 &patched_cb_handle);
5651         if (rc) {
5652                 dev_err(hdev->dev,
5653                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5654                 goto free_userptr;
5655         }
5656
5657         patched_cb_handle >>= PAGE_SHIFT;
5658         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5659                                 (u32) patched_cb_handle);
5660         /* hl_cb_get should never fail here */
5661         if (!parser->patched_cb) {
5662                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5663                                 (u32) patched_cb_handle);
5664                 rc = -EFAULT;
5665                 goto out;
5666         }
5667
5668         rc = gaudi_patch_cb(hdev, parser);
5669
5670         if (rc)
5671                 hl_cb_put(parser->patched_cb);
5672
5673 out:
5674         /*
5675          * Always call cb destroy here because we still hold one reference
5676          * to it from the earlier cb_get. After the job is completed,
5677          * cb_put will release it, but here we want to remove it from the
5678          * idr
5679          */
5680         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5681                                 patched_cb_handle << PAGE_SHIFT);
5682
5683 free_userptr:
5684         if (rc)
5685                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5686         return rc;
5687 }
5688
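/*
 * Jobs for internal queues are not parsed; just verify that NIC queues are
 * enabled and that the CB address range lies entirely inside SRAM, DRAM or the
 * host-mapped (PMMU) virtual address range.
 */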
5689 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5690                                         struct hl_cs_parser *parser)
5691 {
5692         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5693         struct gaudi_device *gaudi = hdev->asic_specific;
5694         u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5695                 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5696
5697         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5698                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5699                         (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5700                 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5701                                 parser->hw_queue_id);
5702                 return -EINVAL;
5703         }
5704
5705         /* For internal queue jobs just check if CB address is valid */
5706         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5707                                         parser->user_cb_size,
5708                                         asic_prop->sram_user_base_address,
5709                                         asic_prop->sram_end_address))
5710                 return 0;
5711
5712         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5713                                         parser->user_cb_size,
5714                                         asic_prop->dram_user_base_address,
5715                                         asic_prop->dram_end_address))
5716                 return 0;
5717
5718         /* PMMU and HPMMU addresses are equal, check only one of them */
5719         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5720                                         parser->user_cb_size,
5721                                         asic_prop->pmmu.start_addr,
5722                                         asic_prop->pmmu.end_addr))
5723                 return 0;
5724
5725         dev_err(hdev->dev,
5726                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5727                 parser->user_cb, parser->user_cb_size);
5728
5729         return -EFAULT;
5730 }
5731
5732 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5733 {
5734         struct gaudi_device *gaudi = hdev->asic_specific;
5735
5736         if (parser->queue_type == QUEUE_TYPE_INT)
5737                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5738
5739         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5740                 return gaudi_parse_cb_mmu(hdev, parser);
5741         else
5742                 return gaudi_parse_cb_no_mmu(hdev, parser);
5743 }
5744
5745 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5746                                         void *kernel_address, u32 len,
5747                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
5748                                         bool eb)
5749 {
5750         struct gaudi_device *gaudi = hdev->asic_specific;
5751         struct packet_msg_prot *cq_pkt;
5752         u64 msi_addr;
5753         u32 tmp;
5754
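        /*
         * The last two MSG_PROT packets of the CB: the first writes the
         * completion value to the CQ, the second triggers the MSI/MSI-X
         * interrupt by writing to the PCIe MSI request register.
         */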
5755         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5756
5757         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5758         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5759
5760         if (eb)
5761                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5762
5763         cq_pkt->ctl = cpu_to_le32(tmp);
5764         cq_pkt->value = cpu_to_le32(cq_val);
5765         cq_pkt->addr = cpu_to_le64(cq_addr);
5766
5767         cq_pkt++;
5768
5769         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5770         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5771         cq_pkt->ctl = cpu_to_le32(tmp);
5772         cq_pkt->value = cpu_to_le32(1);
5773
5774         if (gaudi->multi_msi_mode)
5775                 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5776         else
5777                 msi_addr = mmPCIE_CORE_MSI_REQ;
5778
5779         cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5780 }
5781
5782 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5783 {
5784         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5785 }
5786
5787 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5788                                         u32 size, u64 val)
5789 {
5790         struct packet_lin_dma *lin_dma_pkt;
5791         struct hl_cs_job *job;
5792         u32 cb_size, ctl, err_cause;
5793         struct hl_cb *cb;
5794         u64 id;
5795         int rc;
5796
5797         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5798         if (!cb)
5799                 return -EFAULT;
5800
5801         lin_dma_pkt = cb->kernel_address;
5802         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5803         cb_size = sizeof(*lin_dma_pkt);
5804
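        /*
         * Build a single LIN_DMA packet in memset mode: the source address
         * field carries the fill value and the destination is the device
         * memory range to clear.
         */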
5805         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5806         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5807         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5808         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5809         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5810
5811         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5812         lin_dma_pkt->src_addr = cpu_to_le64(val);
5813         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5814         lin_dma_pkt->tsize = cpu_to_le32(size);
5815
5816         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5817         if (!job) {
5818                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5819                 rc = -ENOMEM;
5820                 goto release_cb;
5821         }
5822
5823         /* Verify DMA is OK */
5824         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5825         if (err_cause && !hdev->init_done) {
5826                 dev_dbg(hdev->dev,
5827                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5828                         err_cause);
5829                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5830         }
5831
5832         job->id = 0;
5833         job->user_cb = cb;
5834         atomic_inc(&job->user_cb->cs_cnt);
5835         job->user_cb_size = cb_size;
5836         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5837         job->patched_cb = job->user_cb;
5838         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5839
5840         hl_debugfs_add_job(hdev, job);
5841
5842         rc = gaudi_send_job_on_qman0(hdev, job);
5843         hl_debugfs_remove_job(hdev, job);
5844         kfree(job);
5845         atomic_dec(&cb->cs_cnt);
5846
5847         /* Verify DMA is OK */
5848         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5849         if (err_cause) {
5850                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5851                 rc = -EIO;
5852                 if (!hdev->init_done) {
5853                         dev_dbg(hdev->dev,
5854                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5855                                 err_cause);
5856                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5857                 }
5858         }
5859
5860 release_cb:
5861         id = cb->id;
5862         hl_cb_put(cb);
5863         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5864
5865         return rc;
5866 }
5867
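/*
 * Write 'val' to 'num_regs' consecutive registers starting at 'reg_base' by
 * building a CB of MSG_LONG packets (one per register) and submitting it as a
 * kernel job through gaudi_send_job_on_qman0().
 */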
5868 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5869                                         u32 num_regs, u32 val)
5870 {
5871         struct packet_msg_long *pkt;
5872         struct hl_cs_job *job;
5873         u32 cb_size, ctl;
5874         struct hl_cb *cb;
5875         int i, rc;
5876
5877         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5878
5879         if (cb_size > SZ_2M) {
5880                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M / SZ_1M);
5881                 return -ENOMEM;
5882         }
5883
5884         cb = hl_cb_kernel_create(hdev, cb_size, false);
5885         if (!cb)
5886                 return -EFAULT;
5887
5888         pkt = cb->kernel_address;
5889
5890         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5891         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5892         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5893         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5894         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5895
5896         for (i = 0; i < num_regs ; i++, pkt++) {
5897                 pkt->ctl = cpu_to_le32(ctl);
5898                 pkt->value = cpu_to_le32(val);
5899                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5900         }
5901
5902         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5903         if (!job) {
5904                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5905                 rc = -ENOMEM;
5906                 goto release_cb;
5907         }
5908
5909         job->id = 0;
5910         job->user_cb = cb;
5911         atomic_inc(&job->user_cb->cs_cnt);
5912         job->user_cb_size = cb_size;
5913         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5914         job->patched_cb = job->user_cb;
5915         job->job_cb_size = cb_size;
5916
5917         hl_debugfs_add_job(hdev, job);
5918
5919         rc = gaudi_send_job_on_qman0(hdev, job);
5920         hl_debugfs_remove_job(hdev, job);
5921         kfree(job);
5922         atomic_dec(&cb->cs_cnt);
5923
5924 release_cb:
5925         hl_cb_put(cb);
5926         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5927
5928         return rc;
5929 }
5930
5931 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5932 {
5933         u64 base_addr;
5934         u32 num_regs;
5935         int rc;
5936
5937         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5938         num_regs = NUM_OF_SOB_IN_BLOCK;
5939         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5940         if (rc) {
5941                 dev_err(hdev->dev, "failed resetting SM registers");
5942                 return -ENOMEM;
5943         }
5944
5945         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5946         num_regs = NUM_OF_SOB_IN_BLOCK;
5947         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5948         if (rc) {
5949                 dev_err(hdev->dev, "failed resetting SM registers");
5950                 return -ENOMEM;
5951         }
5952
5953         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5954         num_regs = NUM_OF_SOB_IN_BLOCK;
5955         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5956         if (rc) {
5957                 dev_err(hdev->dev, "failed resetting SM registers");
5958                 return -ENOMEM;
5959         }
5960
5961         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5962         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5963         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5964         if (rc) {
5965                 dev_err(hdev->dev, "failed resetting SM registers");
5966                 return -ENOMEM;
5967         }
5968
5969         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5970         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5971         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5972         if (rc) {
5973                 dev_err(hdev->dev, "failed resetting SM registers");
5974                 return -ENOMEM;
5975         }
5976
5977         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5978         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5979         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5980         if (rc) {
5981                 dev_err(hdev->dev, "failed resetting SM registers");
5982                 return -ENOMEM;
5983         }
5984
5985         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5986                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5987         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5988         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5989         if (rc) {
5990                 dev_err(hdev->dev, "failed resetting SM registers");
5991                 return -ENOMEM;
5992         }
5993
5994         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5995                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5996         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5997         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5998         if (rc) {
5999                 dev_err(hdev->dev, "failed resetting SM registers");
6000                 return -ENOMEM;
6001         }
6002
6003         return 0;
6004 }
6005
6006 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6007 {
6008         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6009                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6010         int i;
6011
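        /*
         * Re-point each DMA core's write-completion address at its dedicated
         * sync object and restore the default completion data, undoing any
         * changes user space may have made.
         */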
6012         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6013                 u64 sob_addr = CFG_BASE +
6014                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6015                                 (i * sob_delta);
6016                 u32 dma_offset = i * DMA_CORE_OFFSET;
6017
6018                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6019                                 lower_32_bits(sob_addr));
6020                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6021                                 upper_32_bits(sob_addr));
6022                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6023
6024                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6025                  * modified by the user for SRAM reduction
6026                  */
6027                 if (i > 1)
6028                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6029                                                                 0x00000001);
6030         }
6031 }
6032
6033 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6034 {
6035         u32 qman_offset;
6036         int i;
6037
6038         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6039                 qman_offset = i * DMA_QMAN_OFFSET;
6040                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6041         }
6042
6043         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6044                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6045                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6046         }
6047
6048         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6049                 qman_offset = i * TPC_QMAN_OFFSET;
6050                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6051         }
6052
6053         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6054                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6055                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6056                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6057         }
6058 }
6059
6060 static int gaudi_restore_user_registers(struct hl_device *hdev)
6061 {
6062         int rc;
6063
6064         rc = gaudi_restore_sm_registers(hdev);
6065         if (rc)
6066                 return rc;
6067
6068         gaudi_restore_dma_registers(hdev);
6069         gaudi_restore_qm_registers(hdev);
6070
6071         return 0;
6072 }
6073
6074 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6075 {
6076         return 0;
6077 }
6078
6079 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6080 {
6081         struct asic_fixed_properties *prop = &hdev->asic_prop;
6082         struct gaudi_device *gaudi = hdev->asic_specific;
6083         u64 addr = prop->mmu_pgt_addr;
6084         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6085
6086         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6087                 return 0;
6088
6089         return gaudi_memset_device_memory(hdev, addr, size, 0);
6090 }
6091
6092 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6093 {
6094
6095 }
6096
6097 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6098                         bool user_address, u32 *val)
6099 {
6100         struct asic_fixed_properties *prop = &hdev->asic_prop;
6101         u64 hbm_bar_addr, host_phys_end;
6102         int rc = 0;
6103
6104         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6105
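        /*
         * Dispatch by address range: CFG space via a register read, SRAM via
         * its PCI BAR, HBM by temporarily moving the HBM BAR, and host
         * physical memory by direct access (only when no IOMMU is present).
         */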
6106         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6107
6108                 *val = RREG32(addr - CFG_BASE);
6109
6110         } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6111
6112                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6113
6114         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6115
6116                 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6117
6118                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6119
6120                 if (hbm_bar_addr != U64_MAX) {
6121                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6122                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6123                 }
6124
6125                 if (hbm_bar_addr == U64_MAX)
6126                         rc = -EIO;
6127
6128         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6129                         user_address && !iommu_present(&pci_bus_type)) {
6130
6131                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6132
6133         } else {
6134                 rc = -EFAULT;
6135         }
6136
6137         return rc;
6138 }
6139
6140 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6141                         bool user_address, u32 val)
6142 {
6143         struct asic_fixed_properties *prop = &hdev->asic_prop;
6144         u64 hbm_bar_addr, host_phys_end;
6145         int rc = 0;
6146
6147         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6148
6149         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6150
6151                 WREG32(addr - CFG_BASE, val);
6152
6153         } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6154
6155                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6156
6157         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6158
6159                 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6160
6161                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6162
6163                 if (hbm_bar_addr != U64_MAX) {
6164                         writel(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6165                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6166                 }
6167
6168                 if (hbm_bar_addr == U64_MAX)
6169                         rc = -EIO;
6170
6171         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6172                         user_address && !iommu_present(&pci_bus_type)) {
6173
6174                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6175
6176         } else {
6177                 rc = -EFAULT;
6178         }
6179
6180         return rc;
6181 }
6182
6183 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6184                                 bool user_address, u64 *val)
6185 {
6186         struct asic_fixed_properties *prop = &hdev->asic_prop;
6187         u64 hbm_bar_addr, host_phys_end;
6188         int rc = 0;
6189
6190         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6191
6192         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6193
6194                 u32 val_l = RREG32(addr - CFG_BASE);
6195                 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6196
6197                 *val = (((u64) val_h) << 32) | val_l;
6198
6199         } else if ((addr >= SRAM_BASE_ADDR) &&
6200                         (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6201
6202                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6203
6204         } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6205
6206                 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6207
6208                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6209
6210                 if (hbm_bar_addr != U64_MAX) {
6211                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6212                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6213                 }
6214
6215                 if (hbm_bar_addr == U64_MAX)
6216                         rc = -EIO;
6217
6218         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6219                         user_address && !iommu_present(&pci_bus_type)) {
6220
6221                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6222
6223         } else {
6224                 rc = -EFAULT;
6225         }
6226
6227         return rc;
6228 }
6229
6230 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6231                                 bool user_address, u64 val)
6232 {
6233         struct asic_fixed_properties *prop = &hdev->asic_prop;
6234         u64 hbm_bar_addr, host_phys_end;
6235         int rc = 0;
6236
6237         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6238
6239         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6240
6241                 WREG32(addr - CFG_BASE, lower_32_bits(val));
6242                 WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
6243
6244         } else if ((addr >= SRAM_BASE_ADDR) &&
6245                         (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6246
6247                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6248
6249         } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6250
6251                 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6252
6253                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6254
6255                 if (hbm_bar_addr != U64_MAX) {
6256                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6257                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6258                 }
6259
6260                 if (hbm_bar_addr == U64_MAX)
6261                         rc = -EIO;
6262
6263         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6264                         user_address && !iommu_present(&pci_bus_type)) {
6265
6266                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6267
6268         } else {
6269                 rc = -EFAULT;
6270         }
6271
6272         return rc;
6273 }
6274
6275 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6276                                         u32 size_to_dma, dma_addr_t dma_addr)
6277 {
6278         u32 err_cause, val;
6279         u64 dma_offset;
6280         int rc;
6281
6282         dma_offset = dma_id * DMA_CORE_OFFSET;
6283
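        /*
         * Program a single linear transfer directly on the DMA core (bypassing
         * the QMAN), commit it and poll until the engine is no longer busy.
         */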
6284         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6285         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6286         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6287         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6288         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6289         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6290                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6291
6292         rc = hl_poll_timeout(
6293                 hdev,
6294                 mmDMA0_CORE_STS0 + dma_offset,
6295                 val,
6296                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6297                 0,
6298                 1000000);
6299
6300         if (rc) {
6301                 dev_err(hdev->dev,
6302                         "DMA %d timed-out during reading of 0x%llx\n",
6303                         dma_id, addr);
6304                 return -EIO;
6305         }
6306
6307         /* Verify DMA is OK */
6308         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6309         if (err_cause) {
6310                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6311                 dev_dbg(hdev->dev,
6312                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6313                         err_cause);
6314                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6315
6316                 return -EIO;
6317         }
6318
6319         return 0;
6320 }
6321
6322 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6323                                 void *blob_addr)
6324 {
6325         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6326         u32 qm_glbl_sts0, qm_cgm_sts;
6327         u64 dma_offset, qm_offset;
6328         dma_addr_t dma_addr;
6329         void *kernel_addr;
6330         bool is_eng_idle;
6331         int rc = 0, dma_id;
6332
6333         kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6334                                                 hdev, SZ_2M,
6335                                                 &dma_addr,
6336                                                 GFP_KERNEL | __GFP_ZERO);
6337
6338         if (!kernel_addr)
6339                 return -ENOMEM;
6340
6341         hdev->asic_funcs->hw_queues_lock(hdev);
6342
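        /* Use the first PCI DMA engine if it is idle; otherwise fall back to the second */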
6343         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6344         dma_offset = dma_id * DMA_CORE_OFFSET;
6345         qm_offset = dma_id * DMA_QMAN_OFFSET;
6346         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6347         qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6348         qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6349         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6350                       IS_DMA_IDLE(dma_core_sts0);
6351
6352         if (!is_eng_idle) {
6353                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6354                 dma_offset = dma_id * DMA_CORE_OFFSET;
6355                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6356                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6357                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6358                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6359                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6360                               IS_DMA_IDLE(dma_core_sts0);
6361
6362                 if (!is_eng_idle) {
6363                         dev_err_ratelimited(hdev->dev,
6364                                 "Can't read via DMA because it is BUSY\n");
6365                         rc = -EAGAIN;
6366                         goto out;
6367                 }
6368         }
6369
6370         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6371         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6372                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6373
6374         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6375          * using the compute ctx ASID, if exists. If not, use the kernel ctx
6376          * ASID
6377          */
6378         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6379
6380         /* Verify DMA is OK */
6381         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6382         if (err_cause) {
6383                 dev_dbg(hdev->dev,
6384                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6385                         err_cause);
6386                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6387         }
6388
6389         pos = 0;
6390         size_left = size;
6391         size_to_dma = SZ_2M;
6392
6393         while (size_left > 0) {
6394
6395                 if (size_left < SZ_2M)
6396                         size_to_dma = size_left;
6397
6398                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6399                                                 dma_addr);
6400                 if (rc)
6401                         break;
6402
6403                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6404
6405                 if (size_left <= SZ_2M)
6406                         break;
6407
6408                 pos += SZ_2M;
6409                 addr += SZ_2M;
6410                 size_left -= SZ_2M;
6411         }
6412
6413         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6414          * using the compute ctx ASID, if exists. If not, use the kernel ctx
6415          * ASID
6416          */
6417         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6418                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6419
6420         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6421
6422 out:
6423         hdev->asic_funcs->hw_queues_unlock(hdev);
6424
6425         hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6426                                                 dma_addr);
6427
6428         return rc;
6429 }
6430
6431 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6432 {
6433         struct gaudi_device *gaudi = hdev->asic_specific;
6434
6435         if (hdev->reset_info.hard_reset_pending)
6436                 return U64_MAX;
6437
6438         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6439                         (addr - gaudi->hbm_bar_cur_addr));
6440 }
6441
6442 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6443 {
6444         struct gaudi_device *gaudi = hdev->asic_specific;
6445
6446         if (hdev->reset_info.hard_reset_pending)
6447                 return;
6448
6449         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6450                         (addr - gaudi->hbm_bar_cur_addr));
6451 }
6452
6453 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6454 {
6455         /* mask to zero the MMBP and ASID bits */
6456         WREG32_AND(reg, ~0x7FF);
6457         WREG32_OR(reg, asid);
6458 }
6459
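/*
 * Program the given ASID (with MMU bypass cleared) into the non-secure
 * properties / AxUSER registers of every engine, so that all of their
 * transactions are translated in that context's address space.
 */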
6460 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6461 {
6462         struct gaudi_device *gaudi = hdev->asic_specific;
6463
6464         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6465                 return;
6466
6467         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6468                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6469                 return;
6470         }
6471
6472         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6473         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6474         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6475         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6476         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6477
6478         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6479         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6480         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6481         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6482         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6483
6484         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6485         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6486         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6487         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6488         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6489
6490         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6491         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6492         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6493         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6494         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6495
6496         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6497         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6498         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6499         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6500         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6501
6502         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6503         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6504         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6505         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6506         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6507
6508         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6509         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6510         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6511         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6512         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6513
6514         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6515         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6516         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6517         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6518         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6519
6520         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6521         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6522         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6523         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6524         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6525         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6526         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6527         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6528
6529         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6530         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6531         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6532         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6533         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6534         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6535         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6536
6537         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6538         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6539         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6540         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6541         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6542         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6543         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6544
6545         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6546         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6547         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6548         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6549         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6550         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6551         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6552
6553         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6554         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6555         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6556         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6557         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6558         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6559         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6560
6561         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6562         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6563         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6564         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6565         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6566         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6567         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6568
6569         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6570         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6571         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6572         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6573         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6574         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6575         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6576
6577         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6578         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6579         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6580         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6581         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6582         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6583         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6584
6585         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6586         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6587         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6588         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6589         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6590         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6591         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6592
6593         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6594         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6595         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6596         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6597         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6598         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6599         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6600         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6601         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6602         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6603
6604         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6605         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6606         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6607         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6608         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6609         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6610         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6611         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6612         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6613         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6614         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6615         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6616
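        /*
         * NIC QMAN registers are configured only for ports that were
         * brought up successfully, as reflected in hw_cap_initialized.
         */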
6617         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6618                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6619                                 asid);
6620                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6621                                 asid);
6622                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6623                                 asid);
6624                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6625                                 asid);
6626                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6627                                 asid);
6628         }
6629
6630         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6631                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6632                                 asid);
6633                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6634                                 asid);
6635                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6636                                 asid);
6637                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6638                                 asid);
6639                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6640                                 asid);
6641         }
6642
6643         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6644                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6645                                 asid);
6646                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6647                                 asid);
6648                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6649                                 asid);
6650                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6651                                 asid);
6652                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6653                                 asid);
6654         }
6655
6656         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6657                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6658                                 asid);
6659                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6660                                 asid);
6661                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6662                                 asid);
6663                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6664                                 asid);
6665                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6666                                 asid);
6667         }
6668
6669         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6670                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6671                                 asid);
6672                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6673                                 asid);
6674                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6675                                 asid);
6676                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6677                                 asid);
6678                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6679                                 asid);
6680         }
6681
6682         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6683                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6684                                 asid);
6685                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6686                                 asid);
6687                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6688                                 asid);
6689                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6690                                 asid);
6691                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6692                                 asid);
6693         }
6694
6695         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6696                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6697                                 asid);
6698                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6699                                 asid);
6700                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6701                                 asid);
6702                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6703                                 asid);
6704                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6705                                 asid);
6706         }
6707
6708         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6709                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6710                                 asid);
6711                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6712                                 asid);
6713                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6714                                 asid);
6715                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6716                                 asid);
6717                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6718                                 asid);
6719         }
6720
6721         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6722                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6723                                 asid);
6724                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6725                                 asid);
6726                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6727                                 asid);
6728                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6729                                 asid);
6730                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6731                                 asid);
6732         }
6733
6734         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6735                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6736                                 asid);
6737                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6738                                 asid);
6739                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6740                                 asid);
6741                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6742                                 asid);
6743                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6744                                 asid);
6745         }
6746
6747         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6748         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6749 }
6750
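/*
 * gaudi_send_job_on_qman0() - send a driver-generated job on the internal DMA
 * QMAN. The job is refused if the device is not idle. The last packet slot of
 * the patched CB is filled with a MSG_PROT fence packet that writes
 * GAUDI_QMAN0_FENCE_VAL to host memory, and the driver polls that location to
 * detect completion.
 */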
6751 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6752                 struct hl_cs_job *job)
6753 {
6754         struct packet_msg_prot *fence_pkt;
6755         u32 *fence_ptr;
6756         dma_addr_t fence_dma_addr;
6757         struct hl_cb *cb;
6758         u32 tmp, timeout, dma_offset;
6759         int rc;
6760
6761         if (hdev->pldm)
6762                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6763         else
6764                 timeout = HL_DEVICE_TIMEOUT_USEC;
6765
6766         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6767                 dev_err_ratelimited(hdev->dev,
6768                         "Can't send driver job on QMAN0 because the device is not idle\n");
6769                 return -EBUSY;
6770         }
6771
6772         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6773                                                         &fence_dma_addr);
6774         if (!fence_ptr) {
6775                 dev_err(hdev->dev,
6776                         "Failed to allocate fence memory for QMAN0\n");
6777                 return -ENOMEM;
6778         }
6779
6780         cb = job->patched_cb;
6781
6782         fence_pkt = cb->kernel_address +
6783                         job->job_cb_size - sizeof(struct packet_msg_prot);
6784
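        /* Build the fence packet: a MSG_PROT with engine and message barriers set */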
6785         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6786         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6787         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6788
6789         fence_pkt->ctl = cpu_to_le32(tmp);
6790         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6791         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6792
6793         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6794
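        /*
         * Raise the DMA core protection bits (VAL + ERR_VAL) for the duration
         * of the driver job; they are restored under free_fence_ptr below.
         */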
6795         WREG32(mmDMA0_CORE_PROT + dma_offset,
6796                         BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6797
6798         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6799                                         job->job_cb_size, cb->bus_address);
6800         if (rc) {
6801                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6802                 goto free_fence_ptr;
6803         }
6804
6805         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6806                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6807                                 timeout, true);
6808
6809         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6810
6811         if (rc == -ETIMEDOUT) {
6812                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6813                 goto free_fence_ptr;
6814         }
6815
6816 free_fence_ptr:
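        /* Restore the DMA core protection register, leaving only ERR_VAL set */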
6817         WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6818
6819         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6820                                         fence_dma_addr);
6821         return rc;
6822 }
6823
6824 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6825 {
6826         if (event_type >= GAUDI_EVENT_SIZE)
6827                 goto event_not_supported;
6828
6829         if (!gaudi_irq_map_table[event_type].valid)
6830                 goto event_not_supported;
6831
6832         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6833
6834         return;
6835
6836 event_not_supported:
6837         snprintf(desc, size, "N/A");
6838 }
6839
6840 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6841                                                         bool is_write, u32 *engine_id_1,
6842                                                         u32 *engine_id_2)
6843 {
6844         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6845
6846         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6847                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6848
6849         switch (x_y) {
6850         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6851         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6852                 dma_id[0] = 0;
6853                 dma_id[1] = 2;
6854                 break;
6855         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6856         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6857                 dma_id[0] = 1;
6858                 dma_id[1] = 3;
6859                 break;
6860         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6861         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6862                 dma_id[0] = 4;
6863                 dma_id[1] = 6;
6864                 break;
6865         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6866         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6867                 dma_id[0] = 5;
6868                 dma_id[1] = 7;
6869                 break;
6870         default:
6871                 goto unknown_initiator;
6872         }
6873
6874         for (i = 0 ; i < 2 ; i++) {
6875                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6876                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6877         }
6878
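        /*
         * Each DMA_IF location is shared by two DMA cores; check which core
         * latched the HBW read/write error cause bit to identify the actual
         * initiator.
         */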
6879         switch (x_y) {
6880         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6881         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6882                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6883                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6884                         return "DMA0";
6885                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6886                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6887                         return "DMA2";
6888                 } else {
6889                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6890                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6891                         return "DMA0 or DMA2";
6892                 }
6893         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6894         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6895                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6896                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6897                         return "DMA1";
6898                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6899                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6900                         return "DMA3";
6901                 } else {
6902                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6903                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6904                         return "DMA1 or DMA3";
6905                 }
6906         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6907         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6908                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6909                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6910                         return "DMA4";
6911                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6912                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6913                         return "DMA6";
6914                 } else {
6915                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6916                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6917                         return "DMA4 or DMA6";
6918                 }
6919         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6920         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6921                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6922                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6923                         return "DMA5";
6924                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6925                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6926                         return "DMA7";
6927                 } else {
6928                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6929                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6930                         return "DMA5 or DMA7";
6931                 }
6932         }
6933
6934 unknown_initiator:
6935         return "unknown initiator";
6936 }
6937
6938 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6939                                                         u32 *engine_id_1, u32 *engine_id_2)
6940 {
6941         u32 val, x_y, axi_id;
6942
6943         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6944                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
6945         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6946                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6947         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6948                         RAZWI_INITIATOR_AXI_ID_SHIFT);
6949
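        /*
         * The X/Y coordinates identify the initiator's location on the chip;
         * for locations shared by several units, the AXI ID disambiguates.
         */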
6950         switch (x_y) {
6951         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6952                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6953                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6954                         return "TPC0";
6955                 }
6956                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6957                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6958                         return "NIC0";
6959                 }
6960                 break;
6961         case RAZWI_INITIATOR_ID_X_Y_TPC1:
6962                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6963                 return "TPC1";
6964         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6965         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6966                 *engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6967                 return "MME0";
6968         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6969         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6970                 *engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6971                 return "MME1";
6972         case RAZWI_INITIATOR_ID_X_Y_TPC2:
6973                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6974                 return "TPC2";
6975         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6976                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6977                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6978                         return "TPC3";
6979                 }
6980                 /* PCI, CPU or PSOC does not have an engine id */
6981                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6982                         return "PCI";
6983                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6984                         return "CPU";
6985                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6986                         return "PSOC";
6987                 break;
6988         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6989         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6990         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6991         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6992         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6993         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6994         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6995         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6996                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6997                                 engine_id_1, engine_id_2);
6998         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6999                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
7000                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
7001                         return "TPC4";
7002                 }
7003                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
7004                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
7005                         return "NIC1";
7006                 }
7007                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
7008                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
7009                         return "NIC2";
7010                 }
7011                 break;
7012         case RAZWI_INITIATOR_ID_X_Y_TPC5:
7013                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
7014                 return "TPC5";
7015         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7016         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7017                 *engine_id_1 = GAUDI_ENGINE_ID_MME_2;
7018                 return "MME2";
7019         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7020         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7021                 *engine_id_1 = GAUDI_ENGINE_ID_MME_3;
7022                 return "MME3";
7023         case RAZWI_INITIATOR_ID_X_Y_TPC6:
7024                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
7025                 return "TPC6";
7026         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7027                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
7028                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
7029                         return "TPC7";
7030                 }
7031                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
7032                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
7033                         return "NIC4";
7034                 }
7035                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
7036                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
7037                         return "NIC5";
7038                 }
7039                 break;
7040         default:
7041                 break;
7042         }
7043
7044         dev_err(hdev->dev,
7045                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7046                 val,
7047                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7048                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7049                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7050                         RAZWI_INITIATOR_AXI_ID_MASK);
7051
7052         return "unknown initiator";
7053 }
7054
7055 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_id_1,
7056                                                 u32 *engine_id_2)
7057 {
7059         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7060                 dev_err_ratelimited(hdev->dev,
7061                         "RAZWI event caused by illegal write of %s\n",
7062                         gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
7063                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7064         }
7065
7066         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7067                 dev_err_ratelimited(hdev->dev,
7068                         "RAZWI event caused by illegal read of %s\n",
7069                         gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
7070                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7071         }
7072 }
7073
7074 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u8 *type)
7075 {
7076         struct gaudi_device *gaudi = hdev->asic_specific;
7077         u32 val;
7078
7079         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7080                 return;
7081
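        /*
         * The capture registers latch bits 49:32 of the faulting VA together
         * with a valid bit; the lower 32 bits are read from the _VA register.
         */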
7082         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7083         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7084                 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7085                 *addr <<= 32;
7086                 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7087
7088                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
7089                 *type = HL_RAZWI_PAGE_FAULT;
7090
7091                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7092         }
7093
7094         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7095         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7096                 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7097                 *addr <<= 32;
7098                 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7099
7100                 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
7101                 *type = HL_RAZWI_MMU_ACCESS_ERROR;
7102
7103                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7104         }
7105 }
7106
7107 /*
7108  *  +-------------------+------------------------------------------------------+
7109  *  | Configuration Reg |                     Description                      |
7110  *  |      Address      |                                                      |
7111  *  +-------------------+------------------------------------------------------+
7112  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7113  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7114  *  |                   |0xF34 memory wrappers 63:32                           |
7115  *  |                   |0xF38 memory wrappers 95:64                           |
7116  *  |                   |0xF3C memory wrappers 127:96                          |
7117  *  +-------------------+------------------------------------------------------+
7118  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7119  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7120  *  |                   |0xF44 memory wrappers 63:32                           |
7121  *  |                   |0xF48 memory wrappers 95:64                           |
7122  *  |                   |0xF4C memory wrappers 127:96                          |
7123  *  +-------------------+------------------------------------------------------+
7124  */
7125 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7126                 struct ecc_info_extract_params *params, u64 *ecc_address,
7127                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7128 {
7129         u32 i, num_mem_regs, reg, err_bit;
7130         u64 err_addr, err_word = 0;
7131
7132         num_mem_regs = params->num_memories / 32 +
7133                         ((params->num_memories % 32) ? 1 : 0);
7134
7135         if (params->block_address >= CFG_BASE)
7136                 params->block_address -= CFG_BASE;
7137
7138         if (params->derr)
7139                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7140         else
7141                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7142
7143         /* Set invalid wrapper index */
7144         *memory_wrapper_idx = 0xFF;
7145
7146         /* Iterate through memory wrappers, a single bit must be set */
7147         for (i = 0 ; i < num_mem_regs ; i++) {
7148                 /* the indication registers are consecutive, 4 bytes apart */
7149                 err_word = RREG32(err_addr + i * 4);
7150                 if (err_word) {
7151                         err_bit = __ffs(err_word);
7152                         *memory_wrapper_idx = err_bit + (32 * i);
7153                         break;
7154                 }
7155         }
7156
7157         if (*memory_wrapper_idx == 0xFF) {
7158                 dev_err(hdev->dev, "ECC error information cannot be found\n");
7159                 return -EINVAL;
7160         }
7161
7162         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7163                         *memory_wrapper_idx);
7164
7165         *ecc_address =
7166                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7167         *ecc_syndrom =
7168                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7169
7170         /* Clear error indication */
7171         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7172         if (params->derr)
7173                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7174         else
7175                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7176
7177         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7178
7179         return 0;
7180 }
7181
7182 /*
7183  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7184  *
7185  * @idx: the current pi/ci value
7186  * @q_len: the queue length (power of 2)
7187  *
7188  * @return the cyclically decremented index
7189  */
7190 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7191 {
7192         u32 mask = q_len - 1;
7193
7194         /*
7195          * modular decrement is equivalent to adding (queue_len - 1);
7196          * we then take the LSBs to make sure the value stays in the
7197          * range [0, queue_len - 1]
7198          */
7199         return (idx + q_len - 1) & mask;
7200 }
7201
7202 /**
7203  * gaudi_print_sw_config_stream_data - print SW config stream data
7204  *
7205  * @hdev: pointer to the habanalabs device structure
7206  * @stream: the QMAN's stream
7207  * @qman_base: base address of QMAN registers block
7208  */
7209 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7210                                                 u64 qman_base)
7211 {
7212         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7213         u32 cq_ptr_lo_off, size;
7214
7215         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7216
7217         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7218                                                 stream * cq_ptr_lo_off;
7219         cq_ptr_hi = cq_ptr_lo +
7220                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7221         cq_tsize = cq_ptr_lo +
7222                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7223
7224         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7225         size = RREG32(cq_tsize);
7226         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
7227                                                         stream, cq_ptr, size);
7228 }
7229
7230 /**
7231  * gaudi_print_last_pqes_on_err - print last PQEs on error
7232  *
7233  * @hdev: pointer to the habanalabs device structure
7234  * @qid_base: first QID of the QMAN (out of 4 streams)
7235  * @stream: the QMAN's stream
7236  * @qman_base: base address of QMAN registers block
7237  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7238  */
7239 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7240                                                 u32 stream, u64 qman_base,
7241                                                 bool pr_sw_conf)
7242 {
7243         u32 ci, qm_ci_stream_off, queue_len;
7244         struct hl_hw_queue *q;
7245         u64 pq_ci;
7246         int i;
7247
7248         q = &hdev->kernel_queues[qid_base + stream];
7249
7250         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7251         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7252                                                 stream * qm_ci_stream_off;
7253
7254         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7255                                         q->int_queue_len : HL_QUEUE_LENGTH;
7256
7257         hdev->asic_funcs->hw_queues_lock(hdev);
7258
7259         if (pr_sw_conf)
7260                 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7261
7262         ci = RREG32(pq_ci);
7263
7264         /* we should start printing form ci -1 */
7265         ci = gaudi_queue_idx_dec(ci, queue_len);
7266         /* we should start printing from ci - 1 */
7267         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7268                 struct hl_bd *bd;
7269                 u64 addr;
7270                 u32 len;
7271
7272                 bd = q->kernel_address;
7273                 bd += ci;
7274
7275                 len = le32_to_cpu(bd->len);
7276                 /* len 0 means uninitialized entry- break */
7277                 /* len 0 means an uninitialized entry - break */
7278                         break;
7279
7280                 addr = le64_to_cpu(bd->ptr);
7281
7282                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
7283                                                         stream, ci, addr, len);
7284
7285                 /* get previous ci, wrap if needed */
7286                 ci = gaudi_queue_idx_dec(ci, queue_len);
7287         }
7288
7289         hdev->asic_funcs->hw_queues_unlock(hdev);
7290 }
7291
7292 /**
7293  * print_qman_data_on_err - extract QMAN data on error
7294  *
7295  * @hdev: pointer to the habanalabs device structure
7296  * @qid_base: first QID of the QMAN (out of 4 streams)
7297  * @stream: the QMAN's stream
7298  * @qman_base: base address of QMAN registers block
7299  *
7300  * This function attempts to extract as much data as possible on a QMAN error.
7301  * For an upper CP, print the SW config stream data and the last 8 PQEs.
7302  * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7303  */
7304 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7305                                                 u32 stream, u64 qman_base)
7306 {
7307         u32 i;
7308
7309         if (stream != QMAN_STREAMS) {
7310                 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7311                                                                         true);
7312                 return;
7313         }
7314
7315         gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7316
7317         for (i = 0; i < QMAN_STREAMS; i++)
7318                 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7319                                                                         false);
7320 }
7321
7322 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7323                                           const char *qm_name,
7324                                           u64 qman_base,
7325                                           u32 qid_base)
7326 {
7327         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7328         u64 glbl_sts_addr, arb_err_addr;
7329         char reg_desc[32];
7330
7331         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7332         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7333
7334         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7335         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7336                 glbl_sts_clr_val = 0;
7337                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7338
7339                 if (!glbl_sts_val)
7340                         continue;
7341
7342                 if (i == QMAN_STREAMS)
7343                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7344                 else
7345                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7346
7347                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7348                         if (glbl_sts_val & BIT(j)) {
7349                                 dev_err_ratelimited(hdev->dev,
7350                                                 "%s %s. err cause: %s\n",
7351                                                 qm_name, reg_desc,
7352                                                 gaudi_qman_error_cause[j]);
7353                                 glbl_sts_clr_val |= BIT(j);
7354                         }
7355                 }
7356
7357                 /* Write 1 to clear errors */
7358                 if (!hdev->stop_on_err)
7359                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7360                 else
7361                         print_qman_data_on_err(hdev, qid_base, i, qman_base);
7362         }
7363
7364         arb_err_val = RREG32(arb_err_addr);
7365
7366         if (!arb_err_val)
7367                 return;
7368
7369         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7370                 if (arb_err_val & BIT(j)) {
7371                         dev_err_ratelimited(hdev->dev,
7372                                         "%s ARB_ERR. err cause: %s\n",
7373                                         qm_name,
7374                                         gaudi_qman_arb_error_cause[j]);
7375                 }
7376         }
7377 }
7378
7379 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7380                 struct hl_eq_sm_sei_data *sei_data)
7381 {
7382         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7383
7384         /* Flip the bits as the enum is ordered in the opposite way */
7385         index = (index ^ 0x3) & 0x3;
7386
7387         switch (sei_data->sei_cause) {
7388         case SM_SEI_SO_OVERFLOW:
7389                 dev_err_ratelimited(hdev->dev,
7390                         "%s SEI Error: SOB Group %u overflow/underflow",
7391                         gaudi_sync_manager_names[index],
7392                         le32_to_cpu(sei_data->sei_log));
7393                 break;
7394         case SM_SEI_LBW_4B_UNALIGNED:
7395                 dev_err_ratelimited(hdev->dev,
7396                         "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7397                         gaudi_sync_manager_names[index],
7398                         le32_to_cpu(sei_data->sei_log));
7399                 break;
7400         case SM_SEI_AXI_RESPONSE_ERR:
7401                 dev_err_ratelimited(hdev->dev,
7402                         "%s SEI Error: AXI ID %u response error",
7403                         gaudi_sync_manager_names[index],
7404                         le32_to_cpu(sei_data->sei_log));
7405                 break;
7406         default:
7407                 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7408                                 le32_to_cpu(sei_data->sei_log));
7409                 break;
7410         }
7411 }
7412
7413 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7414                 struct hl_eq_ecc_data *ecc_data)
7415 {
7416         struct ecc_info_extract_params params;
7417         u64 ecc_address = 0, ecc_syndrom = 0;
7418         u8 index, memory_wrapper_idx = 0;
7419         bool extract_info_from_fw;
7420         int rc;
7421
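        /*
         * When FW security is enabled the driver cannot access the ECC
         * registers directly, so the info must be taken from the FW event.
         */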
7422         if (hdev->asic_prop.fw_security_enabled) {
7423                 extract_info_from_fw = true;
7424                 goto extract_ecc_info;
7425         }
7426
7427         switch (event_type) {
7428         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7429         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7430                 extract_info_from_fw = true;
7431                 break;
7432         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7433                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7434                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7435                 params.num_memories = 90;
7436                 params.derr = false;
7437                 extract_info_from_fw = false;
7438                 break;
7439         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7440                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7441                 params.block_address =
7442                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7443                 params.num_memories = 90;
7444                 params.derr = true;
7445                 extract_info_from_fw = false;
7446                 break;
7447         case GAUDI_EVENT_MME0_ACC_SERR:
7448         case GAUDI_EVENT_MME1_ACC_SERR:
7449         case GAUDI_EVENT_MME2_ACC_SERR:
7450         case GAUDI_EVENT_MME3_ACC_SERR:
7451                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7452                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7453                 params.num_memories = 128;
7454                 params.derr = false;
7455                 extract_info_from_fw = false;
7456                 break;
7457         case GAUDI_EVENT_MME0_ACC_DERR:
7458         case GAUDI_EVENT_MME1_ACC_DERR:
7459         case GAUDI_EVENT_MME2_ACC_DERR:
7460         case GAUDI_EVENT_MME3_ACC_DERR:
7461                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7462                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7463                 params.num_memories = 128;
7464                 params.derr = true;
7465                 extract_info_from_fw = false;
7466                 break;
7467         case GAUDI_EVENT_MME0_SBAB_SERR:
7468         case GAUDI_EVENT_MME1_SBAB_SERR:
7469         case GAUDI_EVENT_MME2_SBAB_SERR:
7470         case GAUDI_EVENT_MME3_SBAB_SERR:
7471                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7472                 params.block_address =
7473                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7474                 params.num_memories = 33;
7475                 params.derr = false;
7476                 extract_info_from_fw = false;
7477                 break;
7478         case GAUDI_EVENT_MME0_SBAB_DERR:
7479         case GAUDI_EVENT_MME1_SBAB_DERR:
7480         case GAUDI_EVENT_MME2_SBAB_DERR:
7481         case GAUDI_EVENT_MME3_SBAB_DERR:
7482                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7483                 params.block_address =
7484                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7485                 params.num_memories = 33;
7486                 params.derr = true;
7487                 extract_info_from_fw = false;
7488                 break;
7489         default:
7490                 return;
7491         }
7492
7493 extract_ecc_info:
7494         if (extract_info_from_fw) {
7495                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7496                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7497                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7498         } else {
7499                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7500                                 &ecc_syndrom, &memory_wrapper_idx);
7501                 if (rc)
7502                         return;
7503         }
7504
7505         dev_err(hdev->dev,
7506                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7507                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7508 }
7509
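/*
 * gaudi_handle_qman_err() - map a QMAN error event to its queue ID base and
 * register block, then let the generic handler decode and report the cause.
 */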
7510 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7511 {
7512         u64 qman_base;
7513         char desc[32];
7514         u32 qid_base;
7515         u8 index;
7516
7517         switch (event_type) {
7518         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7519                 index = event_type - GAUDI_EVENT_TPC0_QM;
7520                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7521                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7522                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7523                 break;
7524         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7525                 index = event_type - GAUDI_EVENT_MME0_QM;
7526                 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7527                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7528                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7529                 break;
7530         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7531                 index = event_type - GAUDI_EVENT_DMA0_QM;
7532                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7533                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7534                 if (index > 1)
7535                         qid_base++;
7536                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7537                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7538                 break;
7539         case GAUDI_EVENT_NIC0_QM0:
7540                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7541                 qman_base = mmNIC0_QM0_BASE;
7542                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7543                 break;
7544         case GAUDI_EVENT_NIC0_QM1:
7545                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7546                 qman_base = mmNIC0_QM1_BASE;
7547                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7548                 break;
7549         case GAUDI_EVENT_NIC1_QM0:
7550                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7551                 qman_base = mmNIC1_QM0_BASE;
7552                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7553                 break;
7554         case GAUDI_EVENT_NIC1_QM1:
7555                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7556                 qman_base = mmNIC1_QM1_BASE;
7557                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7558                 break;
7559         case GAUDI_EVENT_NIC2_QM0:
7560                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7561                 qman_base = mmNIC2_QM0_BASE;
7562                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7563                 break;
7564         case GAUDI_EVENT_NIC2_QM1:
7565                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7566                 qman_base = mmNIC2_QM1_BASE;
7567                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7568                 break;
7569         case GAUDI_EVENT_NIC3_QM0:
7570                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7571                 qman_base = mmNIC3_QM0_BASE;
7572                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7573                 break;
7574         case GAUDI_EVENT_NIC3_QM1:
7575                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7576                 qman_base = mmNIC3_QM1_BASE;
7577                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7578                 break;
7579         case GAUDI_EVENT_NIC4_QM0:
7580                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7581                 qman_base = mmNIC4_QM0_BASE;
7582                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7583                 break;
7584         case GAUDI_EVENT_NIC4_QM1:
7585                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7586                 qman_base = mmNIC4_QM1_BASE;
7587                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7588                 break;
7589         default:
7590                 return;
7591         }
7592
7593         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7594 }
7595
7596 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7597                                         bool razwi)
7598 {
7599         u32 engine_id_1, engine_id_2;
7600         char desc[64] = "";
7601         u64 razwi_addr = 0;
7602         u8 razwi_type;
7603         int rc;
7604
7605         /*
7606          * Init the engine ids as not valid by default; they get a valid value only if the
7607          * razwi was initiated by an engine that has an engine id.
7608          * Init the razwi type to its default; it is changed only if the razwi was caused by
7609          * a page fault or an MMU access error.
7610          */
7611         engine_id_1 = U16_MAX;
7612         engine_id_2 = U16_MAX;
7613         razwi_type = U8_MAX;
7614
7615         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7616         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7617                 event_type, desc);
7618
7619         if (razwi) {
7620                 gaudi_print_and_get_razwi_info(hdev, &engine_id_1, &engine_id_2);
7621                 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
7622
7623                 /* In case it's the first razwi, save its parameters */
7624                 rc = atomic_cmpxchg(&hdev->last_error.razwi_write_disable, 0, 1);
7625                 if (!rc) {
7626                         hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime;
7627                         hdev->last_error.razwi_timestamp = ktime_get();
7628                         hdev->last_error.razwi_addr = razwi_addr;
7629                         hdev->last_error.razwi_engine_id_1 = engine_id_1;
7630                         hdev->last_error.razwi_engine_id_2 = engine_id_2;
7631                         /*
7632                          * If the first engine id holds a non-valid value, the razwi
7633                          * initiator does not have an engine id
7634                          */
7635                         hdev->last_error.razwi_non_engine_initiator = (engine_id_1 == U16_MAX);
7636                         hdev->last_error.razwi_type = razwi_type;
7637
7638                 }
7639         }
7640 }
7641
7642 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7643                                         struct cpucp_pkt_sync_err *sync_err)
7644 {
7645         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7646
7647         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7648                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7649 }
7650
7651 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7652                                         struct hl_eq_fw_alive *fw_alive)
7653 {
7654         dev_err(hdev->dev,
7655                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7656                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7657                 "Minor" : "Critical", fw_alive->process_id,
7658                 fw_alive->thread_id, fw_alive->uptime_seconds);
7659 }
7660
7661 static int gaudi_non_hard_reset_late_init(struct hl_device *hdev)
7662 {
7663         /* GAUDI doesn't support any reset except hard-reset */
7664         return -EPERM;
7665 }
7666
7667 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7668                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7669 {
7670         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7671         int rc = 0;
7672
7673         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7674                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7675                 if (!hbm_ecc_data) {
7676                         dev_err(hdev->dev, "No FW ECC data\n");
7677                         return 0;
7678                 }
7679
7680                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7681                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7682                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7683                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7684                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7685                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7686                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7687                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7688                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7689                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7690                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7691                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7692                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7693                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7694
7695                 dev_err(hdev->dev,
7696                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7697                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7698                 dev_err(hdev->dev,
7699                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7700                         device, ch, hbm_ecc_data->first_addr, type,
7701                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7702                         hbm_ecc_data->dec_cnt);
7703                 return 0;
7704         }
7705
7706         if (hdev->asic_prop.fw_security_enabled) {
7707                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7708                 return 0;
7709         }
7710
7711         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
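             /* Each 0x1000 register block covers a pseudo-channel pair: ch * 2 and ch * 2 + 1 */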
7712         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7713                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7714                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7715                 if (val) {
7716                         rc = -EIO;
7717                         dev_err(hdev->dev,
7718                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7719                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7720                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7721                                 (val >> 4) & 0x1);
7722
7723                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7724                         dev_err(hdev->dev,
7725                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7726                                 device, ch * 2,
7727                                 RREG32(base + ch * 0x1000 + 0x064),
7728                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7729                                 (val2 & 0xFF0000) >> 16,
7730                                 (val2 & 0xFF000000) >> 24);
7731                 }
7732
7733                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7734                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7735                 if (val) {
7736                         rc = -EIO;
7737                         dev_err(hdev->dev,
7738                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7739                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7740                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7741                                 (val >> 4) & 0x1);
7742
7743                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7744                         dev_err(hdev->dev,
7745                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7746                                 device, ch * 2 + 1,
7747                                 RREG32(base + ch * 0x1000 + 0x074),
7748                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7749                                 (val2 & 0xFF0000) >> 16,
7750                                 (val2 & 0xFF000000) >> 24);
7751                 }
7752
7753                 /* Clear interrupts */
7754                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7755                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7756                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7757                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7758                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7759                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7760         }
7761
7762         val  = RREG32(base + 0x8F30);
7763         val2 = RREG32(base + 0x8F34);
7764         if (val | val2) {
7765                 rc = -EIO;
7766                 dev_err(hdev->dev,
7767                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7768                         device, val, val2);
7769         }
7770         val  = RREG32(base + 0x8F40);
7771         val2 = RREG32(base + 0x8F44);
7772         if (val | val2) {
7773                 rc = -EIO;
7774                 dev_err(hdev->dev,
7775                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7776                         device, val, val2);
7777         }
7778
7779         return rc;
7780 }
7781
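     /* Translate an HBM SPI event to the index of the HBM device that raised it */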
7782 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7783 {
7784         switch (hbm_event_type) {
7785         case GAUDI_EVENT_HBM0_SPI_0:
7786         case GAUDI_EVENT_HBM0_SPI_1:
7787                 return 0;
7788         case GAUDI_EVENT_HBM1_SPI_0:
7789         case GAUDI_EVENT_HBM1_SPI_1:
7790                 return 1;
7791         case GAUDI_EVENT_HBM2_SPI_0:
7792         case GAUDI_EVENT_HBM2_SPI_1:
7793                 return 2;
7794         case GAUDI_EVENT_HBM3_SPI_0:
7795         case GAUDI_EVENT_HBM3_SPI_1:
7796                 return 3;
7797         default:
7798                 break;
7799         }
7800
7801         /* Should never happen */
7802         return 0;
7803 }
7804
7805 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7806                                         char *interrupt_name)
7807 {
7808         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7809         bool soft_reset_required = false;
7810
7811         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7812                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7813
7814         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7815                 if (tpc_interrupts_cause & BIT(i)) {
7816                         dev_err_ratelimited(hdev->dev,
7817                                         "TPC%d_%s interrupt cause: %s\n",
7818                                         tpc_id, interrupt_name,
7819                                         gaudi_tpc_interrupts_cause[i]);
7820                         /* If this is a QM error, we need to soft-reset */
7821                         if (i == 15)
7822                                 soft_reset_required = true;
7823                 }
7824
7825         /* Clear interrupts */
7826         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7827
7828         return soft_reset_required;
7829 }
7830
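     /* TPCx_DEC event IDs are spaced 2 apart, so the TPC index is the offset halved */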
7831 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7832 {
7833         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7834 }
7835
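     /* TPCx_KRN_ERR event IDs are spaced 6 apart, so divide the offset by 6 */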
7836 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7837 {
7838         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7839 }
7840
7841 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7842                                         u16 event_type)
7843 {
7844         ktime_t zero_time = ktime_set(0, 0);
7845
7846         mutex_lock(&hdev->clk_throttling.lock);
7847
7848         switch (event_type) {
7849         case GAUDI_EVENT_FIX_POWER_ENV_S:
7850                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7851                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7852                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7853                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7854                 dev_info_ratelimited(hdev->dev,
7855                         "Clock throttling due to power consumption\n");
7856                 break;
7857
7858         case GAUDI_EVENT_FIX_POWER_ENV_E:
7859                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7860                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7861                 dev_info_ratelimited(hdev->dev,
7862                         "Power envelop is safe, back to optimal clock\n");
7863                 break;
7864
7865         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7866                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7867                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7868                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7869                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7870                 dev_info_ratelimited(hdev->dev,
7871                         "Clock throttling due to overheating\n");
7872                 break;
7873
7874         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7875                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7876                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7877                 dev_info_ratelimited(hdev->dev,
7878                         "Thermal envelop is safe, back to optimal clock\n");
7879                 break;
7880
7881         default:
7882                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7883                         event_type);
7884                 break;
7885         }
7886
7887         mutex_unlock(&hdev->clk_throttling.lock);
7888 }
7889
7890 static void gaudi_handle_eqe(struct hl_device *hdev,
7891                                 struct hl_eq_entry *eq_entry)
7892 {
7893         struct gaudi_device *gaudi = hdev->asic_specific;
7894         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7895         u32 fw_fatal_err_flag = 0;
7896         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7897                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7898         bool reset_required;
7899         u8 cause;
7900         int rc;
7901
7902         if (event_type >= GAUDI_EVENT_SIZE) {
7903                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7904                                 event_type, GAUDI_EVENT_SIZE - 1);
7905                 return;
7906         }
7907
7908         gaudi->events_stat[event_type]++;
7909         gaudi->events_stat_aggregate[event_type]++;
7910
7911         switch (event_type) {
7912         case GAUDI_EVENT_PCIE_CORE_DERR:
7913         case GAUDI_EVENT_PCIE_IF_DERR:
7914         case GAUDI_EVENT_PCIE_PHY_DERR:
7915         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7916         case GAUDI_EVENT_MME0_ACC_DERR:
7917         case GAUDI_EVENT_MME0_SBAB_DERR:
7918         case GAUDI_EVENT_MME1_ACC_DERR:
7919         case GAUDI_EVENT_MME1_SBAB_DERR:
7920         case GAUDI_EVENT_MME2_ACC_DERR:
7921         case GAUDI_EVENT_MME2_SBAB_DERR:
7922         case GAUDI_EVENT_MME3_ACC_DERR:
7923         case GAUDI_EVENT_MME3_SBAB_DERR:
7924         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7925                 fallthrough;
7926         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7927         case GAUDI_EVENT_PSOC_MEM_DERR:
7928         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7929         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7930         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7931         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7932         case GAUDI_EVENT_MMU_DERR:
7933         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7934                 gaudi_print_irq_info(hdev, event_type, true);
7935                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7936                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7937                 goto reset_device;
7938
7939         case GAUDI_EVENT_GIC500:
7940         case GAUDI_EVENT_AXI_ECC:
7941         case GAUDI_EVENT_L2_RAM_ECC:
7942         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7943                 gaudi_print_irq_info(hdev, event_type, false);
7944                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7945                 goto reset_device;
7946
7947         case GAUDI_EVENT_HBM0_SPI_0:
7948         case GAUDI_EVENT_HBM1_SPI_0:
7949         case GAUDI_EVENT_HBM2_SPI_0:
7950         case GAUDI_EVENT_HBM3_SPI_0:
7951                 gaudi_print_irq_info(hdev, event_type, false);
7952                 gaudi_hbm_read_interrupts(hdev,
7953                                 gaudi_hbm_event_to_dev(event_type),
7954                                 &eq_entry->hbm_ecc_data);
7955                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7956                 goto reset_device;
7957
7958         case GAUDI_EVENT_HBM0_SPI_1:
7959         case GAUDI_EVENT_HBM1_SPI_1:
7960         case GAUDI_EVENT_HBM2_SPI_1:
7961         case GAUDI_EVENT_HBM3_SPI_1:
7962                 gaudi_print_irq_info(hdev, event_type, false);
7963                 gaudi_hbm_read_interrupts(hdev,
7964                                 gaudi_hbm_event_to_dev(event_type),
7965                                 &eq_entry->hbm_ecc_data);
7966                 hl_fw_unmask_irq(hdev, event_type);
7967                 break;
7968
7969         case GAUDI_EVENT_TPC0_DEC:
7970         case GAUDI_EVENT_TPC1_DEC:
7971         case GAUDI_EVENT_TPC2_DEC:
7972         case GAUDI_EVENT_TPC3_DEC:
7973         case GAUDI_EVENT_TPC4_DEC:
7974         case GAUDI_EVENT_TPC5_DEC:
7975         case GAUDI_EVENT_TPC6_DEC:
7976         case GAUDI_EVENT_TPC7_DEC:
7977                 gaudi_print_irq_info(hdev, event_type, true);
7978                 reset_required = gaudi_tpc_read_interrupts(hdev,
7979                                         tpc_dec_event_to_tpc_id(event_type),
7980                                         "AXI_SLV_DEC_Error");
7981                 if (reset_required) {
7982                         dev_err(hdev->dev, "reset required due to %s\n",
7983                                 gaudi_irq_map_table[event_type].name);
7984
7985                         hl_device_reset(hdev, 0);
7986                 } else {
7987                         hl_fw_unmask_irq(hdev, event_type);
7988                 }
7989                 break;
7990
7991         case GAUDI_EVENT_TPC0_KRN_ERR:
7992         case GAUDI_EVENT_TPC1_KRN_ERR:
7993         case GAUDI_EVENT_TPC2_KRN_ERR:
7994         case GAUDI_EVENT_TPC3_KRN_ERR:
7995         case GAUDI_EVENT_TPC4_KRN_ERR:
7996         case GAUDI_EVENT_TPC5_KRN_ERR:
7997         case GAUDI_EVENT_TPC6_KRN_ERR:
7998         case GAUDI_EVENT_TPC7_KRN_ERR:
7999                 gaudi_print_irq_info(hdev, event_type, true);
8000                 reset_required = gaudi_tpc_read_interrupts(hdev,
8001                                         tpc_krn_event_to_tpc_id(event_type),
8002                                         "KRN_ERR");
8003                 if (reset_required) {
8004                         dev_err(hdev->dev, "reset required due to %s\n",
8005                                 gaudi_irq_map_table[event_type].name);
8006
8007                         hl_device_reset(hdev, 0);
8008                 } else {
8009                         hl_fw_unmask_irq(hdev, event_type);
8010                 }
8011                 break;
8012
8013         case GAUDI_EVENT_PCIE_CORE_SERR:
8014         case GAUDI_EVENT_PCIE_IF_SERR:
8015         case GAUDI_EVENT_PCIE_PHY_SERR:
8016         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8017         case GAUDI_EVENT_MME0_ACC_SERR:
8018         case GAUDI_EVENT_MME0_SBAB_SERR:
8019         case GAUDI_EVENT_MME1_ACC_SERR:
8020         case GAUDI_EVENT_MME1_SBAB_SERR:
8021         case GAUDI_EVENT_MME2_ACC_SERR:
8022         case GAUDI_EVENT_MME2_SBAB_SERR:
8023         case GAUDI_EVENT_MME3_ACC_SERR:
8024         case GAUDI_EVENT_MME3_SBAB_SERR:
8025         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8026         case GAUDI_EVENT_CPU_IF_ECC_SERR:
8027         case GAUDI_EVENT_PSOC_MEM_SERR:
8028         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8029         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8030         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8031         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8032                 fallthrough;
8033         case GAUDI_EVENT_MMU_SERR:
8034                 gaudi_print_irq_info(hdev, event_type, true);
8035                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8036                 hl_fw_unmask_irq(hdev, event_type);
8037                 break;
8038
8039         case GAUDI_EVENT_PCIE_DEC:
8040         case GAUDI_EVENT_MME0_WBC_RSP:
8041         case GAUDI_EVENT_MME0_SBAB0_RSP:
8042         case GAUDI_EVENT_MME1_WBC_RSP:
8043         case GAUDI_EVENT_MME1_SBAB0_RSP:
8044         case GAUDI_EVENT_MME2_WBC_RSP:
8045         case GAUDI_EVENT_MME2_SBAB0_RSP:
8046         case GAUDI_EVENT_MME3_WBC_RSP:
8047         case GAUDI_EVENT_MME3_SBAB0_RSP:
8048         case GAUDI_EVENT_CPU_AXI_SPLITTER:
8049         case GAUDI_EVENT_PSOC_AXI_DEC:
8050         case GAUDI_EVENT_PSOC_PRSTN_FALL:
8051         case GAUDI_EVENT_MMU_PAGE_FAULT:
8052         case GAUDI_EVENT_MMU_WR_PERM:
8053         case GAUDI_EVENT_RAZWI_OR_ADC:
8054         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8055         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8056         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8057                 fallthrough;
8058         case GAUDI_EVENT_NIC0_QM0:
8059         case GAUDI_EVENT_NIC0_QM1:
8060         case GAUDI_EVENT_NIC1_QM0:
8061         case GAUDI_EVENT_NIC1_QM1:
8062         case GAUDI_EVENT_NIC2_QM0:
8063         case GAUDI_EVENT_NIC2_QM1:
8064         case GAUDI_EVENT_NIC3_QM0:
8065         case GAUDI_EVENT_NIC3_QM1:
8066         case GAUDI_EVENT_NIC4_QM0:
8067         case GAUDI_EVENT_NIC4_QM1:
8068         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8069                 gaudi_print_irq_info(hdev, event_type, true);
8070                 gaudi_handle_qman_err(hdev, event_type);
8071                 hl_fw_unmask_irq(hdev, event_type);
8072                 break;
8073
8074         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8075                 gaudi_print_irq_info(hdev, event_type, true);
8076                 goto reset_device;
8077
8078         case GAUDI_EVENT_TPC0_BMON_SPMU:
8079         case GAUDI_EVENT_TPC1_BMON_SPMU:
8080         case GAUDI_EVENT_TPC2_BMON_SPMU:
8081         case GAUDI_EVENT_TPC3_BMON_SPMU:
8082         case GAUDI_EVENT_TPC4_BMON_SPMU:
8083         case GAUDI_EVENT_TPC5_BMON_SPMU:
8084         case GAUDI_EVENT_TPC6_BMON_SPMU:
8085         case GAUDI_EVENT_TPC7_BMON_SPMU:
8086         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8087                 gaudi_print_irq_info(hdev, event_type, false);
8088                 hl_fw_unmask_irq(hdev, event_type);
8089                 break;
8090
8091         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8092                 gaudi_print_irq_info(hdev, event_type, false);
8093                 gaudi_print_sm_sei_info(hdev, event_type,
8094                                         &eq_entry->sm_sei_data);
8095                 rc = hl_state_dump(hdev);
8096                 if (rc)
8097                         dev_err(hdev->dev,
8098                                 "Error during system state dump %d\n", rc);
8099                 hl_fw_unmask_irq(hdev, event_type);
8100                 break;
8101
8102         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8103                 gaudi_print_clk_change_info(hdev, event_type);
8104                 hl_fw_unmask_irq(hdev, event_type);
8105                 break;
8106
8107         case GAUDI_EVENT_PSOC_GPIO_U16_0:
8108                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8109                 dev_err(hdev->dev,
8110                         "Received high temp H/W interrupt %d (cause %d)\n",
8111                         event_type, cause);
8112                 break;
8113
8114         case GAUDI_EVENT_DEV_RESET_REQ:
8115                 gaudi_print_irq_info(hdev, event_type, false);
8116                 goto reset_device;
8117
8118         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8119                 gaudi_print_irq_info(hdev, event_type, false);
8120                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8121                 goto reset_device;
8122
8123         case GAUDI_EVENT_FW_ALIVE_S:
8124                 gaudi_print_irq_info(hdev, event_type, false);
8125                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8126                 goto reset_device;
8127
8128         default:
8129                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8130                                 event_type);
8131                 break;
8132         }
8133
8134         return;
8135
8136 reset_device:
8137         if (hdev->asic_prop.fw_security_enabled)
8138                 hl_device_reset(hdev, HL_DRV_RESET_HARD
8139                                         | HL_DRV_RESET_BYPASS_REQ_TO_FW
8140                                         | fw_fatal_err_flag);
8141         else if (hdev->hard_reset_on_fw_events)
8142                 hl_device_reset(hdev, HL_DRV_RESET_HARD | fw_fatal_err_flag);
8143         else
8144                 hl_fw_unmask_irq(hdev, event_type);
8145 }
8146
8147 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8148                                         u32 *size)
8149 {
8150         struct gaudi_device *gaudi = hdev->asic_specific;
8151
8152         if (aggregate) {
8153                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8154                 return gaudi->events_stat_aggregate;
8155         }
8156
8157         *size = (u32) sizeof(gaudi->events_stat);
8158         return gaudi->events_stat;
8159 }
8160
8161 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8162                                         u32 flags)
8163 {
8164         struct gaudi_device *gaudi = hdev->asic_specific;
8165         u32 status, timeout_usec;
8166         int rc;
8167
8168         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8169                 hdev->reset_info.hard_reset_pending)
8170                 return 0;
8171
8172         if (hdev->pldm)
8173                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8174         else
8175                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8176
8177         /* L0 & L1 invalidation */
8178         WREG32(mmSTLB_INV_PS, 3);
8179         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8180         WREG32(mmSTLB_INV_PS, 2);
8181
8182         rc = hl_poll_timeout(
8183                 hdev,
8184                 mmSTLB_INV_PS,
8185                 status,
8186                 !status,
8187                 1000,
8188                 timeout_usec);
8189
8190         WREG32(mmSTLB_INV_SET, 0);
8191
8192         return rc;
8193 }
8194
8195 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8196                                                 bool is_hard, u32 flags,
8197                                                 u32 asid, u64 va, u64 size)
8198 {
8199         /* Treat as invalidate all because there is no range invalidation
8200          * in Gaudi
8201          */
8202         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8203 }
8204
8205 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8206                                         u32 asid, u64 phys_addr)
8207 {
8208         u32 status, timeout_usec;
8209         int rc;
8210
8211         if (hdev->pldm)
8212                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8213         else
8214                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8215
8216         WREG32(MMU_ASID, asid);
8217         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8218         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8219         WREG32(MMU_BUSY, 0x80000000);
8220
8221         rc = hl_poll_timeout(
8222                 hdev,
8223                 MMU_BUSY,
8224                 status,
8225                 !(status & 0x80000000),
8226                 1000,
8227                 timeout_usec);
8228
8229         if (rc) {
8230                 dev_err(hdev->dev,
8231                         "Timeout during MMU hop0 config of asid %d\n", asid);
8232                 return rc;
8233         }
8234
8235         return 0;
8236 }
8237
8238 static int gaudi_send_heartbeat(struct hl_device *hdev)
8239 {
8240         struct gaudi_device *gaudi = hdev->asic_specific;
8241
8242         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8243                 return 0;
8244
8245         return hl_fw_send_heartbeat(hdev);
8246 }
8247
8248 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8249 {
8250         struct gaudi_device *gaudi = hdev->asic_specific;
8251         struct asic_fixed_properties *prop = &hdev->asic_prop;
8252         int rc;
8253
8254         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8255                 return 0;
8256
8257         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8258                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8259                                         mmCPU_BOOT_ERR1);
8260         if (rc)
8261                 return rc;
8262
8263         if (!strlen(prop->cpucp_info.card_name))
8264                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8265                                 CARD_NAME_MAX_LEN);
8266
8267         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8268
8269         set_default_power_values(hdev);
8270
8271         hdev->max_power = prop->max_power_default;
8272
8273         return 0;
8274 }
8275
8276 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8277                                         u8 mask_len, struct seq_file *s)
8278 {
8279         struct gaudi_device *gaudi = hdev->asic_specific;
8280         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8281         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8282         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8283         unsigned long *mask = (unsigned long *)mask_arr;
8284         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8285         bool is_idle = true, is_eng_idle, is_slave;
8286         u64 offset;
8287         int i, dma_id, port;
8288
8289         if (s)
8290                 seq_puts(s,
8291                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8292                         "---  -------  ------------  ----------  -------------\n");
8293
8294         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8295                 dma_id = gaudi_dma_assignment[i];
8296                 offset = dma_id * DMA_QMAN_OFFSET;
8297
8298                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8299                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8300                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8301                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8302                                 IS_DMA_IDLE(dma_core_sts0);
8303                 is_idle &= is_eng_idle;
8304
8305                 if (mask && !is_eng_idle)
8306                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8307                 if (s)
8308                         seq_printf(s, fmt, dma_id,
8309                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8310                                 qm_cgm_sts, dma_core_sts0);
8311         }
8312
8313         if (s)
8314                 seq_puts(s,
8315                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8316                         "---  -------  ------------  ----------  ----------\n");
8317
8318         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8319                 offset = i * TPC_QMAN_OFFSET;
8320                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8321                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8322                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8323                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8324                                 IS_TPC_IDLE(tpc_cfg_sts);
8325                 is_idle &= is_eng_idle;
8326
8327                 if (mask && !is_eng_idle)
8328                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8329                 if (s)
8330                         seq_printf(s, fmt, i,
8331                                 is_eng_idle ? "Y" : "N",
8332                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8333         }
8334
8335         if (s)
8336                 seq_puts(s,
8337                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8338                         "---  -------  ------------  ----------  -----------\n");
8339
8340         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8341                 offset = i * MME_QMAN_OFFSET;
8342                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8343                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8344
8345                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8346                 is_slave = i % 2;
8347                 if (!is_slave) {
8348                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8349                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8350                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8351                 }
8352
8353                 is_idle &= is_eng_idle;
8354
8355                 if (mask && !is_eng_idle)
8356                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8357                 if (s) {
8358                         if (!is_slave)
8359                                 seq_printf(s, fmt, i,
8360                                         is_eng_idle ? "Y" : "N",
8361                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8362                         else
8363                                 seq_printf(s, mme_slave_fmt, i,
8364                                         is_eng_idle ? "Y" : "N", "-",
8365                                         "-", mme_arch_sts);
8366                 }
8367         }
8368
8369         if (s)
8370                 seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8371                                 "---  -------  ------------  ----------\n");
8372
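             /* Each NIC macro has two QMANs, so every iteration checks two ports */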
8373         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8374                 offset = i * NIC_MACRO_QMAN_OFFSET;
8375                 port = 2 * i;
8376                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8377                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8378                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8379                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8380                         is_idle &= is_eng_idle;
8381
8382                         if (mask && !is_eng_idle)
8383                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8384                         if (s)
8385                                 seq_printf(s, nic_fmt, port,
8386                                                 is_eng_idle ? "Y" : "N",
8387                                                 qm_glbl_sts0, qm_cgm_sts);
8388                 }
8389
8390                 port = 2 * i + 1;
8391                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8392                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8393                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8394                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8395                         is_idle &= is_eng_idle;
8396
8397                         if (mask && !is_eng_idle)
8398                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8399                         if (s)
8400                                 seq_printf(s, nic_fmt, port,
8401                                                 is_eng_idle ? "Y" : "N",
8402                                                 qm_glbl_sts0, qm_cgm_sts);
8403                 }
8404         }
8405
8406         if (s)
8407                 seq_puts(s, "\n");
8408
8409         return is_idle;
8410 }
8411
8412 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8413         __acquires(&gaudi->hw_queues_lock)
8414 {
8415         struct gaudi_device *gaudi = hdev->asic_specific;
8416
8417         spin_lock(&gaudi->hw_queues_lock);
8418 }
8419
8420 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8421         __releases(&gaudi->hw_queues_lock)
8422 {
8423         struct gaudi_device *gaudi = hdev->asic_specific;
8424
8425         spin_unlock(&gaudi->hw_queues_lock);
8426 }
8427
8428 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8429 {
8430         return hdev->pdev->device;
8431 }
8432
8433 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8434                                 size_t max_size)
8435 {
8436         struct gaudi_device *gaudi = hdev->asic_specific;
8437
8438         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8439                 return 0;
8440
8441         return hl_fw_get_eeprom_data(hdev, data, max_size);
8442 }
8443
8444 /*
8445  * This function should be used only during initialization and/or after reset,
8446  * when there are no active users.
8447  */
8448 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8449 {
8450         u64 kernel_timeout;
8451         u32 status, offset;
8452         int rc;
8453
8454         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8455
8456         if (hdev->pldm)
8457                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8458         else
8459                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8460
8461         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8462                         lower_32_bits(tpc_kernel));
8463         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8464                         upper_32_bits(tpc_kernel));
8465
8466         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8467                         lower_32_bits(tpc_kernel));
8468         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8469                         upper_32_bits(tpc_kernel));
8470         /* set a valid LUT pointer, content is of no significance */
8471         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8472                         lower_32_bits(tpc_kernel));
8473         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8474                         upper_32_bits(tpc_kernel));
8475
8476         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8477                         lower_32_bits(CFG_BASE +
8478                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8479
8480         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8481                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8482                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8483         /* wait a bit for the engine to start executing */
8484         usleep_range(1000, 1500);
8485
8486         /* wait until engine has finished executing */
8487         rc = hl_poll_timeout(
8488                 hdev,
8489                 mmTPC0_CFG_STATUS + offset,
8490                 status,
8491                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8492                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8493                 1000,
8494                 kernel_timeout);
8495
8496         if (rc) {
8497                 dev_err(hdev->dev,
8498                         "Timeout while waiting for TPC%d icache prefetch\n",
8499                         tpc_id);
8500                 return -EIO;
8501         }
8502
8503         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8504                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8505
8506         /* wait a bit for the engine to start executing */
8507         usleep_range(1000, 1500);
8508
8509         /* wait until engine has finished executing */
8510         rc = hl_poll_timeout(
8511                 hdev,
8512                 mmTPC0_CFG_STATUS + offset,
8513                 status,
8514                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8515                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8516                 1000,
8517                 kernel_timeout);
8518
8519         if (rc) {
8520                 dev_err(hdev->dev,
8521                         "Timeout while waiting for TPC%d vector pipe\n",
8522                         tpc_id);
8523                 return -EIO;
8524         }
8525
8526         rc = hl_poll_timeout(
8527                 hdev,
8528                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8529                 status,
8530                 (status == 0),
8531                 1000,
8532                 kernel_timeout);
8533
8534         if (rc) {
8535                 dev_err(hdev->dev,
8536                         "Timeout while waiting for TPC%d kernel to execute\n",
8537                         tpc_id);
8538                 return -EIO;
8539         }
8540
8541         return 0;
8542 }
8543
8544 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8545                 struct hl_ctx *ctx)
8546 {
8547         struct gaudi_device *gaudi = hdev->asic_specific;
8548         int min_alloc_order, rc, collective_cb_size;
8549
8550         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8551                 return 0;
8552
8553         hdev->internal_cb_pool_virt_addr =
8554                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8555                                         HOST_SPACE_INTERNAL_CB_SZ,
8556                                         &hdev->internal_cb_pool_dma_addr,
8557                                         GFP_KERNEL | __GFP_ZERO);
8558
8559         if (!hdev->internal_cb_pool_virt_addr)
8560                 return -ENOMEM;
8561
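             /* A single collective CB consists of 5 msg_short packets plus a fence packet */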
8562         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8563                         sizeof(struct packet_fence);
8564         min_alloc_order = ilog2(collective_cb_size);
8565
8566         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8567         if (!hdev->internal_cb_pool) {
8568                 dev_err(hdev->dev,
8569                         "Failed to create internal CB pool\n");
8570                 rc = -ENOMEM;
8571                 goto free_internal_cb_pool;
8572         }
8573
8574         rc = gen_pool_add(hdev->internal_cb_pool,
8575                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8576                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8577         if (rc) {
8578                 dev_err(hdev->dev,
8579                         "Failed to add memory to internal CB pool\n");
8580                 rc = -EFAULT;
8581                 goto destroy_internal_cb_pool;
8582         }
8583
8584         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8585                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8586                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8587
8588         if (!hdev->internal_cb_va_base) {
8589                 rc = -ENOMEM;
8590                 goto destroy_internal_cb_pool;
8591         }
8592
8593         mutex_lock(&ctx->mmu_lock);
8594         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8595                         hdev->internal_cb_pool_dma_addr,
8596                         HOST_SPACE_INTERNAL_CB_SZ);
8597
8598         hdev->asic_funcs->mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8599         mutex_unlock(&ctx->mmu_lock);
8600
8601         if (rc)
8602                 goto unreserve_internal_cb_pool;
8603
8604         return 0;
8605
8606 unreserve_internal_cb_pool:
8607         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8608                         HOST_SPACE_INTERNAL_CB_SZ);
8609 destroy_internal_cb_pool:
8610         gen_pool_destroy(hdev->internal_cb_pool);
8611 free_internal_cb_pool:
8612         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8613                         HOST_SPACE_INTERNAL_CB_SZ,
8614                         hdev->internal_cb_pool_virt_addr,
8615                         hdev->internal_cb_pool_dma_addr);
8616
8617         return rc;
8618 }
8619
8620 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8621                 struct hl_ctx *ctx)
8622 {
8623         struct gaudi_device *gaudi = hdev->asic_specific;
8624
8625         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8626                 return;
8627
8628         mutex_lock(&ctx->mmu_lock);
8629         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8630                         HOST_SPACE_INTERNAL_CB_SZ);
8631         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8632                         HOST_SPACE_INTERNAL_CB_SZ);
8633         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8634         mutex_unlock(&ctx->mmu_lock);
8635
8636         gen_pool_destroy(hdev->internal_cb_pool);
8637
8638         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8639                         HOST_SPACE_INTERNAL_CB_SZ,
8640                         hdev->internal_cb_pool_virt_addr,
8641                         hdev->internal_cb_pool_dma_addr);
8642 }
8643
8644 static int gaudi_ctx_init(struct hl_ctx *ctx)
8645 {
8646         int rc;
8647
8648         if (ctx->asid == HL_KERNEL_ASID_ID)
8649                 return 0;
8650
8651         rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8652         if (rc)
8653                 return rc;
8654
8655         rc = gaudi_restore_user_registers(ctx->hdev);
8656         if (rc)
8657                 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8658
8659         return rc;
8660 }
8661
8662 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8663 {
8664         if (ctx->asid == HL_KERNEL_ASID_ID)
8665                 return;
8666
8667         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8668 }
8669
8670 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8671 {
8672         return gaudi_cq_assignment[cq_idx];
8673 }
8674
8675 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8676 {
8677         return sizeof(struct packet_msg_short) +
8678                         sizeof(struct packet_msg_prot) * 2;
8679 }
8680
8681 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8682 {
8683         return sizeof(struct packet_msg_short) * 4 +
8684                         sizeof(struct packet_fence) +
8685                         sizeof(struct packet_msg_prot) * 2;
8686 }
8687
8688 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8689 {
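             /* SOB registers are contiguous, 4 bytes apart */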
8690         return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8691 }
8692
8693 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8694                                 u32 size, bool eb)
8695 {
8696         struct hl_cb *cb = (struct hl_cb *) data;
8697         struct packet_msg_short *pkt;
8698         u32 value, ctl, pkt_size = sizeof(*pkt);
8699
8700         pkt = cb->kernel_address + size;
8701         memset(pkt, 0, pkt_size);
8702
8703         /* Inc by 1, Mode ADD */
8704         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8705         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8706
8707         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8708         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8709         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8710         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8711         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8712         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8713         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8714
8715         pkt->value = cpu_to_le32(value);
8716         pkt->ctl = cpu_to_le32(ctl);
8717
8718         return size + pkt_size;
8719 }
8720
8721 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8722                                         u16 addr)
8723 {
8724         u32 ctl, pkt_size = sizeof(*pkt);
8725
8726         memset(pkt, 0, pkt_size);
8727
8728         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8729         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8730         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8731         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8732         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8733         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8734
8735         pkt->value = cpu_to_le32(value);
8736         pkt->ctl = cpu_to_le32(ctl);
8737
8738         return pkt_size;
8739 }
8740
8741 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8742                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8743                 u16 sob_val, u16 mon_id)
8744 {
8745         u64 monitor_base;
8746         u32 ctl, value, pkt_size = sizeof(*pkt);
8747         u16 msg_addr_offset;
8748         u8 mask;
8749
8750         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8751                 dev_err(hdev->dev,
8752                         "sob_base %u (mask %#x) is not valid\n",
8753                         sob_base, sob_mask);
8754                 return 0;
8755         }
8756
8757         /*
8758          * monitor_base should be the content of the base0 address registers,
8759          * so it will be added to the msg short offsets
8760          */
8761         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8762
8763         msg_addr_offset =
8764                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8765                                 monitor_base;
8766
8767         memset(pkt, 0, pkt_size);
8768
8769         /* Monitor config packet: bind the monitor to a sync object */
8770         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8771         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8772         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8773                         0); /* GREATER OR EQUAL */
8774         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8775
8776         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8777         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8778         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8779         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8780         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8781         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8782         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8783
8784         pkt->value = cpu_to_le32(value);
8785         pkt->ctl = cpu_to_le32(ctl);
8786
8787         return pkt_size;
8788 }
8789
8790 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8791 {
8792         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8793
8794         memset(pkt, 0, pkt_size);
8795
8796         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8797         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8798         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8799
8800         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8801         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8802         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8803         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8804
8805         pkt->cfg = cpu_to_le32(cfg);
8806         pkt->ctl = cpu_to_le32(ctl);
8807
8808         return pkt_size;
8809 }
8810
8811 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8812 {
8813         u32 offset, nic_index;
8814
8815         switch (queue_id) {
8816         case GAUDI_QUEUE_ID_DMA_0_0:
8817                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8818                 break;
8819         case GAUDI_QUEUE_ID_DMA_0_1:
8820                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8821                 break;
8822         case GAUDI_QUEUE_ID_DMA_0_2:
8823                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8824                 break;
8825         case GAUDI_QUEUE_ID_DMA_0_3:
8826                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8827                 break;
8828         case GAUDI_QUEUE_ID_DMA_1_0:
8829                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8830                 break;
8831         case GAUDI_QUEUE_ID_DMA_1_1:
8832                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8833                 break;
8834         case GAUDI_QUEUE_ID_DMA_1_2:
8835                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8836                 break;
8837         case GAUDI_QUEUE_ID_DMA_1_3:
8838                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8839                 break;
8840         case GAUDI_QUEUE_ID_DMA_5_0:
8841                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8842                 break;
8843         case GAUDI_QUEUE_ID_DMA_5_1:
8844                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8845                 break;
8846         case GAUDI_QUEUE_ID_DMA_5_2:
8847                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8848                 break;
8849         case GAUDI_QUEUE_ID_DMA_5_3:
8850                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8851                 break;
8852         case GAUDI_QUEUE_ID_TPC_7_0:
8853                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8854                 break;
8855         case GAUDI_QUEUE_ID_TPC_7_1:
8856                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8857                 break;
8858         case GAUDI_QUEUE_ID_TPC_7_2:
8859                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8860                 break;
8861         case GAUDI_QUEUE_ID_TPC_7_3:
8862                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8863                 break;
8864         case GAUDI_QUEUE_ID_NIC_0_0:
8865         case GAUDI_QUEUE_ID_NIC_1_0:
8866         case GAUDI_QUEUE_ID_NIC_2_0:
8867         case GAUDI_QUEUE_ID_NIC_3_0:
8868         case GAUDI_QUEUE_ID_NIC_4_0:
8869         case GAUDI_QUEUE_ID_NIC_5_0:
8870         case GAUDI_QUEUE_ID_NIC_6_0:
8871         case GAUDI_QUEUE_ID_NIC_7_0:
8872         case GAUDI_QUEUE_ID_NIC_8_0:
8873         case GAUDI_QUEUE_ID_NIC_9_0:
8874                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8875                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8876                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8877                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8878                 break;
8879         case GAUDI_QUEUE_ID_NIC_0_1:
8880         case GAUDI_QUEUE_ID_NIC_1_1:
8881         case GAUDI_QUEUE_ID_NIC_2_1:
8882         case GAUDI_QUEUE_ID_NIC_3_1:
8883         case GAUDI_QUEUE_ID_NIC_4_1:
8884         case GAUDI_QUEUE_ID_NIC_5_1:
8885         case GAUDI_QUEUE_ID_NIC_6_1:
8886         case GAUDI_QUEUE_ID_NIC_7_1:
8887         case GAUDI_QUEUE_ID_NIC_8_1:
8888         case GAUDI_QUEUE_ID_NIC_9_1:
8889                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8890                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8891                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8892                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8893                 break;
8894         case GAUDI_QUEUE_ID_NIC_0_2:
8895         case GAUDI_QUEUE_ID_NIC_1_2:
8896         case GAUDI_QUEUE_ID_NIC_2_2:
8897         case GAUDI_QUEUE_ID_NIC_3_2:
8898         case GAUDI_QUEUE_ID_NIC_4_2:
8899         case GAUDI_QUEUE_ID_NIC_5_2:
8900         case GAUDI_QUEUE_ID_NIC_6_2:
8901         case GAUDI_QUEUE_ID_NIC_7_2:
8902         case GAUDI_QUEUE_ID_NIC_8_2:
8903         case GAUDI_QUEUE_ID_NIC_9_2:
8904                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8905                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8906                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8907                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8908                 break;
8909         case GAUDI_QUEUE_ID_NIC_0_3:
8910         case GAUDI_QUEUE_ID_NIC_1_3:
8911         case GAUDI_QUEUE_ID_NIC_2_3:
8912         case GAUDI_QUEUE_ID_NIC_3_3:
8913         case GAUDI_QUEUE_ID_NIC_4_3:
8914         case GAUDI_QUEUE_ID_NIC_5_3:
8915         case GAUDI_QUEUE_ID_NIC_6_3:
8916         case GAUDI_QUEUE_ID_NIC_7_3:
8917         case GAUDI_QUEUE_ID_NIC_8_3:
8918         case GAUDI_QUEUE_ID_NIC_9_3:
8919                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8920                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8921                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8922                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8923                 break;
8924         default:
8925                 return -EINVAL;
8926         }
8927
8928         *addr = CFG_BASE + offset;
8929
8930         return 0;
8931 }
8932
8933 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8934 {
8935         u64 monitor_base;
8936         u32 size = 0;
8937         u16 msg_addr_offset;
8938
8939         /*
8940          * monitor_base should be the content of the base0 address registers,
8941          * so it will be added to the msg short offsets
8942          */
8943         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8944
8945         /* First monitor config packet: low address of the sync */
8946         msg_addr_offset =
8947                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8948                                 monitor_base;
8949
8950         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8951                                         msg_addr_offset);
8952
8953         /* Second monitor config packet: high address of the sync */
8954         msg_addr_offset =
8955                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8956                                 monitor_base;
8957
8958         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8959                                         msg_addr_offset);
8960
8961         /*
8962          * Third monitor config packet: the payload, i.e. what to write when the
8963          * sync triggers
8964          */
8965         msg_addr_offset =
8966                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8967                                 monitor_base;
8968
8969         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8970
8971         return size;
8972 }
8973
8974 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8975                                 struct hl_gen_wait_properties *prop)
8976 {
8977         struct hl_cb *cb = (struct hl_cb *) prop->data;
8978         void *buf = cb->kernel_address;
8979         u64 fence_addr = 0;
8980         u32 size = prop->size;
8981
8982         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8983                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8984                                 prop->q_idx);
8985                 return 0;
8986         }
8987
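             /* Wait CB layout: monitor setup packets, arm-monitor packet, then a fence packet */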
8988         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8989         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8990                         prop->sob_mask, prop->sob_val, prop->mon_id);
8991         size += gaudi_add_fence_pkt(buf + size);
8992
8993         return size;
8994 }
8995
8996 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8997 {
8998         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8999
9000         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
9001                 hw_sob->sob_id);
9002
9003         WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9004                         hw_sob->sob_id * 4, 0);
9005
9006         kref_init(&hw_sob->kref);
9007 }
9008
9009 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9010 {
9011         if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
9012                                                         HL_POWER9_HOST_MAGIC) {
9013                 hdev->power9_64bit_dma_enable = 1;
9014                 hdev->dma_mask = 64;
9015         } else {
9016                 hdev->power9_64bit_dma_enable = 0;
9017                 hdev->dma_mask = 48;
9018         }
9019 }
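
/*
 * The mask chosen above is consumed by the common habanalabs PCI code rather
 * than here. As a rough, illustrative sketch of the equivalent DMA API call
 * (not the actual common-code implementation):
 *
 *	rc = dma_set_mask_and_coherent(&hdev->pdev->dev,
 *					DMA_BIT_MASK(hdev->dma_mask));
 */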
9020
9021 static u64 gaudi_get_device_time(struct hl_device *hdev)
9022 {
9023         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9024
9025         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9026 }
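
/*
 * Worked example for the composition above: with CNTCVU reading 0x00000001
 * and CNTCVL reading 0x23456789, the returned device time is
 * 0x0000000123456789.
 */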
9027
9028 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9029                                 u32 *block_size, u32 *block_id)
9030 {
9031         return -EPERM;
9032 }
9033
9034 static int gaudi_block_mmap(struct hl_device *hdev,
9035                                 struct vm_area_struct *vma,
9036                                 u32 block_id, u32 block_size)
9037 {
9038         return -EPERM;
9039 }
9040
9041 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9042 {
9043         struct cpu_dyn_regs *dyn_regs =
9044                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9045         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9046                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
9047                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
9048
9049         WREG32(irq_handler_offset,
9050                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
9051 }
9052
9053 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9054 {
9055         switch (pll_idx) {
9056         case HL_GAUDI_CPU_PLL: return CPU_PLL;
9057         case HL_GAUDI_PCI_PLL: return PCI_PLL;
9058         case HL_GAUDI_NIC_PLL: return NIC_PLL;
9059         case HL_GAUDI_DMA_PLL: return DMA_PLL;
9060         case HL_GAUDI_MESH_PLL: return MESH_PLL;
9061         case HL_GAUDI_MME_PLL: return MME_PLL;
9062         case HL_GAUDI_TPC_PLL: return TPC_PLL;
9063         case HL_GAUDI_IF_PLL: return IF_PLL;
9064         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9065         case HL_GAUDI_HBM_PLL: return HBM_PLL;
9066         default: return -EINVAL;
9067         }
9068 }
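
/*
 * Illustrative caller pattern for the mapping above (variable names are
 * hypothetical): the return value must be checked before it is used as a
 * firmware PLL index, since unknown indices map to -EINVAL.
 *
 *	rc = gaudi_map_pll_idx_to_fw_idx(pll_idx);
 *	if (rc < 0)
 *		return rc;
 *	fw_pll_idx = rc;
 */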
9069
9070 static int gaudi_add_sync_to_engine_map_entry(
9071         struct hl_sync_to_engine_map *map, u32 reg_value,
9072         enum hl_sync_engine_type engine_type, u32 engine_id)
9073 {
9074         struct hl_sync_to_engine_map_entry *entry;
9075
9076         /* The register value represents a partial address of the sync
9077          * object and is used as a unique identifier. For this, the CFG
9078          * base bits must be cleared from the value.
9079          */
9080         if (reg_value == 0 || reg_value == 0xffffffff)
9081                 return 0;
9082         reg_value -= (u32)CFG_BASE;
9083
9084         /* create a new hash entry */
9085         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9086         if (!entry)
9087                 return -ENOMEM;
9088         entry->engine_type = engine_type;
9089         entry->engine_id = engine_id;
9090         entry->sync_id = reg_value;
9091         hash_add(map->tb, &entry->node, reg_value);
9092
9093         return 0;
9094 }
9095
9096 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9097                                 struct hl_sync_to_engine_map *map)
9098 {
9099         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9100         int i, j, rc;
9101         u32 reg_value;
9102
9103         /* Iterate over TPC engines */
9104         for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9105
9106                 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9107                                         sds->props[SP_NEXT_TPC] * i);
9108
9109                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9110                                                         ENGINE_TPC, i);
9111                 if (rc)
9112                         goto free_sync_to_engine_map;
9113         }
9114
9115         /* Iterate over MME engines */
9116         for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9117                 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9118
9119                         reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9120                                                 sds->props[SP_NEXT_MME] * i +
9121                                                 j * sizeof(u32));
9122
9123                         rc = gaudi_add_sync_to_engine_map_entry(
9124                                 map, reg_value, ENGINE_MME,
9125                                 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9126                         if (rc)
9127                                 goto free_sync_to_engine_map;
9128                 }
9129         }
9130
9131         /* Iterate over DMA engines */
9132         for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9133                 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9134                                         sds->props[SP_DMA_QUEUES_OFFSET] * i);
9135                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9136                                                         ENGINE_DMA, i);
9137                 if (rc)
9138                         goto free_sync_to_engine_map;
9139         }
9140
9141         return 0;
9142
9143 free_sync_to_engine_map:
9144         hl_state_dump_free_sync_to_engine_map(map);
9145
9146         return rc;
9147 }
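
/*
 * Worked example for the iteration above (illustrative only): the sync object
 * config register of TPC engine 3 is read from
 * sds->props[SP_TPC0_CFG_SO] + 3 * sds->props[SP_NEXT_TPC], i.e. three
 * per-engine strides past the TPC0 register.
 */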
9148
9149 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9150 {
9151         return FIELD_GET(
9152                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9153                 mon->status);
9154 }
9155
9156 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
9157 {
9158         const size_t max_write = 10;
9159         u32 gid, mask, sob;
9160         int i, offset;
9161
9162         /* Sync object ID is calculated as follows:
9163          * (8 * group_id) + the index of each cleared bit in the mask
9164          */
9165         gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9166                         mon->arm_data);
9167         mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9168                         mon->arm_data);
9169
9170         for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
9171                 max_write; mask >>= 1, i++) {
9172                 if (!(mask & 1)) {
9173                         sob = gid * MONITOR_MAX_SOBS + i;
9174
9175                         if (offset > 0)
9176                                 offset += snprintf(sobs + offset, max_write,
9177                                                         ", ");
9178
9179                         offset += snprintf(sobs + offset, max_write, "%u", sob);
9180                 }
9181         }
9182 }
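
/*
 * Worked example for the decoding above (illustrative only): with
 * arm_data SID = 10 and MASK = 0b11111100, bits 0 and 1 of the mask are
 * cleared, so sync objects 80 and 81 (10 * 8 + 0 and 10 * 8 + 1) are being
 * monitored and the resulting string is "80, 81".
 */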
9183
9184 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9185                                 struct hl_device *hdev,
9186                                 struct hl_mon_state_dump *mon)
9187 {
9188         const char *name;
9189         char scratch_buf1[BIN_REG_STRING_SIZE],
9190                 scratch_buf2[BIN_REG_STRING_SIZE];
9191         char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9192
9193         name = hl_state_dump_get_monitor_name(hdev, mon);
9194         if (!name)
9195                 name = "";
9196
9197         gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9198
9199         return hl_snprintf_resize(
9200                 buf, size, offset,
9201                 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9202                 mon->id, name,
9203                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9204                                 mon->arm_data),
9205                 hl_format_as_binary(
9206                         scratch_buf1, sizeof(scratch_buf1),
9207                         FIELD_GET(
9208                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9209                                 mon->arm_data)),
9210                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9211                                 mon->arm_data),
9212                 mon->wr_data,
9213                 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9214                 hl_format_as_binary(
9215                         scratch_buf2, sizeof(scratch_buf2),
9216                         FIELD_GET(
9217                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9218                                 mon->status)),
9219                 monitored_sobs);
9220 }
9221
9222
9223 static int gaudi_print_fences_single_engine(
9224         struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9225         enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9226         size_t *size, size_t *offset)
9227 {
9228         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9229         int rc = -ENOMEM, i;
9230         u32 *statuses, *fences;
9231
9232         statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9233                         sizeof(*statuses), GFP_KERNEL);
9234         if (!statuses)
9235                 goto out;
9236
9237         fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9238                                 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9239                          sizeof(*fences), GFP_KERNEL);
9240         if (!fences)
9241                 goto free_status;
9242
9243         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9244                 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9245
9246         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9247                                 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9248                 fences[i] = RREG32(base_offset + i * sizeof(u32));
9249
9250         /* The actual print */
9251         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9252                 u32 fence_id;
9253                 u64 fence_cnt, fence_rdata;
9254                 const char *engine_name;
9255
9256                 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9257                         statuses[i]))
9258                         continue;
9259
9260                 fence_id =
9261                         FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9262                 fence_cnt = base_offset + CFG_BASE +
9263                         sizeof(u32) *
9264                         (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9265                 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9266                                 sds->props[SP_FENCE0_RDATA_OFFSET];
9267                 engine_name = hl_sync_engine_to_string(engine_type);
9268
9269                 rc = hl_snprintf_resize(
9270                         buf, size, offset,
9271                         "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9272                         engine_name, engine_id,
9273                         i, fence_id,
9274                         fence_cnt, engine_name, engine_id, fence_id, i,
9275                         fence_rdata, engine_name, engine_id, fence_id, i,
9276                         fences[fence_id],
9277                         statuses[i]);
9278                 if (rc)
9279                         goto free_fences;
9280         }
9281
9282         rc = 0;
9283
9284 free_fences:
9285         kfree(fences);
9286 free_status:
9287         kfree(statuses);
9288 out:
9289         return rc;
9290 }
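
/*
 * Worked example for the address arithmetic above (illustrative only): for
 * stream i with an in-progress fence of id fence_id, fence_cnt is the
 * absolute address
 *
 *	CFG_BASE + base_offset +
 *		sizeof(u32) * (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES])
 *
 * and fence_rdata is that same address rebased from the CP_FENCEx_CNT
 * register block to the CP_FENCEx_RDATA block via the SP_FENCE0_CNT_OFFSET
 * and SP_FENCE0_RDATA_OFFSET properties.
 */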
9291
9292
9293 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9294         .monitor_valid = gaudi_monitor_valid,
9295         .print_single_monitor = gaudi_print_single_monitor,
9296         .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9297         .print_fences_single_engine = gaudi_print_fences_single_engine,
9298 };
9299
9300 static void gaudi_state_dump_init(struct hl_device *hdev)
9301 {
9302         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9303         int i;
9304
9305         for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9306                 hash_add(sds->so_id_to_str_tb,
9307                         &gaudi_so_id_to_str[i].node,
9308                         gaudi_so_id_to_str[i].id);
9309
9310         for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9311                 hash_add(sds->monitor_id_to_str_tb,
9312                         &gaudi_monitor_id_to_str[i].node,
9313                         gaudi_monitor_id_to_str[i].id);
9314
9315         sds->props = gaudi_state_dump_specs_props;
9316
9317         sds->sync_namager_names = gaudi_sync_manager_names;
9318
9319         sds->funcs = gaudi_state_dump_funcs;
9320 }
9321
9322 static u32 *gaudi_get_stream_master_qid_arr(void)
9323 {
9324         return gaudi_stream_master;
9325 }
9326
9327 static const struct hl_asic_funcs gaudi_funcs = {
9328         .early_init = gaudi_early_init,
9329         .early_fini = gaudi_early_fini,
9330         .late_init = gaudi_late_init,
9331         .late_fini = gaudi_late_fini,
9332         .sw_init = gaudi_sw_init,
9333         .sw_fini = gaudi_sw_fini,
9334         .hw_init = gaudi_hw_init,
9335         .hw_fini = gaudi_hw_fini,
9336         .halt_engines = gaudi_halt_engines,
9337         .suspend = gaudi_suspend,
9338         .resume = gaudi_resume,
9339         .mmap = gaudi_mmap,
9340         .ring_doorbell = gaudi_ring_doorbell,
9341         .pqe_write = gaudi_pqe_write,
9342         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9343         .asic_dma_free_coherent = gaudi_dma_free_coherent,
9344         .scrub_device_mem = gaudi_scrub_device_mem,
9345         .get_int_queue_base = gaudi_get_int_queue_base,
9346         .test_queues = gaudi_test_queues,
9347         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9348         .asic_dma_pool_free = gaudi_dma_pool_free,
9349         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9350         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9351         .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9352         .cs_parser = gaudi_cs_parser,
9353         .asic_dma_map_sg = gaudi_dma_map_sg,
9354         .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9355         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9356         .update_eq_ci = gaudi_update_eq_ci,
9357         .context_switch = gaudi_context_switch,
9358         .restore_phase_topology = gaudi_restore_phase_topology,
9359         .debugfs_read32 = gaudi_debugfs_read32,
9360         .debugfs_write32 = gaudi_debugfs_write32,
9361         .debugfs_read64 = gaudi_debugfs_read64,
9362         .debugfs_write64 = gaudi_debugfs_write64,
9363         .debugfs_read_dma = gaudi_debugfs_read_dma,
9364         .add_device_attr = hl_sysfs_add_dev_clk_attr,
9365         .handle_eqe = gaudi_handle_eqe,
9366         .get_events_stat = gaudi_get_events_stat,
9367         .read_pte = gaudi_read_pte,
9368         .write_pte = gaudi_write_pte,
9369         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9370         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9371         .send_heartbeat = gaudi_send_heartbeat,
9372         .debug_coresight = gaudi_debug_coresight,
9373         .is_device_idle = gaudi_is_device_idle,
9374         .non_hard_reset_late_init = gaudi_non_hard_reset_late_init,
9375         .hw_queues_lock = gaudi_hw_queues_lock,
9376         .hw_queues_unlock = gaudi_hw_queues_unlock,
9377         .get_pci_id = gaudi_get_pci_id,
9378         .get_eeprom_data = gaudi_get_eeprom_data,
9379         .send_cpu_message = gaudi_send_cpu_message,
9380         .pci_bars_map = gaudi_pci_bars_map,
9381         .init_iatu = gaudi_init_iatu,
9382         .rreg = hl_rreg,
9383         .wreg = hl_wreg,
9384         .halt_coresight = gaudi_halt_coresight,
9385         .ctx_init = gaudi_ctx_init,
9386         .ctx_fini = gaudi_ctx_fini,
9387         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9388         .load_firmware_to_device = gaudi_load_firmware_to_device,
9389         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9390         .get_signal_cb_size = gaudi_get_signal_cb_size,
9391         .get_wait_cb_size = gaudi_get_wait_cb_size,
9392         .gen_signal_cb = gaudi_gen_signal_cb,
9393         .gen_wait_cb = gaudi_gen_wait_cb,
9394         .reset_sob = gaudi_reset_sob,
9395         .reset_sob_group = gaudi_reset_sob_group,
9396         .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9397         .get_device_time = gaudi_get_device_time,
9398         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9399         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9400         .scramble_addr = hl_mmu_scramble_addr,
9401         .descramble_addr = hl_mmu_descramble_addr,
9402         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9403         .get_hw_block_id = gaudi_get_hw_block_id,
9404         .hw_block_mmap = gaudi_block_mmap,
9405         .enable_events_from_fw = gaudi_enable_events_from_fw,
9406         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9407         .init_firmware_loader = gaudi_init_firmware_loader,
9408         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9409         .state_dump_init = gaudi_state_dump_init,
9410         .get_sob_addr = gaudi_get_sob_addr,
9411         .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9412         .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
9413 };
9414
9415 /**
9416  * gaudi_set_asic_funcs - set GAUDI function pointers
9417  *
9418  * @hdev: pointer to hl_device structure
9419  *
9420  */
9421 void gaudi_set_asic_funcs(struct hl_device *hdev)
9422 {
9423         hdev->asic_funcs = &gaudi_funcs;
9424 }
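
/*
 * A minimal, illustrative sketch of how the common habanalabs code uses the
 * table installed above (not a verbatim copy of the core code): all ASIC
 * specific behavior is reached through the hdev->asic_funcs pointers, e.g.
 *
 *	gaudi_set_asic_funcs(hdev);
 *	rc = hdev->asic_funcs->early_init(hdev);
 *	if (rc)
 *		goto out;
 *	rc = hdev->asic_funcs->sw_init(hdev);
 */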