habanalabs/gaudi: disable CGM permanently
drivers/misc/habanalabs/gaudi/gaudi.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2021 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43  *                      because of TDMA (tensor DMA). Hence, WREG is never
44  *                      secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000         /* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN            20
86
87 #define GAUDI_CB_POOL_CB_CNT            512
88 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
97
98 #define GAUDI_ARB_WDT_TIMEOUT           0x1000000
99
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
101                 BIT(GAUDI_ENGINE_ID_MME_0) |\
102                 BIT(GAUDI_ENGINE_ID_MME_2) |\
103                 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
104
105 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
106
107 #define GAUDI_PLL_MAX 10
108
109 #define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")
110
111 #define MONITOR_SOB_STRING_SIZE         256
112
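/* Stream master queues: the four streams of each PCI DMA QMAN (DMA 0 and DMA 1) */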
113 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
114         GAUDI_QUEUE_ID_DMA_0_0,
115         GAUDI_QUEUE_ID_DMA_0_1,
116         GAUDI_QUEUE_ID_DMA_0_2,
117         GAUDI_QUEUE_ID_DMA_0_3,
118         GAUDI_QUEUE_ID_DMA_1_0,
119         GAUDI_QUEUE_ID_DMA_1_1,
120         GAUDI_QUEUE_ID_DMA_1_2,
121         GAUDI_QUEUE_ID_DMA_1_3
122 };
123
124 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
125                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
126                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
127                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
128                 "gaudi cpu eq"
129 };
130
131 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
132         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
133         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
134         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
135         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
136         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
137         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
138         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
139         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
140 };
141
142 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
143         [0] = GAUDI_QUEUE_ID_DMA_0_0,
144         [1] = GAUDI_QUEUE_ID_DMA_0_1,
145         [2] = GAUDI_QUEUE_ID_DMA_0_2,
146         [3] = GAUDI_QUEUE_ID_DMA_0_3,
147         [4] = GAUDI_QUEUE_ID_DMA_1_0,
148         [5] = GAUDI_QUEUE_ID_DMA_1_1,
149         [6] = GAUDI_QUEUE_ID_DMA_1_2,
150         [7] = GAUDI_QUEUE_ID_DMA_1_3,
151 };
152
153 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
154         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
155         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
156         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
157         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
158         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
159         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
160         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
161         [PACKET_FENCE]          = sizeof(struct packet_fence),
162         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
163         [PACKET_NOP]            = sizeof(struct packet_nop),
164         [PACKET_STOP]           = sizeof(struct packet_stop),
165         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
166         [PACKET_WAIT]           = sizeof(struct packet_wait),
167         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
168 };
169
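/* Return true if @id is a packet type known to the driver, false otherwise */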
170 static inline bool validate_packet_id(enum packet_id id)
171 {
172         switch (id) {
173         case PACKET_WREG_32:
174         case PACKET_WREG_BULK:
175         case PACKET_MSG_LONG:
176         case PACKET_MSG_SHORT:
177         case PACKET_CP_DMA:
178         case PACKET_REPEAT:
179         case PACKET_MSG_PROT:
180         case PACKET_FENCE:
181         case PACKET_LIN_DMA:
182         case PACKET_NOP:
183         case PACKET_STOP:
184         case PACKET_ARB_POINT:
185         case PACKET_WAIT:
186         case PACKET_LOAD_AND_EXE:
187                 return true;
188         default:
189                 return false;
190         }
191 }
192
193 static const char * const
194 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
195         "tpc_address_exceed_slm",
196         "tpc_div_by_0",
197         "tpc_spu_mac_overflow",
198         "tpc_spu_addsub_overflow",
199         "tpc_spu_abs_overflow",
200         "tpc_spu_fp_dst_nan_inf",
201         "tpc_spu_fp_dst_denorm",
202         "tpc_vpu_mac_overflow",
203         "tpc_vpu_addsub_overflow",
204         "tpc_vpu_abs_overflow",
205         "tpc_vpu_fp_dst_nan_inf",
206         "tpc_vpu_fp_dst_denorm",
207         "tpc_assertions",
208         "tpc_illegal_instruction",
209         "tpc_pc_wrap_around",
210         "tpc_qm_sw_err",
211         "tpc_hbw_rresp_err",
212         "tpc_hbw_bresp_err",
213         "tpc_lbw_rresp_err",
214         "tpc_lbw_bresp_err"
215 };
216
217 static const char * const
218 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
219         "PQ AXI HBW error",
220         "CQ AXI HBW error",
221         "CP AXI HBW error",
222         "CP error due to undefined OPCODE",
223         "CP encountered STOP OPCODE",
224         "CP AXI LBW error",
225         "CP WRREG32 or WRBULK returned error",
226         "N/A",
227         "FENCE 0 inc over max value and clipped",
228         "FENCE 1 inc over max value and clipped",
229         "FENCE 2 inc over max value and clipped",
230         "FENCE 3 inc over max value and clipped",
231         "FENCE 0 dec under min value and clipped",
232         "FENCE 1 dec under min value and clipped",
233         "FENCE 2 dec under min value and clipped",
234         "FENCE 3 dec under min value and clipped"
235 };
236
237 static const char * const
238 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
239         "Choice push while full error",
240         "Choice Q watchdog error",
241         "MSG AXI LBW returned with error"
242 };
243
244 enum gaudi_sm_sei_cause {
245         GAUDI_SM_SEI_SO_OVERFLOW,
246         GAUDI_SM_SEI_LBW_4B_UNALIGNED,
247         GAUDI_SM_SEI_AXI_RESPONSE_ERR
248 };
249
250 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
251         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
252         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
253         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
254         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
255         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
256         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
257         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
258         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
259         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
349         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
350         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
351         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
352         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
353         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
354         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
355         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
356         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
357         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
358         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
359         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
360         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
361         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
362         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
363         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
364 };
365
366 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
367         { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
368         { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
369         { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
370         { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
371         { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
372         { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
373         { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
374         { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
375         { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
376         { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
377         { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
378         { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
379         { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
380         { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
381         { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
382         { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
383         { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
384         { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
385         { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
386         { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
387         { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
388         { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
389         { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
390         { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
391         { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
392         { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
393         { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
394 };
395
396 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
397         { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
398         { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
399         { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
400         { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
401         { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
402         { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
403         { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
404         { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
405         { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
406         { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
407         { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
408 };
409
410 static s64 gaudi_state_dump_specs_props[] = {
411         [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
412         [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
413         [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
414         [SP_MON_OBJ_WR_ADDR_LOW] =
415                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
416         [SP_MON_OBJ_WR_ADDR_HIGH] =
417                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
418         [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
419         [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
420         [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
421         [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
422         [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
423         [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
424         [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
425         [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
426         [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
427         [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
428         [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
429         [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
430         [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
431         [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
432         [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
433         [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
434         [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
435         [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
436         [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
437         [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
438         [SP_FENCE0_CNT_OFFSET] =
439                 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
440         [SP_FENCE0_RDATA_OFFSET] =
441                 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
442         [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
443         [SP_NUM_CORES] = 1,
444 };
445
446 /* The order here is opposite to the order of the indexing in the h/w.
447  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
448  */
449 static const char * const gaudi_sync_manager_names[] = {
450         "SYNC_MGR_E_N",
451         "SYNC_MGR_W_N",
452         "SYNC_MGR_E_S",
453         "SYNC_MGR_W_S",
454         NULL
455 };
456
457 struct ecc_info_extract_params {
458         u64 block_address;
459         u32 num_memories;
460         bool derr;
461         bool disable_clock_gating;
462 };
463
464 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
465                                                                 u64 phys_addr);
466 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
467                                         struct hl_cs_job *job);
468 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
469                                         u32 size, u64 val);
470 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
471                                         u32 num_regs, u32 val);
472 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
473                                 u32 tpc_id);
474 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
475 static int gaudi_cpucp_info_get(struct hl_device *hdev);
476 static void gaudi_disable_clock_gating(struct hl_device *hdev);
477 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
478 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
479                                 u32 size, bool eb);
480 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
481                                 struct hl_gen_wait_properties *prop);
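
/*
 * External queues act as collective masters; the DMA5, TPC7 and NIC queues
 * act as collective slaves; all other queues do not support collective
 * operations.
 */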
482 static inline enum hl_collective_mode
483 get_collective_mode(struct hl_device *hdev, u32 queue_id)
484 {
485         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
486                 return HL_COLLECTIVE_MASTER;
487
488         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
489                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
490                 return HL_COLLECTIVE_SLAVE;
491
492         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
493                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
494                 return HL_COLLECTIVE_SLAVE;
495
496         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
497                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
498                 return HL_COLLECTIVE_SLAVE;
499
500         return HL_COLLECTIVE_NOT_SUPPORTED;
501 }
502
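/*
 * Set the default maximum and DC power values according to the card type
 * (PMC vs. PCI) and, for PMC cards, whether FW security is enabled.
 */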
503 static inline void set_default_power_values(struct hl_device *hdev)
504 {
505         struct asic_fixed_properties *prop = &hdev->asic_prop;
506
507         if (hdev->card_type == cpucp_card_type_pmc) {
508                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
509
510                 if (prop->fw_security_enabled)
511                         prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
512                 else
513                         prop->dc_power_default = DC_POWER_DEFAULT_PMC;
514         } else {
515                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
516                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
517         }
518 }
519
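/*
 * Fill the ASIC fixed properties: per-queue properties, SOBs and monitors
 * reserved for sync streams and collectives, DRAM/SRAM address ranges and
 * the PMMU/DMMU configuration.
 */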
520 static int gaudi_set_fixed_properties(struct hl_device *hdev)
521 {
522         struct asic_fixed_properties *prop = &hdev->asic_prop;
523         u32 num_sync_stream_queues = 0;
524         int i;
525
526         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
527         prop->hw_queues_props = kcalloc(prop->max_queues,
528                         sizeof(struct hw_queue_properties),
529                         GFP_KERNEL);
530
531         if (!prop->hw_queues_props)
532                 return -ENOMEM;
533
534         for (i = 0 ; i < prop->max_queues ; i++) {
535                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
536                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
537                         prop->hw_queues_props[i].driver_only = 0;
538                         prop->hw_queues_props[i].supports_sync_stream = 1;
539                         prop->hw_queues_props[i].cb_alloc_flags =
540                                 CB_ALLOC_KERNEL;
541                         num_sync_stream_queues++;
542                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
543                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
544                         prop->hw_queues_props[i].driver_only = 1;
545                         prop->hw_queues_props[i].supports_sync_stream = 0;
546                         prop->hw_queues_props[i].cb_alloc_flags =
547                                 CB_ALLOC_KERNEL;
548                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
549                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
550                         prop->hw_queues_props[i].driver_only = 0;
551                         prop->hw_queues_props[i].supports_sync_stream = 0;
552                         prop->hw_queues_props[i].cb_alloc_flags =
553                                 CB_ALLOC_USER;
554
555                 }
556                 prop->hw_queues_props[i].collective_mode =
557                                                 get_collective_mode(hdev, i);
558         }
559
560         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
561         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
562         prop->collective_first_sob = 0;
563         prop->collective_first_mon = 0;
564
565         /* 2 SOBs per internal queue stream are reserved for collective */
566         prop->sync_stream_first_sob =
567                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
568                         * QMAN_STREAMS * HL_RSVD_SOBS;
569
570         /* 1 monitor per internal queue stream is reserved for collective
571          * 2 monitors per external queue stream are reserved for collective
572          */
573         prop->sync_stream_first_mon =
574                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
575                         (NUMBER_OF_EXT_HW_QUEUES * 2);
576
577         prop->dram_base_address = DRAM_PHYS_BASE;
578         prop->dram_size = GAUDI_HBM_SIZE_32GB;
579         prop->dram_end_address = prop->dram_base_address +
580                                         prop->dram_size;
581         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
582
583         prop->sram_base_address = SRAM_BASE_ADDR;
584         prop->sram_size = SRAM_SIZE;
585         prop->sram_end_address = prop->sram_base_address +
586                                         prop->sram_size;
587         prop->sram_user_base_address = prop->sram_base_address +
588                                         SRAM_USER_BASE_OFFSET;
589
590         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
591         if (hdev->pldm)
592                 prop->mmu_pgt_size = 0x800000; /* 8MB */
593         else
594                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
595         prop->mmu_pte_size = HL_PTE_SIZE;
596         prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
597         prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
598         prop->dram_page_size = PAGE_SIZE_2MB;
599         prop->dram_supports_virtual_memory = false;
600
601         prop->pmmu.hop0_shift = MMU_V1_1_HOP0_SHIFT;
602         prop->pmmu.hop1_shift = MMU_V1_1_HOP1_SHIFT;
603         prop->pmmu.hop2_shift = MMU_V1_1_HOP2_SHIFT;
604         prop->pmmu.hop3_shift = MMU_V1_1_HOP3_SHIFT;
605         prop->pmmu.hop4_shift = MMU_V1_1_HOP4_SHIFT;
606         prop->pmmu.hop0_mask = MMU_V1_1_HOP0_MASK;
607         prop->pmmu.hop1_mask = MMU_V1_1_HOP1_MASK;
608         prop->pmmu.hop2_mask = MMU_V1_1_HOP2_MASK;
609         prop->pmmu.hop3_mask = MMU_V1_1_HOP3_MASK;
610         prop->pmmu.hop4_mask = MMU_V1_1_HOP4_MASK;
611         prop->pmmu.start_addr = VA_HOST_SPACE_START;
612         prop->pmmu.end_addr =
613                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
614         prop->pmmu.page_size = PAGE_SIZE_4KB;
615         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
616         prop->pmmu.last_mask = LAST_MASK;
617
618         /* PMMU and HPMMU are the same except for the page size */
619         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
620         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
621
622         /* shifts and masks are the same in PMMU and DMMU */
623         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
624         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
625         prop->dmmu.end_addr = VA_HOST_SPACE_END;
626         prop->dmmu.page_size = PAGE_SIZE_2MB;
627
628         prop->cfg_size = CFG_SIZE;
629         prop->max_asid = MAX_ASID;
630         prop->num_of_events = GAUDI_EVENT_SIZE;
631         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
632
633         set_default_power_values(hdev);
634
635         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
636         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
637
638         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
639         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
640
641         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
642                                         CARD_NAME_MAX_LEN);
643
644         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
645
646         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
647                         prop->sync_stream_first_sob +
648                         (num_sync_stream_queues * HL_RSVD_SOBS);
649         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
650                         prop->sync_stream_first_mon +
651                         (num_sync_stream_queues * HL_RSVD_MONS);
652
653         prop->first_available_user_msix_interrupt = USHRT_MAX;
654
655         for (i = 0 ; i < HL_MAX_DCORES ; i++)
656                 prop->first_available_cq[i] = USHRT_MAX;
657
658         prop->fw_cpu_boot_dev_sts0_valid = false;
659         prop->fw_cpu_boot_dev_sts1_valid = false;
660         prop->hard_reset_done_by_fw = false;
661         prop->gic_interrupts_enable = true;
662
663         prop->server_type = HL_SERVER_TYPE_UNKNOWN;
664
665         prop->clk_pll_index = HL_GAUDI_MME_PLL;
666         prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
667
668         prop->use_get_power_for_reset_history = true;
669
670         return 0;
671 }
672
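/*
 * Map the SRAM, CFG and HBM PCI BARs (HBM is mapped write-combined) and set
 * rmmio to point at the configuration space inside the CFG BAR.
 */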
673 static int gaudi_pci_bars_map(struct hl_device *hdev)
674 {
675         static const char * const name[] = {"SRAM", "CFG", "HBM"};
676         bool is_wc[3] = {false, false, true};
677         int rc;
678
679         rc = hl_pci_bars_map(hdev, name, is_wc);
680         if (rc)
681                 return rc;
682
683         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
684                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
685
686         return 0;
687 }
688
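/*
 * Move the HBM BAR (inbound region 2) to point at the given device address.
 * Returns the previous address, or U64_MAX if the region could not be set or
 * the iATU is owned by the FW.
 */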
689 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
690 {
691         struct gaudi_device *gaudi = hdev->asic_specific;
692         struct hl_inbound_pci_region pci_region;
693         u64 old_addr = addr;
694         int rc;
695
696         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
697                 return old_addr;
698
699         if (hdev->asic_prop.iatu_done_by_fw)
700                 return U64_MAX;
701
702         /* Inbound Region 2 - Bar 4 - Point to HBM */
703         pci_region.mode = PCI_BAR_MATCH_MODE;
704         pci_region.bar = HBM_BAR_ID;
705         pci_region.addr = addr;
706         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
707         if (rc)
708                 return U64_MAX;
709
710         if (gaudi) {
711                 old_addr = gaudi->hbm_bar_cur_addr;
712                 gaudi->hbm_bar_cur_addr = addr;
713         }
714
715         return old_addr;
716 }
717
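/*
 * Configure the device iATU, unless the FW already did so: inbound regions
 * for SRAM+CFG, SPI flash and HBM, and an outbound region covering the host
 * memory window.
 */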
718 static int gaudi_init_iatu(struct hl_device *hdev)
719 {
720         struct hl_inbound_pci_region inbound_region;
721         struct hl_outbound_pci_region outbound_region;
722         int rc;
723
724         if (hdev->asic_prop.iatu_done_by_fw)
725                 return 0;
726
727         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
728         inbound_region.mode = PCI_BAR_MATCH_MODE;
729         inbound_region.bar = SRAM_BAR_ID;
730         inbound_region.addr = SRAM_BASE_ADDR;
731         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
732         if (rc)
733                 goto done;
734
735         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
736         inbound_region.mode = PCI_BAR_MATCH_MODE;
737         inbound_region.bar = CFG_BAR_ID;
738         inbound_region.addr = SPI_FLASH_BASE_ADDR;
739         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
740         if (rc)
741                 goto done;
742
743         /* Inbound Region 2 - Bar 4 - Point to HBM */
744         inbound_region.mode = PCI_BAR_MATCH_MODE;
745         inbound_region.bar = HBM_BAR_ID;
746         inbound_region.addr = DRAM_PHYS_BASE;
747         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
748         if (rc)
749                 goto done;
750
751         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
752
753         /* Outbound Region 0 - Point to Host */
754         outbound_region.addr = HOST_PHYS_BASE;
755         outbound_region.size = HOST_PHYS_SIZE;
756         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
757
758 done:
759         return rc;
760 }
761
762 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
763 {
764         return RREG32(mmHW_STATE);
765 }
766
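/*
 * Early initialization: set the fixed properties, validate the BAR sizes,
 * find out whether the FW configured the iATU, initialize the PCI layer,
 * read the preboot status and reset the device if its H/W state is dirty.
 */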
767 static int gaudi_early_init(struct hl_device *hdev)
768 {
769         struct asic_fixed_properties *prop = &hdev->asic_prop;
770         struct pci_dev *pdev = hdev->pdev;
771         u32 fw_boot_status;
772         int rc;
773
774         rc = gaudi_set_fixed_properties(hdev);
775         if (rc) {
776                 dev_err(hdev->dev, "Failed setting fixed properties\n");
777                 return rc;
778         }
779
780         /* Check BAR sizes */
781         if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
782                 dev_err(hdev->dev,
783                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
784                         SRAM_BAR_ID,
785                         (unsigned long long) pci_resource_len(pdev,
786                                                         SRAM_BAR_ID),
787                         SRAM_BAR_SIZE);
788                 rc = -ENODEV;
789                 goto free_queue_props;
790         }
791
792         if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
793                 dev_err(hdev->dev,
794                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
795                         CFG_BAR_ID,
796                         (unsigned long long) pci_resource_len(pdev,
797                                                                 CFG_BAR_ID),
798                         CFG_BAR_SIZE);
799                 rc = -ENODEV;
800                 goto free_queue_props;
801         }
802
803         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
804         hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
805
806         /* If FW security is enabled at this point it means no access to ELBI */
807         if (hdev->asic_prop.fw_security_enabled) {
808                 hdev->asic_prop.iatu_done_by_fw = true;
809
810                 /*
811                  * The GIC-security-bit can ONLY be set by CPUCP, so at this
812                  * stage the decision can only be based on the PCI ID security.
813                  */
814                 hdev->asic_prop.gic_interrupts_enable = false;
815                 goto pci_init;
816         }
817
818         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
819                                 &fw_boot_status);
820         if (rc)
821                 goto free_queue_props;
822
823         /* Check whether FW is configuring iATU */
824         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
825                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
826                 hdev->asic_prop.iatu_done_by_fw = true;
827
828 pci_init:
829         rc = hl_pci_init(hdev);
830         if (rc)
831                 goto free_queue_props;
832
833         /* Before continuing in the initialization, we need to read the preboot
834          * version to determine whether we run with a security-enabled firmware
835          */
836         rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
837                                         mmCPU_BOOT_DEV_STS0,
838                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
839                                         mmCPU_BOOT_ERR1,
840                                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
841         if (rc) {
842                 if (hdev->reset_on_preboot_fail)
843                         hdev->asic_funcs->hw_fini(hdev, true, false);
844                 goto pci_fini;
845         }
846
847         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
848                 dev_info(hdev->dev,
849                         "H/W state is dirty, must reset before initializing\n");
850                 hdev->asic_funcs->hw_fini(hdev, true, false);
851         }
852
853         return 0;
854
855 pci_fini:
856         hl_pci_fini(hdev);
857 free_queue_props:
858         kfree(hdev->asic_prop.hw_queues_props);
859         return rc;
860 }
861
862 static int gaudi_early_fini(struct hl_device *hdev)
863 {
864         kfree(hdev->asic_prop.hw_queues_props);
865         hl_pci_fini(hdev);
866
867         return 0;
868 }
869
870 /**
871  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
872  *
873  * @hdev: pointer to hl_device structure
874  * Return: 0 on success, negative error code otherwise.
875  */
876 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
877 {
878         struct asic_fixed_properties *prop = &hdev->asic_prop;
879         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
880         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
881         int rc;
882
883         if (hdev->asic_prop.fw_security_enabled) {
884                 struct gaudi_device *gaudi = hdev->asic_specific;
885
886                 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
887                         return 0;
888
889                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
890
891                 if (rc)
892                         return rc;
893
894                 freq = pll_freq_arr[2];
895         } else {
896                 /* Backward compatibility */
897                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
898                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
899                 nr = RREG32(mmPSOC_CPU_PLL_NR);
900                 nf = RREG32(mmPSOC_CPU_PLL_NF);
901                 od = RREG32(mmPSOC_CPU_PLL_OD);
902
903                 if (div_sel == DIV_SEL_REF_CLK ||
904                                 div_sel == DIV_SEL_DIVIDED_REF) {
905                         if (div_sel == DIV_SEL_REF_CLK)
906                                 freq = PLL_REF_CLK;
907                         else
908                                 freq = PLL_REF_CLK / (div_fctr + 1);
909                 } else if (div_sel == DIV_SEL_PLL_CLK ||
910                         div_sel == DIV_SEL_DIVIDED_PLL) {
911                         pll_clk = PLL_REF_CLK * (nf + 1) /
912                                         ((nr + 1) * (od + 1));
913                         if (div_sel == DIV_SEL_PLL_CLK)
914                                 freq = pll_clk;
915                         else
916                                 freq = pll_clk / (div_fctr + 1);
917                 } else {
918                         dev_warn(hdev->dev,
919                                 "Received invalid div select value: %d",
920                                 div_sel);
921                         freq = 0;
922                 }
923         }
924
925         prop->psoc_timestamp_frequency = freq;
926         prop->psoc_pci_pll_nr = nr;
927         prop->psoc_pci_pll_nf = nf;
928         prop->psoc_pci_pll_od = od;
929         prop->psoc_pci_pll_div_factor = div_fctr;
930
931         return 0;
932 }
933
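/*
 * Copy the TPC kernel image from host memory to the user SRAM base using a
 * LIN_DMA packet sent on QMAN0, then run the kernel on every TPC engine.
 */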
934 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
935                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
936 {
937         struct asic_fixed_properties *prop = &hdev->asic_prop;
938         struct packet_lin_dma *init_tpc_mem_pkt;
939         struct hl_cs_job *job;
940         struct hl_cb *cb;
941         u64 dst_addr;
942         u32 cb_size, ctl;
943         u8 tpc_id;
944         int rc;
945
946         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
947         if (!cb)
948                 return -EFAULT;
949
950         init_tpc_mem_pkt = cb->kernel_address;
951         cb_size = sizeof(*init_tpc_mem_pkt);
952         memset(init_tpc_mem_pkt, 0, cb_size);
953
954         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
955
956         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
957         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
958         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
959         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
960
961         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
962
963         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
964         dst_addr = (prop->sram_user_base_address &
965                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
966                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
967         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
968
969         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
970         if (!job) {
971                 dev_err(hdev->dev, "Failed to allocate a new job\n");
972                 rc = -ENOMEM;
973                 goto release_cb;
974         }
975
976         job->id = 0;
977         job->user_cb = cb;
978         atomic_inc(&job->user_cb->cs_cnt);
979         job->user_cb_size = cb_size;
980         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
981         job->patched_cb = job->user_cb;
982         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
983
984         hl_debugfs_add_job(hdev, job);
985
986         rc = gaudi_send_job_on_qman0(hdev, job);
987
988         if (rc)
989                 goto free_job;
990
991         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
992                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
993                 if (rc)
994                         break;
995         }
996
997 free_job:
998         hl_userptr_delete_list(hdev, &job->userptr_list);
999         hl_debugfs_remove_job(hdev, job);
1000         kfree(job);
1001         atomic_dec(&cb->cs_cnt);
1002
1003 release_cb:
1004         hl_cb_put(cb);
1005         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1006
1007         return rc;
1008 }
1009
1010 /*
1011  * gaudi_init_tpc_mem() - Initialize TPC memories.
1012  * @hdev: Pointer to hl_device structure.
1013  *
1014  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1015  *
1016  * Return: 0 for success, negative value for error.
1017  */
1018 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1019 {
1020         const struct firmware *fw;
1021         size_t fw_size;
1022         void *cpu_addr;
1023         dma_addr_t dma_handle;
1024         int rc, count = 5;
1025
1026 again:
1027         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1028         if (rc == -EINTR && count-- > 0) {
1029                 msleep(50);
1030                 goto again;
1031         }
1032
1033         if (rc) {
1034                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1035                                 GAUDI_TPC_FW_FILE);
1036                 goto out;
1037         }
1038
1039         fw_size = fw->size;
1040         cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1041                         &dma_handle, GFP_KERNEL | __GFP_ZERO);
1042         if (!cpu_addr) {
1043                 dev_err(hdev->dev,
1044                         "Failed to allocate %zu of dma memory for TPC kernel\n",
1045                         fw_size);
1046                 rc = -ENOMEM;
1047                 goto out;
1048         }
1049
1050         memcpy(cpu_addr, fw->data, fw_size);
1051
1052         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1053
1054         hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1055                         dma_handle);
1056
1057 out:
1058         release_firmware(fw);
1059         return rc;
1060 }
1061
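/*
 * Map the SOBs of the stream's current SOB group to the collective slave
 * queues: one SOB per NIC engine plus one extra SOB shared by the DMA5 and
 * TPC7 queues.
 */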
1062 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1063 {
1064         struct gaudi_device *gaudi = hdev->asic_specific;
1065         struct gaudi_collective_properties *prop = &gaudi->collective_props;
1066         struct hl_hw_queue *q;
1067         u32 i, sob_id, sob_group_id, queue_id;
1068
1069         /* Iterate through SOB groups and assign a SOB for each slave queue */
1070         sob_group_id =
1071                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1072         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1073
1074         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1075         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1076                 q = &hdev->kernel_queues[queue_id + (4 * i)];
1077                 q->sync_stream_prop.collective_sob_id = sob_id + i;
1078         }
1079
1080         /* Both DMA5 and TPC7 use the same resources since only a single
1081          * engine needs to participate in the reduction process
1082          */
1083         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1084         q = &hdev->kernel_queues[queue_id];
1085         q->sync_stream_prop.collective_sob_id =
1086                         sob_id + NIC_NUMBER_OF_ENGINES;
1087
1088         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1089         q = &hdev->kernel_queues[queue_id];
1090         q->sync_stream_prop.collective_sob_id =
1091                         sob_id + NIC_NUMBER_OF_ENGINES;
1092 }
1093
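/*
 * kref release callback: clear all SOBs of the group in the sync manager
 * and re-arm the group's refcount so it can be reused.
 */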
1094 static void gaudi_sob_group_hw_reset(struct kref *ref)
1095 {
1096         struct gaudi_hw_sob_group *hw_sob_group =
1097                 container_of(ref, struct gaudi_hw_sob_group, kref);
1098         struct hl_device *hdev = hw_sob_group->hdev;
1099         int i;
1100
1101         for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1102                 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1103                         (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1104
1105         kref_init(&hw_sob_group->kref);
1106 }
1107
1108 static void gaudi_sob_group_reset_error(struct kref *ref)
1109 {
1110         struct gaudi_hw_sob_group *hw_sob_group =
1111                 container_of(ref, struct gaudi_hw_sob_group, kref);
1112         struct hl_device *hdev = hw_sob_group->hdev;
1113
1114         dev_crit(hdev->dev,
1115                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1116                 hw_sob_group->base_sob_id);
1117 }
1118
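/*
 * Build the collective master SOB masks: one bit per enabled NIC engine plus
 * one bit for the collective (reduction) engine.
 */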
1119 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1120 {
1121         struct gaudi_collective_properties *prop;
1122         int i;
1123
1124         prop = &gaudi->collective_props;
1125
1126         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1127
1128         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1129                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1130                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1131                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1132         /* Set collective engine bit */
1133         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1134                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1135 }
1136
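/*
 * Split the SOBs reserved for collectives into groups, clear them in H/W and
 * map them to the collective slave queues of every stream.
 */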
1137 static int gaudi_collective_init(struct hl_device *hdev)
1138 {
1139         u32 i, sob_id, reserved_sobs_per_group;
1140         struct gaudi_collective_properties *prop;
1141         struct gaudi_device *gaudi;
1142
1143         gaudi = hdev->asic_specific;
1144         prop = &gaudi->collective_props;
1145         sob_id = hdev->asic_prop.collective_first_sob;
1146
1147         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1148         reserved_sobs_per_group =
1149                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1150
1151         /* Init SOB groups */
1152         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1153                 prop->hw_sob_group[i].hdev = hdev;
1154                 prop->hw_sob_group[i].base_sob_id = sob_id;
1155                 sob_id += reserved_sobs_per_group;
1156                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1157         }
1158
1159         for (i = 0 ; i < QMAN_STREAMS; i++) {
1160                 prop->next_sob_group_val[i] = 1;
1161                 prop->curr_sob_group_idx[i] = 0;
1162                 gaudi_collective_map_sobs(hdev, i);
1163         }
1164
1165         gaudi_collective_mstr_sob_mask_set(gaudi);
1166
1167         return 0;
1168 }
1169
1170 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1171 {
1172         struct gaudi_device *gaudi = hdev->asic_specific;
1173         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1174
1175         kref_put(&cprop->hw_sob_group[sob_group].kref,
1176                                         gaudi_sob_group_hw_reset);
1177 }
1178
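/*
 * Generate the collective master wait CBs: one wait CB per reserved master
 * monitor, each monitoring a consecutive range of HL_MAX_SOBS_PER_MONITOR
 * SOBs of the group.
 */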
1179 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1180                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1181 {
1182         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1183         struct gaudi_collective_properties *cprop;
1184         struct hl_gen_wait_properties wait_prop;
1185         struct hl_sync_stream_properties *prop;
1186         struct gaudi_device *gaudi;
1187
1188         gaudi = hdev->asic_specific;
1189         cprop = &gaudi->collective_props;
1190         queue_id = job->hw_queue_id;
1191         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1192
1193         master_sob_base =
1194                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1195         master_monitor = prop->collective_mstr_mon_id[0];
1196
1197         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1198
1199         dev_dbg(hdev->dev,
1200                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1201                 master_sob_base, cprop->mstr_sob_mask[0],
1202                 cprop->next_sob_group_val[stream],
1203                 master_monitor, queue_id);
1204
1205         wait_prop.data = (void *) job->patched_cb;
1206         wait_prop.sob_base = master_sob_base;
1207         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1208         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1209         wait_prop.mon_id = master_monitor;
1210         wait_prop.q_idx = queue_id;
1211         wait_prop.size = cb_size;
1212         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1213
1214         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1215         master_monitor = prop->collective_mstr_mon_id[1];
1216
1217         dev_dbg(hdev->dev,
1218                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1219                 master_sob_base, cprop->mstr_sob_mask[1],
1220                 cprop->next_sob_group_val[stream],
1221                 master_monitor, queue_id);
1222
1223         wait_prop.sob_base = master_sob_base;
1224         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1225         wait_prop.mon_id = master_monitor;
1226         wait_prop.size = cb_size;
1227         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1228 }
1229
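/*
 * Generate the collective slave CB: wait on the signal SOB using the slave
 * monitor, then signal the slave's collective SOB.
 */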
1230 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1231                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1232 {
1233         struct hl_gen_wait_properties wait_prop;
1234         struct hl_sync_stream_properties *prop;
1235         u32 queue_id, cb_size = 0;
1236
1237         queue_id = job->hw_queue_id;
1238         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1239
1240         if (job->cs->encaps_signals) {
1241                 /* use the encaps signal handle stored earlier in the flow
1242                  * and set the SOB information from the encaps
1243                  * signals handle
1244                  */
1245                 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1246                                                 cs_cmpl);
1247
1248                 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1249                                 job->cs->sequence,
1250                                 cs_cmpl->hw_sob->sob_id,
1251                                 cs_cmpl->sob_val);
1252         }
1253
1254         /* Add to wait CBs using slave monitor */
1255         wait_prop.data = (void *) job->user_cb;
1256         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1257         wait_prop.sob_mask = 0x1;
1258         wait_prop.sob_val = cs_cmpl->sob_val;
1259         wait_prop.mon_id = prop->collective_slave_mon_id;
1260         wait_prop.q_idx = queue_id;
1261         wait_prop.size = cb_size;
1262
1263         dev_dbg(hdev->dev,
1264                 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1265                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1266                 prop->collective_slave_mon_id, queue_id);
1267
1268         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1269
1270         dev_dbg(hdev->dev,
1271                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1272                 prop->collective_sob_id, queue_id);
1273
1274         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1275                         prop->collective_sob_id, cb_size, false);
1276 }
1277
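/*
 * Set up a collective wait CS: resolve the signal SOB (regular or
 * encapsulated), take a refcount on it if the signal CS has not completed
 * yet, build the master/slave wait jobs and advance the stream's SOB group
 * value, handling wraparound.
 */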
1278 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1279 {
1280         struct hl_cs_compl *signal_cs_cmpl =
1281                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1282         struct hl_cs_compl *cs_cmpl =
1283                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1284         struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1285         struct gaudi_collective_properties *cprop;
1286         u32 stream, queue_id, sob_group_offset;
1287         struct gaudi_device *gaudi;
1288         struct hl_device *hdev;
1289         struct hl_cs_job *job;
1290         struct hl_ctx *ctx;
1291
1292         ctx = cs->ctx;
1293         hdev = ctx->hdev;
1294         gaudi = hdev->asic_specific;
1295         cprop = &gaudi->collective_props;
1296
1297         if (cs->encaps_signals) {
1298                 cs_cmpl->hw_sob = handle->hw_sob;
1299                 /* At this checkpoint we only need the hw_sob pointer
1300                  * for the completion check before we start going over the
1301                  * jobs of the master/slaves. The sob_value will be taken
1302                  * later on in gaudi_collective_slave_init_job, depending on
1303                  * each job's wait offset value.
1304                  */
1305                 cs_cmpl->sob_val = 0;
1306         } else {
1307                 /* copy the SOB id and value of the signal CS */
1308                 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1309                 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1310         }
1311
1312         /* Check again whether the signal CS has already completed.
1313          * If it has, don't send any wait CS, since the hw_sob
1314          * could already be in reset. If the signal has not completed,
1315          * take a refcount on the hw_sob to prevent the SOB from being
1316          * reset while the wait CS is not yet submitted.
1317          * Note that this check is protected by two locks:
1318          * the hw queue lock and the completion object lock.
1319          * The completion object lock also protects
1320          * the hw_sob reset handler function.
1321          * The hw_queue lock prevents the hw_sob refcount value, which is
1322          * changed by the signal/wait flows, from going out of sync.
1323          */
1324         spin_lock(&signal_cs_cmpl->lock);
1325
1326         if (completion_done(&cs->signal_fence->completion)) {
1327                 spin_unlock(&signal_cs_cmpl->lock);
1328                 return -EINVAL;
1329         }
1330         /* Increment kref since all slave queues are now waiting on it */
1331         kref_get(&cs_cmpl->hw_sob->kref);
1332
1333         spin_unlock(&signal_cs_cmpl->lock);
1334
1335         /* Calculate the stream from collective master queue (1st job) */
1336         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1337         stream = job->hw_queue_id % 4;
1338         sob_group_offset =
1339                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1340
1341         list_for_each_entry(job, &cs->job_list, cs_node) {
1342                 queue_id = job->hw_queue_id;
1343
1344                 if (hdev->kernel_queues[queue_id].collective_mode ==
1345                                 HL_COLLECTIVE_MASTER)
1346                         gaudi_collective_master_init_job(hdev, job, stream,
1347                                                 sob_group_offset);
1348                 else
1349                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1350         }
1351
1352         cs_cmpl->sob_group = sob_group_offset;
1353
1354         /* Handle sob group kref and wraparound */
1355         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1356         cprop->next_sob_group_val[stream]++;
1357
1358         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1359                 /*
1360                  * Decrement as we reached the max value.
1361                  * The release function won't be called here as we've
1362                  * just incremented the refcount.
1363                  */
1364                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1365                                 gaudi_sob_group_reset_error);
1366                 cprop->next_sob_group_val[stream] = 1;
1367                 /* only two SOBs are currently in use */
1368                 cprop->curr_sob_group_idx[stream] =
1369                         (cprop->curr_sob_group_idx[stream] + 1) &
1370                                                         (HL_RSVD_SOBS - 1);
1371
1372                 gaudi_collective_map_sobs(hdev, stream);
1373
1374                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1375                                 cprop->curr_sob_group_idx[stream], stream);
1376         }
1377
1378         mb();
1379         hl_fence_put(cs->signal_fence);
1380         cs->signal_fence = NULL;
1381
1382         return 0;
1383 }
1384
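/*
 * Create a single job for a collective wait CS: allocate a kernel CB sized
 * according to the master/slave role, attach it to a new job and add the job
 * to the CS job list. The wait/signal packets are written into the CB by
 * gaudi_collective_master_init_job() / gaudi_collective_slave_init_job().
 */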
1385 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1386                 struct hl_ctx *ctx, struct hl_cs *cs,
1387                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1388                 u32 encaps_signal_offset)
1389 {
1390         struct hw_queue_properties *hw_queue_prop;
1391         struct hl_cs_counters_atomic *cntr;
1392         struct hl_cs_job *job;
1393         struct hl_cb *cb;
1394         u32 cb_size;
1395         bool patched_cb;
1396
1397         cntr = &hdev->aggregated_cs_counters;
1398
1399         if (mode == HL_COLLECTIVE_MASTER) {
1400                 /* CB size of collective master queue contains
1401                  * 4 msg short packets for monitor 1 configuration
1402                  * 1 fence packet
1403                  * 4 msg short packets for monitor 2 configuration
1404                  * 1 fence packet
1405                  * 2 msg prot packets for completion and MSI-X
1406                  */
1407                 cb_size = sizeof(struct packet_msg_short) * 8 +
1408                                 sizeof(struct packet_fence) * 2 +
1409                                 sizeof(struct packet_msg_prot) * 2;
1410                 patched_cb = true;
1411         } else {
1412                 /* CB size of collective slave queues contains
1413                  * 4 msg short packets for monitor configuration
1414                  * 1 fence packet
1415                  * 1 additional msg short packet for sob signal
1416                  */
1417                 cb_size = sizeof(struct packet_msg_short) * 5 +
1418                                 sizeof(struct packet_fence);
1419                 patched_cb = false;
1420         }
1421
1422         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1423         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1424         if (!job) {
1425                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1426                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1427                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1428                 return -ENOMEM;
1429         }
1430
1431         /* Allocate an internal mapped CB for non-patched CBs */
1432         cb = hl_cb_kernel_create(hdev, cb_size,
1433                         hdev->mmu_enable && !patched_cb);
1434         if (!cb) {
1435                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1436                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1437                 kfree(job);
1438                 return -EFAULT;
1439         }
1440
1441         job->id = 0;
1442         job->cs = cs;
1443         job->user_cb = cb;
1444         atomic_inc(&job->user_cb->cs_cnt);
1445         job->user_cb_size = cb_size;
1446         job->hw_queue_id = queue_id;
1447
1448         /* Since the collective wait CS is guaranteed to have only one
1449          * chunk, we can use this chunk to set the encapsulated signal
1450          * offset in the jobs.
1451          */
1452         if (cs->encaps_signals)
1453                 job->encaps_sig_wait_offset = encaps_signal_offset;
1454
1455         /*
1456          * No need for parsing - the user CB is already the patched CB.
1457          * We call hl_cb_destroy() for two reasons: we don't need the CB
1458          * in the CB idr anymore, and we must decrement its refcount,
1459          * which was incremented inside hl_cb_kernel_create().
1460          */
1461         if (patched_cb)
1462                 job->patched_cb = job->user_cb;
1463         else
1464                 job->patched_cb = NULL;
1465
1466         job->job_cb_size = job->user_cb_size;
1467         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1468
1469         /* Increment the CS refcount since for external queues we get a completion */
1470         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1471                 cs_get(cs);
1472
1473         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1474
1475         list_add_tail(&job->cs_node, &cs->job_list);
1476
1477         hl_debugfs_add_job(hdev, job);
1478
1479         return 0;
1480 }
1481
1482 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1483                 struct hl_ctx *ctx, struct hl_cs *cs,
1484                 u32 wait_queue_id, u32 collective_engine_id,
1485                 u32 encaps_signal_offset)
1486 {
1487         struct gaudi_device *gaudi = hdev->asic_specific;
1488         struct hw_queue_properties *hw_queue_prop;
1489         u32 queue_id, collective_queue, num_jobs;
1490         u32 stream, nic_queue, nic_idx = 0;
1491         bool skip;
1492         int i, rc = 0;
1493
1494         /* Verify wait queue id is configured as master */
1495         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1496         if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1497                 dev_err(hdev->dev,
1498                         "Queue %d is not configured as collective master\n",
1499                         wait_queue_id);
1500                 return -EINVAL;
1501         }
1502
1503         /* Verify engine id is supported */
1504         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1505                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1506                 dev_err(hdev->dev,
1507                         "Collective wait does not support engine %u\n",
1508                         collective_engine_id);
1509                 return -EINVAL;
1510         }
1511
1512         stream = wait_queue_id % 4;
1513
1514         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1515                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1516         else
1517                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1518
1519         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1520         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1521
1522         /* The first job goes to the collective master queue; it will wait
1523          * for the collective slave queues to finish execution.
1524          * The synchronization is done using two monitors:
1525          * the first monitor for NICs 0-7, the second monitor for NICs 8-9
1526          * and the reduction engine (DMA5/TPC7).
1527          *
1528          * The rest of the jobs go to the collective slave queues, which
1529          * will all wait for the user to signal sob 'cs_cmpl->sob_val'.
1530          */
1531         for (i = 0 ; i < num_jobs ; i++) {
1532                 if (i == 0) {
1533                         queue_id = wait_queue_id;
1534                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1535                                 HL_COLLECTIVE_MASTER, queue_id,
1536                                 wait_queue_id, encaps_signal_offset);
1537                 } else {
1538                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1539                                 if (gaudi->hw_cap_initialized &
1540                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1541                                         skip = false;
1542                                 else
1543                                         skip = true;
1544
1545                                 queue_id = nic_queue;
1546                                 nic_queue += 4;
1547                                 nic_idx++;
1548
1549                                 if (skip)
1550                                         continue;
1551                         } else {
1552                                 queue_id = collective_queue;
1553                         }
1554
1555                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1556                                 HL_COLLECTIVE_SLAVE, queue_id,
1557                                 wait_queue_id, encaps_signal_offset);
1558                 }
1559
1560                 if (rc)
1561                         return rc;
1562         }
1563
1564         return rc;
1565 }
1566
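/*
 * Late initialization - runs once the device CPU (CPU-CP) is up: fetch the
 * cpucp info, disable the unused NIC QMANs on PCI cards, enable PCI access
 * from the device CPU, scrub SRAM/DRAM, clear the MMU page tables range,
 * initialize the TPC memories, set up the collective SOBs and prepare the MMU
 * for the single user ASID.
 */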
1567 static int gaudi_late_init(struct hl_device *hdev)
1568 {
1569         struct gaudi_device *gaudi = hdev->asic_specific;
1570         int rc;
1571
1572         rc = gaudi->cpucp_info_get(hdev);
1573         if (rc) {
1574                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1575                 return rc;
1576         }
1577
1578         if ((hdev->card_type == cpucp_card_type_pci) &&
1579                         (hdev->nic_ports_mask & 0x3)) {
1580                 dev_info(hdev->dev,
1581                         "PCI card detected, only 8 ports are enabled\n");
1582                 hdev->nic_ports_mask &= ~0x3;
1583
1584                 /* Stop and disable unused NIC QMANs */
1585                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1586                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1587                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1588
1589                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1590                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1591                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1592
1593                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1594                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1595
1596                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1597         }
1598
1599         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1600         if (rc) {
1601                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1602                 return rc;
1603         }
1604
1605         /* Scrub both SRAM and DRAM */
1606         rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
1607         if (rc)
1608                 goto disable_pci_access;
1609
1610         rc = gaudi_fetch_psoc_frequency(hdev);
1611         if (rc) {
1612                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1613                 goto disable_pci_access;
1614         }
1615
1616         rc = gaudi_mmu_clear_pgt_range(hdev);
1617         if (rc) {
1618                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1619                 goto disable_pci_access;
1620         }
1621
1622         rc = gaudi_init_tpc_mem(hdev);
1623         if (rc) {
1624                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1625                 goto disable_pci_access;
1626         }
1627
1628         rc = gaudi_collective_init(hdev);
1629         if (rc) {
1630                 dev_err(hdev->dev, "Failed to init collective\n");
1631                 goto disable_pci_access;
1632         }
1633
1634         /* We only support a single ASID for the user, so for the sake of optimization,
1635          * just initialize the ASID once during device initialization with the fixed value of 1.
1636          */
1637         gaudi_mmu_prepare(hdev, 1);
1638
1639         hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
1640
1641         return 0;
1642
1643 disable_pci_access:
1644         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1645
1646         return rc;
1647 }
1648
1649 static void gaudi_late_fini(struct hl_device *hdev)
1650 {
1651         const struct hwmon_channel_info **channel_info_arr;
1652         int i = 0;
1653
1654         if (!hdev->hl_chip_info->info)
1655                 return;
1656
1657         channel_info_arr = hdev->hl_chip_info->info;
1658
1659         while (channel_info_arr[i]) {
1660                 kfree(channel_info_arr[i]->config);
1661                 kfree(channel_info_arr[i]);
1662                 i++;
1663         }
1664
1665         kfree(channel_info_arr);
1666
1667         hdev->hl_chip_info->info = NULL;
1668 }
1669
1670 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1671 {
1672         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1673         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1674         int i, j, rc = 0;
1675
1676         /*
1677          * The device CPU works with 40-bit addresses, and bit 39 must be set
1678          * to '1' when accessing the host.
1679          * Bits 49:39 of the full host address are saved for a later
1680          * configuration of the HW to extend the address to 50 bits.
1681          * Because a single HW register holds the extension bits, these bits
1682          * must be identical across the entire allocated range.
1683          */
1684
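        /* Retry the allocation a few times until the start and end of the
         * allocated range share the same MSBs, so that a single extension
         * value can cover the entire CPU accessible memory.
         */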
1685         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1686                 virt_addr_arr[i] =
1687                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1688                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1689                                                 &dma_addr_arr[i],
1690                                                 GFP_KERNEL | __GFP_ZERO);
1691                 if (!virt_addr_arr[i]) {
1692                         rc = -ENOMEM;
1693                         goto free_dma_mem_arr;
1694                 }
1695
1696                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1697                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1698                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1699                         break;
1700         }
1701
1702         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1703                 dev_err(hdev->dev,
1704                         "MSB of CPU accessible DMA memory is not identical across the allocated range\n");
1705                 rc = -EFAULT;
1706                 goto free_dma_mem_arr;
1707         }
1708
1709         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1710         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1711         hdev->cpu_pci_msb_addr =
1712                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1713
1714         if (!hdev->asic_prop.fw_security_enabled)
1715                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1716
1717 free_dma_mem_arr:
1718         for (j = 0 ; j < i ; j++)
1719                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1720                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1721                                                 virt_addr_arr[j],
1722                                                 dma_addr_arr[j]);
1723
1724         return rc;
1725 }
1726
1727 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1728 {
1729         struct gaudi_device *gaudi = hdev->asic_specific;
1730         struct gaudi_internal_qman_info *q;
1731         u32 i;
1732
1733         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1734                 q = &gaudi->internal_qmans[i];
1735                 if (!q->pq_kernel_addr)
1736                         continue;
1737                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1738                                                         q->pq_kernel_addr,
1739                                                         q->pq_dma_addr);
1740         }
1741 }
1742
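/*
 * Allocate the PQ buffers of all internal (on-device) queues in host coherent
 * memory. The PQ size depends on the engine type (HBM DMA, MME, TPC or NIC).
 */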
1743 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1744 {
1745         struct gaudi_device *gaudi = hdev->asic_specific;
1746         struct gaudi_internal_qman_info *q;
1747         int rc, i;
1748
1749         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1750                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1751                         continue;
1752
1753                 q = &gaudi->internal_qmans[i];
1754
1755                 switch (i) {
1756                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1757                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1758                         break;
1759                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1760                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1761                         break;
1762                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1763                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1764                         break;
1765                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1766                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1767                         break;
1768                 default:
1769                         dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1770                         rc = -EINVAL;
1771                         goto free_internal_qmans_pq_mem;
1772                 }
1773
1774                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1775                                                 hdev, q->pq_size,
1776                                                 &q->pq_dma_addr,
1777                                                 GFP_KERNEL | __GFP_ZERO);
1778                 if (!q->pq_kernel_addr) {
1779                         rc = -ENOMEM;
1780                         goto free_internal_qmans_pq_mem;
1781                 }
1782         }
1783
1784         return 0;
1785
1786 free_internal_qmans_pq_mem:
1787         gaudi_free_internal_qmans_pq_mem(hdev);
1788         return rc;
1789 }
1790
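/*
 * Describe the device address ranges (CFG, SRAM, DRAM and the PSOC scratchpad)
 * that are exposed to the host through the PCI BARs.
 */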
1791 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1792 {
1793         struct asic_fixed_properties *prop = &hdev->asic_prop;
1794         struct pci_mem_region *region;
1795
1796         /* CFG */
1797         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1798         region->region_base = CFG_BASE;
1799         region->region_size = CFG_SIZE;
1800         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1801         region->bar_size = CFG_BAR_SIZE;
1802         region->bar_id = CFG_BAR_ID;
1803         region->used = 1;
1804
1805         /* SRAM */
1806         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1807         region->region_base = SRAM_BASE_ADDR;
1808         region->region_size = SRAM_SIZE;
1809         region->offset_in_bar = 0;
1810         region->bar_size = SRAM_BAR_SIZE;
1811         region->bar_id = SRAM_BAR_ID;
1812         region->used = 1;
1813
1814         /* DRAM */
1815         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1816         region->region_base = DRAM_PHYS_BASE;
1817         region->region_size = hdev->asic_prop.dram_size;
1818         region->offset_in_bar = 0;
1819         region->bar_size = prop->dram_pci_bar_size;
1820         region->bar_id = HBM_BAR_ID;
1821         region->used = 1;
1822
1823         /* SP SRAM */
1824         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1825         region->region_base = PSOC_SCRATCHPAD_ADDR;
1826         region->region_size = PSOC_SCRATCHPAD_SIZE;
1827         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1828         region->bar_size = CFG_BAR_SIZE;
1829         region->bar_id = CFG_BAR_ID;
1830         region->used = 1;
1831 }
1832
1833 static int gaudi_sw_init(struct hl_device *hdev)
1834 {
1835         struct gaudi_device *gaudi;
1836         u32 i, event_id = 0;
1837         int rc;
1838
1839         /* Allocate device structure */
1840         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1841         if (!gaudi)
1842                 return -ENOMEM;
1843
1844         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1845                 if (gaudi_irq_map_table[i].valid) {
1846                         if (event_id == GAUDI_EVENT_SIZE) {
1847                                 dev_err(hdev->dev,
1848                                         "Event array exceeds the limit of %u events\n",
1849                                         GAUDI_EVENT_SIZE);
1850                                 rc = -EINVAL;
1851                                 goto free_gaudi_device;
1852                         }
1853
1854                         gaudi->events[event_id++] =
1855                                         gaudi_irq_map_table[i].fc_id;
1856                 }
1857         }
1858
1859         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1860
1861         hdev->asic_specific = gaudi;
1862
1863         /* Create DMA pool for small allocations */
1864         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1865                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1866         if (!hdev->dma_pool) {
1867                 dev_err(hdev->dev, "failed to create DMA pool\n");
1868                 rc = -ENOMEM;
1869                 goto free_gaudi_device;
1870         }
1871
1872         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1873         if (rc)
1874                 goto free_dma_pool;
1875
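        /* Manage the CPU accessible memory with a gen_pool of 32-byte
         * granularity (ilog2(32) is the pool's minimum allocation order).
         */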
1876         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1877         if (!hdev->cpu_accessible_dma_pool) {
1878                 dev_err(hdev->dev,
1879                         "Failed to create CPU accessible DMA pool\n");
1880                 rc = -ENOMEM;
1881                 goto free_cpu_dma_mem;
1882         }
1883
1884         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1885                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1886                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1887         if (rc) {
1888                 dev_err(hdev->dev,
1889                         "Failed to add memory to CPU accessible DMA pool\n");
1890                 rc = -EFAULT;
1891                 goto free_cpu_accessible_dma_pool;
1892         }
1893
1894         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1895         if (rc)
1896                 goto free_cpu_accessible_dma_pool;
1897
1898         spin_lock_init(&gaudi->hw_queues_lock);
1899         mutex_init(&gaudi->clk_gate_mutex);
1900
1901         hdev->supports_sync_stream = true;
1902         hdev->supports_coresight = true;
1903         hdev->supports_staged_submission = true;
1904         hdev->supports_wait_for_multi_cs = true;
1905
1906         hdev->asic_funcs->set_pci_memory_regions(hdev);
1907         hdev->stream_master_qid_arr =
1908                                 hdev->asic_funcs->get_stream_master_qid_arr();
1909         hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1910
1911         return 0;
1912
1913 free_cpu_accessible_dma_pool:
1914         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1915 free_cpu_dma_mem:
1916         if (!hdev->asic_prop.fw_security_enabled)
1917                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1918                                         hdev->cpu_pci_msb_addr);
1919         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1920                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1921                         hdev->cpu_accessible_dma_mem,
1922                         hdev->cpu_accessible_dma_address);
1923 free_dma_pool:
1924         dma_pool_destroy(hdev->dma_pool);
1925 free_gaudi_device:
1926         kfree(gaudi);
1927         return rc;
1928 }
1929
1930 static int gaudi_sw_fini(struct hl_device *hdev)
1931 {
1932         struct gaudi_device *gaudi = hdev->asic_specific;
1933
1934         gaudi_free_internal_qmans_pq_mem(hdev);
1935
1936         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1937
1938         if (!hdev->asic_prop.fw_security_enabled)
1939                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1940                                         hdev->cpu_pci_msb_addr);
1941
1942         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1943                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1944                         hdev->cpu_accessible_dma_mem,
1945                         hdev->cpu_accessible_dma_address);
1946
1947         dma_pool_destroy(hdev->dma_pool);
1948
1949         mutex_destroy(&gaudi->clk_gate_mutex);
1950
1951         kfree(gaudi);
1952
1953         return 0;
1954 }
1955
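/*
 * In single MSI mode all completion queues and the event queue share one
 * interrupt, so the handler simply polls all of them.
 */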
1956 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1957 {
1958         struct hl_device *hdev = arg;
1959         int i;
1960
1961         if (hdev->disabled)
1962                 return IRQ_HANDLED;
1963
1964         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1965                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1966
1967         hl_irq_handler_eq(irq, &hdev->event_queue);
1968
1969         return IRQ_HANDLED;
1970 }
1971
1972 /*
1973  * For backward compatibility, new MSI interrupts should be set after the
1974  * existing CPU and NIC interrupts.
1975  */
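/*
 * The resulting layout, as implemented below: indices below
 * GAUDI_EVENT_QUEUE_MSI_IDX (and the CPU event queue itself) map directly to
 * their MSI vector, while any other index is shifted by
 * NIC_NUMBER_OF_ENGINES + 1, apparently to skip the NIC vectors and the CPU
 * event queue vector.
 */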
1976 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1977                                 bool cpu_eq)
1978 {
1979         int msi_vec;
1980
1981         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1982                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1983                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1984
1985         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1986                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1987
1988         return pci_irq_vector(hdev->pdev, msi_vec);
1989 }
1990
1991 static int gaudi_enable_msi_single(struct hl_device *hdev)
1992 {
1993         int rc, irq;
1994
1995         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1996
1997         irq = gaudi_pci_irq_vector(hdev, 0, false);
1998         rc = request_irq(irq, gaudi_irq_handler_single, 0,
1999                         "gaudi single msi", hdev);
2000         if (rc)
2001                 dev_err(hdev->dev,
2002                         "Failed to request single MSI IRQ\n");
2003
2004         return rc;
2005 }
2006
2007 static int gaudi_enable_msi_multi(struct hl_device *hdev)
2008 {
2009         int cq_cnt = hdev->asic_prop.completion_queues_count;
2010         int rc, i, irq_cnt_init, irq;
2011
2012         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2013                 irq = gaudi_pci_irq_vector(hdev, i, false);
2014                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
2015                                 &hdev->completion_queue[i]);
2016                 if (rc) {
2017                         dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2018                         goto free_irqs;
2019                 }
2020         }
2021
2022         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2023         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2024                                 &hdev->event_queue);
2025         if (rc) {
2026                 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2027                 goto free_irqs;
2028         }
2029
2030         return 0;
2031
2032 free_irqs:
2033         for (i = 0 ; i < irq_cnt_init ; i++)
2034                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
2035                                 &hdev->completion_queue[i]);
2036         return rc;
2037 }
2038
2039 static int gaudi_enable_msi(struct hl_device *hdev)
2040 {
2041         struct gaudi_device *gaudi = hdev->asic_specific;
2042         int rc;
2043
2044         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2045                 return 0;
2046
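        /* Only a single MSI vector is requested here (min == max == 1), so in
         * practice the multi-MSI branch below is taken only if this call is
         * changed to request more vectors.
         */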
2047         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2048         if (rc < 0) {
2049                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2050                 return rc;
2051         }
2052
2053         if (rc < NUMBER_OF_INTERRUPTS) {
2054                 gaudi->multi_msi_mode = false;
2055                 rc = gaudi_enable_msi_single(hdev);
2056         } else {
2057                 gaudi->multi_msi_mode = true;
2058                 rc = gaudi_enable_msi_multi(hdev);
2059         }
2060
2061         if (rc)
2062                 goto free_pci_irq_vectors;
2063
2064         gaudi->hw_cap_initialized |= HW_CAP_MSI;
2065
2066         return 0;
2067
2068 free_pci_irq_vectors:
2069         pci_free_irq_vectors(hdev->pdev);
2070         return rc;
2071 }
2072
2073 static void gaudi_sync_irqs(struct hl_device *hdev)
2074 {
2075         struct gaudi_device *gaudi = hdev->asic_specific;
2076         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2077
2078         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2079                 return;
2080
2081         /* Wait for all pending IRQ handlers to finish */
2082         if (gaudi->multi_msi_mode) {
2083                 for (i = 0 ; i < cq_cnt ; i++)
2084                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2085
2086                 synchronize_irq(gaudi_pci_irq_vector(hdev,
2087                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
2088                                                 true));
2089         } else {
2090                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2091         }
2092 }
2093
2094 static void gaudi_disable_msi(struct hl_device *hdev)
2095 {
2096         struct gaudi_device *gaudi = hdev->asic_specific;
2097         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2098
2099         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2100                 return;
2101
2102         gaudi_sync_irqs(hdev);
2103
2104         if (gaudi->multi_msi_mode) {
2105                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2106                                                 true);
2107                 free_irq(irq, &hdev->event_queue);
2108
2109                 for (i = 0 ; i < cq_cnt ; i++) {
2110                         irq = gaudi_pci_irq_vector(hdev, i, false);
2111                         free_irq(irq, &hdev->completion_queue[i]);
2112                 }
2113         } else {
2114                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2115         }
2116
2117         pci_free_irq_vectors(hdev->pdev);
2118
2119         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2120 }
2121
2122 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2123 {
2124         struct gaudi_device *gaudi = hdev->asic_specific;
2125
2126         if (hdev->asic_prop.fw_security_enabled)
2127                 return;
2128
2129         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2130                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2131                 return;
2132
2133         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2134                 return;
2135
2136         if (!hdev->sram_scrambler_enable)
2137                 return;
2138
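        /* Enable SRAM address scrambling in all NIF/SIF routers and in the
         * DMA interface down channels.
         */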
2139         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2140                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2141         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2142                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2143         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2144                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2145         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2146                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2147         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2148                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2149         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2150                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2151         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2152                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2153         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2154                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2155
2156         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2157                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2158         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2159                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2160         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2161                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2162         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2163                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2164         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2165                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2166         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2167                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2168         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2169                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2170         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2171                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2172
2173         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2174                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2175         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2176                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2177         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2178                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2179         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2180                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2181         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2182                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2183         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2184                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2185         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2186                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2187         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2188                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2189
2190         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2191 }
2192
2193 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2194 {
2195         struct gaudi_device *gaudi = hdev->asic_specific;
2196
2197         if (hdev->asic_prop.fw_security_enabled)
2198                 return;
2199
2200         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2201                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2202                 return;
2203
2204         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2205                 return;
2206
2207         if (!hdev->dram_scrambler_enable)
2208                 return;
2209
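        /* Enable HBM address scrambling in all NIF/SIF routers and in the
         * DMA interface down channels.
         */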
2210         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2211                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2212         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2213                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2214         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2215                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2216         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2217                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2218         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2219                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2220         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2221                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2222         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2223                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2224         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2225                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2226
2227         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2228                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2229         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2230                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2231         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2232                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2233         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2234                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2235         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2236                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2237         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2238                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2239         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2240                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2241         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2242                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2243
2244         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2245                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2246         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2247                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2248         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2249                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2250         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2251                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2252         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2253                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2254         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2255                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2256         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2257                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2258         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2259                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2260
2261         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2262 }
2263
2264 static void gaudi_init_e2e(struct hl_device *hdev)
2265 {
2266         if (hdev->asic_prop.fw_security_enabled)
2267                 return;
2268
2269         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2270                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2271                 return;
2272
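        /* End-to-end credit configuration: per-router budgets for outstanding
         * HBM and PCI reads/writes. The values appear to be tuned per router
         * location; the HBM values are programmed divided by 8, hence the
         * '>> 3'.
         */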
2273         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2274         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2275         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2276         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2277
2278         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2279         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2280         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2281         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2282
2283         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2284         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2285         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2286         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2287
2288         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2289         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2290         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2291         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2292
2293         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2294         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2295         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2296         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2297
2298         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2299         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2300         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2301         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2302
2303         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2304         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2305         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2306         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2307
2308         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2309         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2310         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2311         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2312
2313         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2314         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2315         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2316         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2317
2318         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2319         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2320         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2321         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2322
2323         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2324         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2325         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2326         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2327
2328         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2329         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2330         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2331         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2332
2333         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2334         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2335         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2336         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2337
2338         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2339         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2340         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2341         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2342
2343         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2344         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2345         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2346         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2347
2348         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2349         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2350         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2351         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2352
2353         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2354         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2355         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2356         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2357
2358         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2359         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2360         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2361         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2362
2363         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2364         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2365         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2366         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2367
2368         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2369         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2370         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2371         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2372
2373         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2374         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2375         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2376         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2377
2378         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2379         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2380         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2381         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2382
2383         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2384         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2385         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2386         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2387
2388         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2389         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2390         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2391         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2392
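        /* When HBM scrambling is disabled, the routers' NL_HBM selection and
         * offset registers are reprogrammed with the constants below,
         * presumably to match the unscrambled HBM address map.
         */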
2393         if (!hdev->dram_scrambler_enable) {
2394                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2395                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2396                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2397                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2398
2399                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2400                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2401                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2402                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2403
2404                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2405                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2406                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2407                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2408
2409                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2410                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2411                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2412                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2413
2414                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2415                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2416                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2417                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2418
2419                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2420                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2421                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2422                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2423
2424                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2425                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2426                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2427                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2428
2429                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2430                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2431                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2432                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2433
2434                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2435                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2436                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2437                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2438
2439                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2440                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2441                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2442                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2443
2444                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2445                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2446                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2447                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2448
2449                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2450                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2451                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2452                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2453
2454                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2455                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2456                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2457                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2458
2459                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2460                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2461                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2462                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2463
2464                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2465                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2466                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2467                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2468
2469                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2470                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2471                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2472                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2473
2474                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2475                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2476                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2477                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2478
2479                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2480                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2481                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2482                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2483
2484                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2485                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2486                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2487                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2488
2489                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2490                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2491                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2492                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2493
2494                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2495                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2496                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2497                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2498
2499                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2500                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2501                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2502                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2503
2504                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2505                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2506                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2507                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2508
2509                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2510                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2511                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2512                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2513         }
2514
2515         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2516                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2517         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2518                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2519
2520         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2521                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2522         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2523                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2524
2525         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2526                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2527         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2528                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2529
2530         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2531                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2532         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2533                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2534
2535         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2536                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2537         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2538                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2539
2540         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2541                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2542         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2543                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2544
2545         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2546                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2547         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2548                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2549
2550         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2551                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2552         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2553                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2554
2555         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2556                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2557         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2558                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2559
2560         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2561                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2562         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2563                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2564
2565         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2566                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2567         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2568                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2569
2570         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2571                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2572         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2573                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2574
2575         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2576                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2577         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2578                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2579
2580         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2581                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2582         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2583                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2584
2585         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2586                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2587         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2588                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2589
2590         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2591                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2592         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2593                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2594
2595         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2596                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2597         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2598                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2599
2600         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2601                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2602         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2603                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2604
2605         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2606                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2607         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2608                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2609
2610         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2611                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2612         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2613                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2614
2615         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2616                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2617         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2618                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2619
2620         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2621                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2622         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2623                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2624
2625         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2626                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2627         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2628                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2629
2630         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2631                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2632         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2633                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2634 }
2635
2636 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2637 {
2638         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2639
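        /*
         * Skip if the configuration registers are secured by FW, or if FW
         * reports that it has already enabled the HBM credits itself.
         */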
2640         if (hdev->asic_prop.fw_security_enabled)
2641                 return;
2642
2643         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2644                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2645                 return;
2646
2647         hbm0_wr = 0x33333333;
2648         hbm0_rd = 0x77777777;
2649         hbm1_wr = 0x55555555;
2650         hbm1_rd = 0xDDDDDDDD;
2651
2652         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2653         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2654         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2655         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2656
2657         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2658         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2659         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2660         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2661
2662         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2663         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2664         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2665         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2666
2667         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2668         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2669         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2670         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2671
2672         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2673                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2674                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2675         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2676                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2677                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2678         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2679                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2680                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2681         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2682                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2683                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2684
2685         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2686                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2687                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2688         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2689                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2690                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2691         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2692                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2693                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2694         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2695                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2696                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2697 }
2698
2699 static void gaudi_init_golden_registers(struct hl_device *hdev)
2700 {
2701         u32 tpc_offset;
2702         int tpc_id, i;
2703
2704         gaudi_init_e2e(hdev);
2705         gaudi_init_hbm_cred(hdev);
2706
2707         for (tpc_id = 0, tpc_offset = 0;
2708                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2709                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2710                 /* Mask all arithmetic interrupts from TPC */
2711                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2712                 /* Set 16 cache lines */
2713                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2714                                 ICACHE_FETCH_LINE_NUM, 2);
2715         }
2716
2717         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2718         for (i = 0 ; i < 128 ; i += 8)
2719                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2720
2721         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2722         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2723         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2724         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2725 }
2726
2727 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2728                                         int qman_id, dma_addr_t qman_pq_addr)
2729 {
2730         struct cpu_dyn_regs *dyn_regs =
2731                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2732         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2733         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2734         u32 q_off, dma_qm_offset;
2735         u32 dma_qm_err_cfg, irq_handler_offset;
2736
2737         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2738
2739         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2740                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2741         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2742                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2743         so_base_en_lo = lower_32_bits(CFG_BASE +
2744                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2745         so_base_en_hi = upper_32_bits(CFG_BASE +
2746                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2747         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2748                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2749         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2750                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2751         so_base_ws_lo = lower_32_bits(CFG_BASE +
2752                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2753         so_base_ws_hi = upper_32_bits(CFG_BASE +
2754                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2755
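        /* Per-stream registers are 32-bit wide, hence the qman_id * 4 stride */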
2756         q_off = dma_qm_offset + qman_id * 4;
2757
2758         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2759         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2760
2761         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2762         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2763         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2764
2765         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2766         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2767                                                         QMAN_LDMA_SRC_OFFSET);
2768         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2769                                                         QMAN_LDMA_DST_OFFSET);
2770
2771         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2772         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2773         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2774         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2775         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2776         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2777         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2778         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2779
2780         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2781
2782         /* The following configuration is needed only once per QMAN */
2783         if (qman_id == 0) {
2784                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2785                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2786                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2787
2788                 /* Configure RAZWI IRQ */
2789                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2790                 if (hdev->stop_on_err)
2791                         dma_qm_err_cfg |=
2792                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2793
2794                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2795
2796                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2797                         lower_32_bits(CFG_BASE + irq_handler_offset));
2798                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2799                         upper_32_bits(CFG_BASE + irq_handler_offset));
2800
2801                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2802                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2803                                                                         dma_id);
2804
2805                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2806                                 QM_ARB_ERR_MSG_EN_MASK);
2807
2808                 /* Increase ARB WDT to support streams architecture */
2809                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2810                                 GAUDI_ARB_WDT_TIMEOUT);
2811
2812                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2813                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2814
2815                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2816         }
2817 }
2818
2819 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2820 {
2821         struct cpu_dyn_regs *dyn_regs =
2822                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2823         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2824         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2825         u32 irq_handler_offset;
2826
2827         /* Writing 0 sets the maximum possible, according to the physical size */
2828         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2829         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2830
2831         /* WA for H/W bug H3-2116 */
2832         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2833
2834         /* The STOP_ON bit means the operation gets no completion in case of RAZWI */
2835         if (hdev->stop_on_err)
2836                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2837
2838         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2839
2840         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2841                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2842                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2843
2844         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2845                 lower_32_bits(CFG_BASE + irq_handler_offset));
2846         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2847                 upper_32_bits(CFG_BASE + irq_handler_offset));
2848
2849         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2850                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2851         WREG32(mmDMA0_CORE_PROT + dma_offset,
2852                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2853         /* If the channel is secured, it should be in MMU bypass mode */
2854         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2855                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2856         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2857 }
2858
2859 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2860                                 u32 enable_mask)
2861 {
2862         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2863
2864         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2865 }
2866
2867 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2868 {
2869         struct gaudi_device *gaudi = hdev->asic_specific;
2870         struct hl_hw_queue *q;
2871         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2872
2873         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2874                 return;
2875
2876         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2877                 dma_id = gaudi_dma_assignment[i];
2878                 /*
2879                  * For queues after the CPU Q, we need to add 1 to get the
2880                  * correct queue index. In addition, we need to add the CPU EQ
2881                  * and the NIC IRQs in order to get the correct MSI vector.
2882                  */
2883                 if (dma_id > 1) {
2884                         cpu_skip = 1;
2885                         nic_skip = NIC_NUMBER_OF_ENGINES;
2886                 } else {
2887                         cpu_skip = 0;
2888                         nic_skip = 0;
2889                 }
2890
2891                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2892                         q_idx = 4 * dma_id + j + cpu_skip;
2893                         q = &hdev->kernel_queues[q_idx];
2894                         q->cq_id = cq_id++;
2895                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2896                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2897                                                 q->bus_address);
2898                 }
2899
2900                 gaudi_init_dma_core(hdev, dma_id);
2901
2902                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2903         }
2904
2905         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2906 }
2907
2908 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2909                                         int qman_id, u64 qman_base_addr)
2910 {
2911         struct cpu_dyn_regs *dyn_regs =
2912                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2913         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2914         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2915         u32 dma_qm_err_cfg, irq_handler_offset;
2916         u32 q_off, dma_qm_offset;
2917
2918         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2919
2920         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2921                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2922         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2923                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2924         so_base_en_lo = lower_32_bits(CFG_BASE +
2925                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2926         so_base_en_hi = upper_32_bits(CFG_BASE +
2927                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2928         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2929                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2930         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2931                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2932         so_base_ws_lo = lower_32_bits(CFG_BASE +
2933                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2934         so_base_ws_hi = upper_32_bits(CFG_BASE +
2935                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2936
2937         q_off = dma_qm_offset + qman_id * 4;
2938
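        /*
         * Streams 0-3 are the upper CPs and are fed from a PQ in memory;
         * qman_id 4 is the lower CP, which has no PQ of its own.
         */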
2939         if (qman_id < 4) {
2940                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2941                                         lower_32_bits(qman_base_addr));
2942                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2943                                         upper_32_bits(qman_base_addr));
2944
2945                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2946                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2947                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2948
2949                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2950                                                         QMAN_CPDMA_SIZE_OFFSET);
2951                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2952                                                         QMAN_CPDMA_SRC_OFFSET);
2953                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2954                                                         QMAN_CPDMA_DST_OFFSET);
2955         } else {
2956                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2957                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2958                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2959
2960                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2961                                                         QMAN_LDMA_SIZE_OFFSET);
2962                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2963                                                         QMAN_LDMA_SRC_OFFSET);
2964                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2965                                                         QMAN_LDMA_DST_OFFSET);
2966
2967                 /* Configure RAZWI IRQ */
2968                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2969                 if (hdev->stop_on_err)
2970                         dma_qm_err_cfg |=
2971                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2972
2973                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2974
2975                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2976                         lower_32_bits(CFG_BASE + irq_handler_offset));
2977                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2978                         upper_32_bits(CFG_BASE + irq_handler_offset));
2979
2980                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2981                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2982                                                                         dma_id);
2983
2984                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2985                                 QM_ARB_ERR_MSG_EN_MASK);
2986
2987                 /* Increase ARB WDT to support streams architecture */
2988                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2989                                 GAUDI_ARB_WDT_TIMEOUT);
2990
2991                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2992                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2993                                 QMAN_INTERNAL_MAKE_TRUSTED);
2994         }
2995
2996         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2997         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2998         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2999         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3000
3001         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
3002         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
3003                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3004                                 mtr_base_ws_lo);
3005                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3006                                 mtr_base_ws_hi);
3007                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3008                                 so_base_ws_lo);
3009                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3010                                 so_base_ws_hi);
3011         }
3012 }
3013
3014 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
3015 {
3016         struct gaudi_device *gaudi = hdev->asic_specific;
3017         struct gaudi_internal_qman_info *q;
3018         u64 qman_base_addr;
3019         int i, j, dma_id, internal_q_index;
3020
3021         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
3022                 return;
3023
3024         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
3025                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
3026
3027                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
3028                          /*
3029                           * Add the CPU queue in order to get the correct queue
3030                           * number, as all internal queues are placed after it
3031                           */
3032                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
3033
3034                         q = &gaudi->internal_qmans[internal_q_index];
3035                         qman_base_addr = (u64) q->pq_dma_addr;
3036                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
3037                                                 qman_base_addr);
3038                 }
3039
3040                 /* Initializing lower CP for HBM DMA QMAN */
3041                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
3042
3043                 gaudi_init_dma_core(hdev, dma_id);
3044
3045                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
3046         }
3047
3048         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
3049 }
3050
3051 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
3052                                         int qman_id, u64 qman_base_addr)
3053 {
3054         struct cpu_dyn_regs *dyn_regs =
3055                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3056         u32 mtr_base_lo, mtr_base_hi;
3057         u32 so_base_lo, so_base_hi;
3058         u32 irq_handler_offset;
3059         u32 q_off, mme_id;
3060         u32 mme_qm_err_cfg;
3061
3062         mtr_base_lo = lower_32_bits(CFG_BASE +
3063                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3064         mtr_base_hi = upper_32_bits(CFG_BASE +
3065                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3066         so_base_lo = lower_32_bits(CFG_BASE +
3067                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3068         so_base_hi = upper_32_bits(CFG_BASE +
3069                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3070
3071         q_off = mme_offset + qman_id * 4;
3072
3073         if (qman_id < 4) {
3074                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3075                                         lower_32_bits(qman_base_addr));
3076                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3077                                         upper_32_bits(qman_base_addr));
3078
3079                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3080                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3081                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3082
3083                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3084                                                         QMAN_CPDMA_SIZE_OFFSET);
3085                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3086                                                         QMAN_CPDMA_SRC_OFFSET);
3087                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3088                                                         QMAN_CPDMA_DST_OFFSET);
3089         } else {
3090                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3091                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3092                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3093
3094                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3095                                                         QMAN_LDMA_SIZE_OFFSET);
3096                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3097                                                         QMAN_LDMA_SRC_OFFSET);
3098                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3099                                                         QMAN_LDMA_DST_OFFSET);
3100
3101                 /* Configure RAZWI IRQ */
3102                 mme_id = mme_offset /
3103                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3104
3105                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3106                 if (hdev->stop_on_err)
3107                         mme_qm_err_cfg |=
3108                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3109
3110                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3111
3112                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3113                         lower_32_bits(CFG_BASE + irq_handler_offset));
3114                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3115                         upper_32_bits(CFG_BASE + irq_handler_offset));
3116
3117                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3118                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3119                                                                         mme_id);
3120
3121                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3122                                 QM_ARB_ERR_MSG_EN_MASK);
3123
3124                 /* Increase ARB WDT to support streams architecture */
3125                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3126                                 GAUDI_ARB_WDT_TIMEOUT);
3127
3128                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3129                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3130                                 QMAN_INTERNAL_MAKE_TRUSTED);
3131         }
3132
3133         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3134         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3135         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3136         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3137 }
3138
3139 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3140 {
3141         struct gaudi_device *gaudi = hdev->asic_specific;
3142         struct gaudi_internal_qman_info *q;
3143         u64 qman_base_addr;
3144         u32 mme_offset;
3145         int i, internal_q_index;
3146
3147         if (gaudi->hw_cap_initialized & HW_CAP_MME)
3148                 return;
3149
3150         /*
3151          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3152          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3153          */
3154
3155         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3156
3157         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3158                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3159                 q = &gaudi->internal_qmans[internal_q_index];
3160                 qman_base_addr = (u64) q->pq_dma_addr;
3161                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3162                                         qman_base_addr);
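                /* After the first 4 streams, switch to the MME0 QMAN (offset 0) */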
3163                 if (i == 3)
3164                         mme_offset = 0;
3165         }
3166
3167         /* Initializing lower CP for MME QMANs */
3168         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3169         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3170         gaudi_init_mme_qman(hdev, 0, 4, 0);
3171
3172         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3173         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3174
3175         gaudi->hw_cap_initialized |= HW_CAP_MME;
3176 }
3177
3178 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3179                                 int qman_id, u64 qman_base_addr)
3180 {
3181         struct cpu_dyn_regs *dyn_regs =
3182                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3183         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3184         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3185         u32 tpc_qm_err_cfg, irq_handler_offset;
3186         u32 q_off, tpc_id;
3187
3188         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3189                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3190         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3191                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3192         so_base_en_lo = lower_32_bits(CFG_BASE +
3193                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3194         so_base_en_hi = upper_32_bits(CFG_BASE +
3195                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3196         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3197                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3198         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3199                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3200         so_base_ws_lo = lower_32_bits(CFG_BASE +
3201                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3202         so_base_ws_hi = upper_32_bits(CFG_BASE +
3203                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3204
3205         q_off = tpc_offset + qman_id * 4;
3206
3207         tpc_id = tpc_offset /
3208                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3209
3210         if (qman_id < 4) {
3211                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3212                                         lower_32_bits(qman_base_addr));
3213                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3214                                         upper_32_bits(qman_base_addr));
3215
3216                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3217                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3218                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3219
3220                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3221                                                         QMAN_CPDMA_SIZE_OFFSET);
3222                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3223                                                         QMAN_CPDMA_SRC_OFFSET);
3224                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3225                                                         QMAN_CPDMA_DST_OFFSET);
3226         } else {
3227                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3228                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3229                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3230
3231                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3232                                                         QMAN_LDMA_SIZE_OFFSET);
3233                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3234                                                         QMAN_LDMA_SRC_OFFSET);
3235                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3236                                                         QMAN_LDMA_DST_OFFSET);
3237
3238                 /* Configure RAZWI IRQ */
3239                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3240                 if (hdev->stop_on_err)
3241                         tpc_qm_err_cfg |=
3242                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3243
3244                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3245
3246                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3247                         lower_32_bits(CFG_BASE + irq_handler_offset));
3248                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3249                         upper_32_bits(CFG_BASE + irq_handler_offset));
3250
3251                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3252                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3253                                                                         tpc_id);
3254
3255                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3256                                 QM_ARB_ERR_MSG_EN_MASK);
3257
3258                 /* Increase ARB WDT to support streams architecture */
3259                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3260                                 GAUDI_ARB_WDT_TIMEOUT);
3261
3262                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3263                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3264                                 QMAN_INTERNAL_MAKE_TRUSTED);
3265         }
3266
3267         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3268         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3269         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3270         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3271
3272         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3273         if (tpc_id == 6) {
3274                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3275                                 mtr_base_ws_lo);
3276                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3277                                 mtr_base_ws_hi);
3278                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3279                                 so_base_ws_lo);
3280                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3281                                 so_base_ws_hi);
3282         }
3283 }
3284
3285 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3286 {
3287         struct gaudi_device *gaudi = hdev->asic_specific;
3288         struct gaudi_internal_qman_info *q;
3289         u64 qman_base_addr;
3290         u32 so_base_hi, tpc_offset = 0;
3291         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3292                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3293         int i, tpc_id, internal_q_index;
3294
3295         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3296                 return;
3297
3298         so_base_hi = upper_32_bits(CFG_BASE +
3299                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3300
3301         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3302                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3303                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3304                                                 tpc_id * QMAN_STREAMS + i;
3305                         q = &gaudi->internal_qmans[internal_q_index];
3306                         qman_base_addr = (u64) q->pq_dma_addr;
3307                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3308                                                 qman_base_addr);
3309
3310                         if (i == 3) {
3311                                 /* Initializing lower CP for TPC QMAN */
3312                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3313
3314                                 /* Enable the QMAN and TPC channel */
3315                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3316                                                 QMAN_TPC_ENABLE);
3317                         }
3318                 }
3319
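                /* The high part of the TPC sync-manager base points at the E_N sync objects */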
3320                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3321                                 so_base_hi);
3322
3323                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3324
3325                 gaudi->hw_cap_initialized |=
3326                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3327         }
3328 }
3329
3330 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3331                                 int qman_id, u64 qman_base_addr, int nic_id)
3332 {
3333         struct cpu_dyn_regs *dyn_regs =
3334                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3335         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3336         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3337         u32 nic_qm_err_cfg, irq_handler_offset;
3338         u32 q_off;
3339
3340         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3341                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3342         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3343                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3344         so_base_en_lo = lower_32_bits(CFG_BASE +
3345                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3346         so_base_en_hi = upper_32_bits(CFG_BASE +
3347                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3348         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3349                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3350         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3351                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3352         so_base_ws_lo = lower_32_bits(CFG_BASE +
3353                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3354         so_base_ws_hi = upper_32_bits(CFG_BASE +
3355                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3356
3357         q_off = nic_offset + qman_id * 4;
3358
3359         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3360         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3361
3362         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3363         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3364         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3365
3366         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3367                                                         QMAN_LDMA_SIZE_OFFSET);
3368         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3369                                                         QMAN_LDMA_SRC_OFFSET);
3370         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3371                                                         QMAN_LDMA_DST_OFFSET);
3372
3373         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3374         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3375         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3376         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3377
3378         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3379         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3380         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3381         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3382         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3383
3384         if (qman_id == 0) {
3385                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3386                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3387                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3388
3389                 /* Configure RAZWI IRQ */
3390                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3391                 if (hdev->stop_on_err)
3392                         nic_qm_err_cfg |=
3393                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3394
3395                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3396
3397                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3398                         lower_32_bits(CFG_BASE + irq_handler_offset));
3399                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3400                         upper_32_bits(CFG_BASE + irq_handler_offset));
3401
3402                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3403                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3404                                                                         nic_id);
3405
3406                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3407                                 QM_ARB_ERR_MSG_EN_MASK);
3408
3409                 /* Increase ARB WDT to support streams architecture */
3410                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3411                                 GAUDI_ARB_WDT_TIMEOUT);
3412
3413                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3414                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3415                                 QMAN_INTERNAL_MAKE_TRUSTED);
3416         }
3417 }
3418
3419 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3420 {
3421         struct gaudi_device *gaudi = hdev->asic_specific;
3422         struct gaudi_internal_qman_info *q;
3423         u64 qman_base_addr;
3424         u32 nic_offset = 0;
3425         u32 nic_delta_between_qmans =
3426                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3427         u32 nic_delta_between_nics =
3428                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3429         int i, nic_id, internal_q_index;
3430
3431         if (!hdev->nic_ports_mask)
3432                 return;
3433
3434         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3435                 return;
3436
3437         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3438
3439         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
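                /* Skip NIC ports that are masked out, but keep the register offset in sync */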
3440                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3441                         nic_offset += nic_delta_between_qmans;
3442                         if (nic_id & 1) {
3443                                 nic_offset -= (nic_delta_between_qmans * 2);
3444                                 nic_offset += nic_delta_between_nics;
3445                         }
3446                         continue;
3447                 }
3448
3449                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3450                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3451                                                 nic_id * QMAN_STREAMS + i;
3452                         q = &gaudi->internal_qmans[internal_q_index];
3453                         qman_base_addr = (u64) q->pq_dma_addr;
3454                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3455                                                 qman_base_addr, nic_id);
3456                 }
3457
3458                 /* Enable the QMAN */
3459                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3460
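                /*
                 * Each NIC macro holds two QMANs; after the odd (second) QMAN,
                 * jump to the base of the next NIC macro.
                 */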
3461                 nic_offset += nic_delta_between_qmans;
3462                 if (nic_id & 1) {
3463                         nic_offset -= (nic_delta_between_qmans * 2);
3464                         nic_offset += nic_delta_between_nics;
3465                 }
3466
3467                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3468         }
3469 }
3470
3471 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3472 {
3473         struct gaudi_device *gaudi = hdev->asic_specific;
3474
3475         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3476                 return;
3477
3478         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3479         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3480         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3481 }
3482
3483 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3484 {
3485         struct gaudi_device *gaudi = hdev->asic_specific;
3486
3487         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3488                 return;
3489
3490         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3491         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3492         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3493         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3494         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3495 }
3496
3497 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3498 {
3499         struct gaudi_device *gaudi = hdev->asic_specific;
3500
3501         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3502                 return;
3503
3504         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3505         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3506 }
3507
3508 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3509 {
3510         struct gaudi_device *gaudi = hdev->asic_specific;
3511         u32 tpc_offset = 0;
3512         int tpc_id;
3513
3514         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3515                 return;
3516
3517         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3518                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3519                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3520         }
3521 }
3522
3523 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3524 {
3525         struct gaudi_device *gaudi = hdev->asic_specific;
3526         u32 nic_mask, nic_offset = 0;
3527         u32 nic_delta_between_qmans =
3528                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3529         u32 nic_delta_between_nics =
3530                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3531         int nic_id;
3532
3533         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3534                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3535
3536                 if (gaudi->hw_cap_initialized & nic_mask)
3537                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3538
3539                 nic_offset += nic_delta_between_qmans;
3540                 if (nic_id & 1) {
3541                         nic_offset -= (nic_delta_between_qmans * 2);
3542                         nic_offset += nic_delta_between_nics;
3543                 }
3544         }
3545 }
3546
3547 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3548 {
3549         struct gaudi_device *gaudi = hdev->asic_specific;
3550
3551         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3552                 return;
3553
3554         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3555         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3556         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3557         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3558 }
3559
3560 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3561 {
3562         struct gaudi_device *gaudi = hdev->asic_specific;
3563
3564         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3565                 return;
3566
3567         /* Stop CPs of HBM DMA QMANs */
3568
3569         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3570         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3571         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3572         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3573         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3574 }
3575
3576 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3577 {
3578         struct gaudi_device *gaudi = hdev->asic_specific;
3579
3580         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3581                 return;
3582
3583         /* Stop CPs of MME QMANs */
3584         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3585         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3586 }
3587
3588 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3589 {
3590         struct gaudi_device *gaudi = hdev->asic_specific;
3591
3592         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3593                 return;
3594
3595         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3596         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3597         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3598         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3599         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3600         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3601         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3602         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3603 }
3604
3605 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3606 {
3607         struct gaudi_device *gaudi = hdev->asic_specific;
3608
3609         /* Stop the PQ fetchers, CQ fetchers and CPs of the NIC QMANs */
3610
3611         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3612                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3613                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3614                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3615                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3616
3617         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3618                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3619                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3620                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3621                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3622
3623         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3624                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3625                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3626                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3627                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3628
3629         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3630                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3631                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3632                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3633                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3634
3635         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3636                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3637                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3638                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3639                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3640
3641         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3642                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3643                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3644                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3645                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3646
3647         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3648                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3649                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3650                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3651                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3652
3653         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3654                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3655                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3656                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3657                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3658
3659         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3660                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3661                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3662                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3663                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3664
3665         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3666                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3667                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3668                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3669                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3670 }
3671
3672 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3673 {
3674         struct gaudi_device *gaudi = hdev->asic_specific;
3675
3676         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3677                 return;
3678
3679         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3680         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3681         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3682 }
3683
3684 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3685 {
3686         struct gaudi_device *gaudi = hdev->asic_specific;
3687
3688         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3689                 return;
3690
3691         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3692         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3693         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3694         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3695         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3696 }
3697
3698 static void gaudi_mme_stall(struct hl_device *hdev)
3699 {
3700         struct gaudi_device *gaudi = hdev->asic_specific;
3701
3702         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3703                 return;
3704
3705         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3706         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3707         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3708         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3709         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3710         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3711         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3712         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3713         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3714         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3715         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3716         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3717         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3718         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3719         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3720         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3721         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3722 }
3723
3724 static void gaudi_tpc_stall(struct hl_device *hdev)
3725 {
3726         struct gaudi_device *gaudi = hdev->asic_specific;
3727
3728         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3729                 return;
3730
3731         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3732         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3733         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3734         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3735         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3736         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3737         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3738         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3739 }
3740
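/*
 * Intentionally a no-op: the driver does not enable QMAN clock gating from
 * this ASIC callback.
 */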
3741 static void gaudi_set_clock_gating(struct hl_device *hdev)
3742 {
3743 }
3744
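/*
 * Clear the CGM_CFG/CGM_CFG1 registers of all DMA, MME and TPC QMANs to turn
 * clock gating off. Skipped when FW security is enabled, since in that case
 * these registers are presumably owned by the firmware and not writable from
 * the driver.
 */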
3745 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3746 {
3747         u32 qman_offset;
3748         int i;
3749
3750         if (hdev->asic_prop.fw_security_enabled)
3751                 return;
3752
3753         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3754                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3755                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3756
3757                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3758         }
3759
3760         WREG32(mmMME0_QM_CGM_CFG, 0);
3761         WREG32(mmMME0_QM_CGM_CFG1, 0);
3762         WREG32(mmMME2_QM_CGM_CFG, 0);
3763         WREG32(mmMME2_QM_CGM_CFG1, 0);
3764
3765         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3766                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3767                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3768
3769                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3770         }
3771 }
3772
3773 static void gaudi_enable_timestamp(struct hl_device *hdev)
3774 {
3775         /* Disable the timestamp counter */
3776         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3777
3778         /* Zero the lower/upper parts of the 64-bit counter */
3779         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3780         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3781
3782         /* Enable the counter */
3783         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3784 }
3785
3786 static void gaudi_disable_timestamp(struct hl_device *hdev)
3787 {
3788         /* Disable the timestamp counter */
3789         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3790 }
3791
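/*
 * Halt flow: first stop all QMANs, then stall the engines themselves, then
 * disable the QMANs and the timestamp counter, with a settle delay between
 * the stages. When the reset is performed by FW (fw_reset), the engines are
 * left to the firmware and only MSI is disabled on the host side.
 */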
3792 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3793 {
3794         u32 wait_timeout_ms;
3795
3796         dev_info(hdev->dev,
3797                 "Halting compute engines and disabling interrupts\n");
3798
3799         if (hdev->pldm)
3800                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3801         else
3802                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3803
3804         if (fw_reset)
3805                 goto skip_engines;
3806
3807         gaudi_stop_nic_qmans(hdev);
3808         gaudi_stop_mme_qmans(hdev);
3809         gaudi_stop_tpc_qmans(hdev);
3810         gaudi_stop_hbm_dma_qmans(hdev);
3811         gaudi_stop_pci_dma_qmans(hdev);
3812
3813         hdev->asic_funcs->disable_clock_gating(hdev);
3814
3815         msleep(wait_timeout_ms);
3816
3817         gaudi_pci_dma_stall(hdev);
3818         gaudi_hbm_dma_stall(hdev);
3819         gaudi_tpc_stall(hdev);
3820         gaudi_mme_stall(hdev);
3821
3822         msleep(wait_timeout_ms);
3823
3824         gaudi_disable_nic_qmans(hdev);
3825         gaudi_disable_mme_qmans(hdev);
3826         gaudi_disable_tpc_qmans(hdev);
3827         gaudi_disable_hbm_dma_qmans(hdev);
3828         gaudi_disable_pci_dma_qmans(hdev);
3829
3830         gaudi_disable_timestamp(hdev);
3831
3832 skip_engines:
3833         gaudi_disable_msi(hdev);
3834 }
3835
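/*
 * Program the hop0 page-table address for every ASID, set up the MMU cache
 * management page, invalidate the MMU caches and enable the MMU.
 */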
3836 static int gaudi_mmu_init(struct hl_device *hdev)
3837 {
3838         struct asic_fixed_properties *prop = &hdev->asic_prop;
3839         struct gaudi_device *gaudi = hdev->asic_specific;
3840         u64 hop0_addr;
3841         int rc, i;
3842
3843         if (!hdev->mmu_enable)
3844                 return 0;
3845
3846         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3847                 return 0;
3848
3849         for (i = 0 ; i < prop->max_asid ; i++) {
3850                 hop0_addr = prop->mmu_pgt_addr +
3851                                 (i * prop->mmu_hop_table_size);
3852
3853                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3854                 if (rc) {
3855                         dev_err(hdev->dev,
3856                                 "failed to set hop0 addr for asid %d\n", i);
3857                         goto err;
3858                 }
3859         }
3860
3861         /* init the MMU cache management page */
3862         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3863         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3864
3865         /* mem cache invalidation */
3866         WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3867
3868         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3869
3870         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3871         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3872
3873         WREG32(mmSTLB_HOP_CONFIGURATION,
3874                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3875
3876         /*
3877          * The H/W expects the first PI after init to be 1. After wraparound
3878          * we'll write 0.
3879          */
3880         gaudi->mmu_cache_inv_pi = 1;
3881
3882         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3883
3884         return 0;
3885
3886 err:
3887         return rc;
3888 }
3889
3890 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3891 {
3892         void __iomem *dst;
3893
3894         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3895
3896         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3897 }
3898
3899 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3900 {
3901         void __iomem *dst;
3902
3903         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3904
3905         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3906 }
3907
3908 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3909 {
3910         struct dynamic_fw_load_mgr *dynamic_loader;
3911         struct cpu_dyn_regs *dyn_regs;
3912
3913         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3914
3915         /*
3916          * Here we set initial values for a few specific dynamic regs.
3917          * Before the first descriptor is read from the FW, these values
3918          * have to be hard-coded. In later stages of the protocol they are
3919          * updated automatically from the FW descriptor, so the data there
3920          * is always up-to-date.
3921          */
3922         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3923         dyn_regs->kmd_msg_to_cpu =
3924                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3925         dyn_regs->cpu_cmd_status_to_host =
3926                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3927
3928         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3929 }
3930
3931 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3932 {
3933         struct static_fw_load_mgr *static_loader;
3934
3935         static_loader = &hdev->fw_loader.static_loader;
3936
3937         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3938         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3939         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3940         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3941         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3942         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3943         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3944         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3945         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3946         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3947         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3948         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3949         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3950                         GAUDI_PLDM_RESET_WAIT_MSEC :
3951                         GAUDI_CPU_RESET_WAIT_MSEC;
3952 }
3953
3954 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3955 {
3956         struct asic_fixed_properties *prop = &hdev->asic_prop;
3957         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3958
3959         /* fill common fields */
3960         fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3961         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3962         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3963         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3964         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3965         fw_loader->skip_bmc = !hdev->bmc_enable;
3966         fw_loader->sram_bar_id = SRAM_BAR_ID;
3967         fw_loader->dram_bar_id = HBM_BAR_ID;
3968
3969         if (prop->dynamic_fw_load)
3970                 gaudi_init_dynamic_firmware_loader(hdev);
3971         else
3972                 gaudi_init_static_firmware_loader(hdev);
3973 }
3974
3975 static int gaudi_init_cpu(struct hl_device *hdev)
3976 {
3977         struct gaudi_device *gaudi = hdev->asic_specific;
3978         int rc;
3979
3980         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3981                 return 0;
3982
3983         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3984                 return 0;
3985
3986         /*
3987          * The device CPU works with 40 bits addresses.
3988          * This register sets the extension to 50 bits.
3989          */
3990         if (!hdev->asic_prop.fw_security_enabled)
3991                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3992
3993         rc = hl_fw_init_cpu(hdev);
3994
3995         if (rc)
3996                 return rc;
3997
3998         gaudi->hw_cap_initialized |= HW_CAP_CPU;
3999
4000         return 0;
4001 }
4002
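/*
 * Hand the PQ/EQ/CQ base addresses and sizes to the device CPU, signal that
 * the host side is ready via CPU_IF_QUEUE_INIT plus a PI-update interrupt,
 * and poll until the CPU reports PQ_INIT_STATUS_READY_FOR_HOST.
 */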
4003 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4004 {
4005         struct cpu_dyn_regs *dyn_regs =
4006                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4007         struct asic_fixed_properties *prop = &hdev->asic_prop;
4008         struct gaudi_device *gaudi = hdev->asic_specific;
4009         u32 status, irq_handler_offset;
4010         struct hl_eq *eq;
4011         struct hl_hw_queue *cpu_pq =
4012                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4013         int err;
4014
4015         if (!hdev->cpu_queues_enable)
4016                 return 0;
4017
4018         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4019                 return 0;
4020
4021         eq = &hdev->event_queue;
4022
4023         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4024         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4025
4026         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4027         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4028
4029         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4030                         lower_32_bits(hdev->cpu_accessible_dma_address));
4031         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4032                         upper_32_bits(hdev->cpu_accessible_dma_address));
4033
4034         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4035         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4036         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4037
4038         /* Used for EQ CI */
4039         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4040
4041         WREG32(mmCPU_IF_PF_PQ_PI, 0);
4042
4043         if (gaudi->multi_msi_mode)
4044                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4045         else
4046                 WREG32(mmCPU_IF_QUEUE_INIT,
4047                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4048
4049         irq_handler_offset = prop->gic_interrupts_enable ?
4050                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4051                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4052
4053         WREG32(irq_handler_offset,
4054                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4055
4056         err = hl_poll_timeout(
4057                 hdev,
4058                 mmCPU_IF_QUEUE_INIT,
4059                 status,
4060                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4061                 1000,
4062                 cpu_timeout);
4063
4064         if (err) {
4065                 dev_err(hdev->dev,
4066                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4067                 return -EIO;
4068         }
4069
4070         /* update FW application security bits */
4071         if (prop->fw_cpu_boot_dev_sts0_valid)
4072                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4073         if (prop->fw_cpu_boot_dev_sts1_valid)
4074                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4075
4076         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4077         return 0;
4078 }
4079
4080 static void gaudi_pre_hw_init(struct hl_device *hdev)
4081 {
4082         /* Perform read from the device to make sure device is up */
4083         RREG32(mmHW_STATE);
4084
4085         if (!hdev->asic_prop.fw_security_enabled) {
4086                 /* Set the access through PCI bars (Linux driver only) as
4087                  * secured
4088                  */
4089                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4090                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4091                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4092
4093                 /* Perform read to flush the waiting writes to ensure
4094                  * configuration was set in the device
4095                  */
4096                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4097         }
4098
4099         /*
4100          * Let's mark in the H/W that we have reached this point. We check
4101          * this value in the reset_before_init function to understand whether
4102          * we need to reset the chip before doing H/W init. This register is
4103          * cleared by the H/W upon H/W reset
4104          */
4105         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4106 }
4107
4108 static int gaudi_hw_init(struct hl_device *hdev)
4109 {
4110         struct gaudi_device *gaudi = hdev->asic_specific;
4111         int rc;
4112
4113         gaudi_pre_hw_init(hdev);
4114
4115         /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
4116          * So we set it here and if anyone tries to move it later to
4117          * a different address, there will be an error
4118          */
4119         if (hdev->asic_prop.iatu_done_by_fw)
4120                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4121
4122         /*
4123          * Before pushing u-boot/linux to device, need to set the hbm bar to
4124          * base address of dram
4125          */
4126         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4127                 dev_err(hdev->dev,
4128                         "failed to map HBM bar to DRAM base address\n");
4129                 return -EIO;
4130         }
4131
4132         rc = gaudi_init_cpu(hdev);
4133         if (rc) {
4134                 dev_err(hdev->dev, "failed to initialize CPU\n");
4135                 return rc;
4136         }
4137
4138         /* In case the clock gating was enabled in preboot we need to disable
4139          * it here before touching the MME/TPC registers.
4140          * There is no need to take clk gating mutex because when this function
4141          * runs, no other relevant code can run
4142          */
4143         hdev->asic_funcs->disable_clock_gating(hdev);
4144
4145         /* SRAM scrambler must be initialized after CPU is running from HBM */
4146         gaudi_init_scrambler_sram(hdev);
4147
4148         /* This is here just in case we are working without CPU */
4149         gaudi_init_scrambler_hbm(hdev);
4150
4151         gaudi_init_golden_registers(hdev);
4152
4153         rc = gaudi_mmu_init(hdev);
4154         if (rc)
4155                 return rc;
4156
4157         gaudi_init_security(hdev);
4158
4159         gaudi_init_pci_dma_qmans(hdev);
4160
4161         gaudi_init_hbm_dma_qmans(hdev);
4162
4163         gaudi_init_mme_qmans(hdev);
4164
4165         gaudi_init_tpc_qmans(hdev);
4166
4167         gaudi_init_nic_qmans(hdev);
4168
4169         hdev->asic_funcs->set_clock_gating(hdev);
4170
4171         gaudi_enable_timestamp(hdev);
4172
4173         /* MSI must be enabled before CPU queues and NIC are initialized */
4174         rc = gaudi_enable_msi(hdev);
4175         if (rc)
4176                 goto disable_queues;
4177
4178         /* must be called after MSI was enabled */
4179         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4180         if (rc) {
4181                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4182                         rc);
4183                 goto disable_msi;
4184         }
4185
4186         /* Perform read from the device to flush all configuration */
4187         RREG32(mmHW_STATE);
4188
4189         return 0;
4190
4191 disable_msi:
4192         gaudi_disable_msi(hdev);
4193 disable_queues:
4194         gaudi_disable_mme_qmans(hdev);
4195         gaudi_disable_pci_dma_qmans(hdev);
4196
4197         return rc;
4198 }
4199
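/*
 * Hard-reset flow: ask the device CPU to halt (via GIC when Linux is loaded,
 * otherwise via the COMMS/MSG_TO_CPU path), then either let the firmware
 * perform the reset or, when the driver owns the reset, configure the reset
 * registers and trigger SW_ALL_RST directly. Finally wait for the reset to
 * complete and clear the relevant hw_cap bits.
 */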
4200 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4201 {
4202         struct cpu_dyn_regs *dyn_regs =
4203                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4204         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4205         struct gaudi_device *gaudi = hdev->asic_specific;
4206         bool driver_performs_reset;
4207
4208         if (!hard_reset) {
4209                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4210                 return;
4211         }
4212
4213         if (hdev->pldm) {
4214                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4215                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4216         } else {
4217                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4218                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4219         }
4220
4221         if (fw_reset) {
4222                 dev_info(hdev->dev,
4223                         "Firmware performs HARD reset, going to wait %dms\n",
4224                         reset_timeout_ms);
4225
4226                 goto skip_reset;
4227         }
4228
4229         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4230                                         !hdev->asic_prop.hard_reset_done_by_fw);
4231
4232         /* Set device to handle FLR by H/W as we will put the device CPU to
4233          * halt mode
4234          */
4235         if (driver_performs_reset)
4236                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4237                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4238
4239         /* If linux is loaded in the device CPU we need to communicate with it
4240          * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4241          * registers in case of old F/Ws
4242          */
4243         if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4244                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4245                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4246                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4247
4248                 WREG32(irq_handler_offset,
4249                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4250
4251                 /* This is a hail-mary attempt to revive the card in the small chance that the
4252                  * f/w has experienced a watchdog event, which caused it to return to preboot.
4253                  * In that case, triggering reset through GIC won't help. We need to trigger the
4254                  * reset as if Linux wasn't loaded.
4255                  *
4256                  * We do it only if the reset cause was HB, because that would be the indication
4257                  * of such an event.
4258                  *
4259                  * In case the watchdog hasn't expired but we still got HB, this won't do any
4260                  * damage.
4261                  */
4262                 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4263                         if (hdev->asic_prop.hard_reset_done_by_fw)
4264                                 hl_fw_ask_hard_reset_without_linux(hdev);
4265                         else
4266                                 hl_fw_ask_halt_machine_without_linux(hdev);
4267                 }
4268         } else {
4269                 if (hdev->asic_prop.hard_reset_done_by_fw)
4270                         hl_fw_ask_hard_reset_without_linux(hdev);
4271                 else
4272                         hl_fw_ask_halt_machine_without_linux(hdev);
4273         }
4274
4275         if (driver_performs_reset) {
4276
4277                 /* Configure the reset registers. Must be done as early as
4278                  * possible in case we fail during H/W initialization
4279                  */
4280                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4281                                                 (CFG_RST_H_DMA_MASK |
4282                                                 CFG_RST_H_MME_MASK |
4283                                                 CFG_RST_H_SM_MASK |
4284                                                 CFG_RST_H_TPC_7_MASK));
4285
4286                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4287
4288                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4289                                                 (CFG_RST_H_HBM_MASK |
4290                                                 CFG_RST_H_TPC_7_MASK |
4291                                                 CFG_RST_H_NIC_MASK |
4292                                                 CFG_RST_H_SM_MASK |
4293                                                 CFG_RST_H_DMA_MASK |
4294                                                 CFG_RST_H_MME_MASK |
4295                                                 CFG_RST_H_CPU_MASK |
4296                                                 CFG_RST_H_MMU_MASK));
4297
4298                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4299                                                 (CFG_RST_L_IF_MASK |
4300                                                 CFG_RST_L_PSOC_MASK |
4301                                                 CFG_RST_L_TPC_MASK));
4302
4303                 msleep(cpu_timeout_ms);
4304
4305                 /* Tell ASIC not to re-initialize PCIe */
4306                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4307
4308                 /* Restart BTL/BLR upon hard-reset */
4309                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4310
4311                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4312                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4313
4314                 dev_info(hdev->dev,
4315                         "Issued HARD reset command, going to wait %dms\n",
4316                         reset_timeout_ms);
4317         } else {
4318                 dev_info(hdev->dev,
4319                         "Firmware performs HARD reset, going to wait %dms\n",
4320                         reset_timeout_ms);
4321         }
4322
4323 skip_reset:
4324         /*
4325          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4326          * itself is in reset. Need to wait until the reset is deasserted
4327          */
4328         msleep(reset_timeout_ms);
4329
4330         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4331         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4332                 dev_err(hdev->dev,
4333                         "Timeout while waiting for device to reset 0x%x\n",
4334                         status);
4335
4336         if (gaudi) {
4337                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4338                                                 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4339                                                 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4340                                                 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4341                                                 HW_CAP_HBM_SCRAMBLER);
4342
4343                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4344
4345                 hdev->device_cpu_is_halted = false;
4346         }
4347 }
4348
4349 static int gaudi_suspend(struct hl_device *hdev)
4350 {
4351         int rc;
4352
4353         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4354         if (rc)
4355                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4356
4357         return rc;
4358 }
4359
4360 static int gaudi_resume(struct hl_device *hdev)
4361 {
4362         return gaudi_init_iatu(hdev);
4363 }
4364
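/*
 * Map a coherent DMA allocation to user space. The dma_addr received here is
 * in the device's view of host memory (shifted by HOST_PHYS_BASE), so the
 * offset is removed before calling dma_mmap_coherent().
 */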
4365 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4366                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4367 {
4368         int rc;
4369
4370         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4371                         VM_DONTCOPY | VM_NORESERVE;
4372
4373         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4374                                 (dma_addr - HOST_PHYS_BASE), size);
4375         if (rc)
4376                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4377
4378         return rc;
4379 }
4380
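/*
 * Translate the logical hw_queue_id to the matching QMAN PQ_PI doorbell
 * register and write the new producer index. For the CPU PQ, also raise the
 * PI-update interrupt towards the device CPU.
 */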
4381 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4382 {
4383         struct cpu_dyn_regs *dyn_regs =
4384                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4385         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4386         struct gaudi_device *gaudi = hdev->asic_specific;
4387         bool invalid_queue = false;
4388         int dma_id;
4389
4390         switch (hw_queue_id) {
4391         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4392                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4393                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4394                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4395                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4396                 break;
4397
4398         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4399                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4400                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4401                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4402                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4403                 break;
4404
4405         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4406                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4407                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4408                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4409                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4410                 break;
4411
4412         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4413                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4414                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4415                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4416                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4417                 break;
4418
4419         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4420                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4421                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4422                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4423                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4424                 break;
4425
4426         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4427                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4428                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4429                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4430                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4431                 break;
4432
4433         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4434                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4435                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4436                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4437                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4438                 break;
4439
4440         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4441                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4442                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4443                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4444                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4445                 break;
4446
4447         case GAUDI_QUEUE_ID_CPU_PQ:
4448                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4449                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4450                 else
4451                         invalid_queue = true;
4452                 break;
4453
4454         case GAUDI_QUEUE_ID_MME_0_0:
4455                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4456                 break;
4457
4458         case GAUDI_QUEUE_ID_MME_0_1:
4459                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4460                 break;
4461
4462         case GAUDI_QUEUE_ID_MME_0_2:
4463                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4464                 break;
4465
4466         case GAUDI_QUEUE_ID_MME_0_3:
4467                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4468                 break;
4469
4470         case GAUDI_QUEUE_ID_MME_1_0:
4471                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4472                 break;
4473
4474         case GAUDI_QUEUE_ID_MME_1_1:
4475                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4476                 break;
4477
4478         case GAUDI_QUEUE_ID_MME_1_2:
4479                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4480                 break;
4481
4482         case GAUDI_QUEUE_ID_MME_1_3:
4483                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4484                 break;
4485
4486         case GAUDI_QUEUE_ID_TPC_0_0:
4487                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4488                 break;
4489
4490         case GAUDI_QUEUE_ID_TPC_0_1:
4491                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4492                 break;
4493
4494         case GAUDI_QUEUE_ID_TPC_0_2:
4495                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4496                 break;
4497
4498         case GAUDI_QUEUE_ID_TPC_0_3:
4499                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4500                 break;
4501
4502         case GAUDI_QUEUE_ID_TPC_1_0:
4503                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4504                 break;
4505
4506         case GAUDI_QUEUE_ID_TPC_1_1:
4507                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4508                 break;
4509
4510         case GAUDI_QUEUE_ID_TPC_1_2:
4511                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4512                 break;
4513
4514         case GAUDI_QUEUE_ID_TPC_1_3:
4515                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4516                 break;
4517
4518         case GAUDI_QUEUE_ID_TPC_2_0:
4519                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4520                 break;
4521
4522         case GAUDI_QUEUE_ID_TPC_2_1:
4523                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4524                 break;
4525
4526         case GAUDI_QUEUE_ID_TPC_2_2:
4527                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4528                 break;
4529
4530         case GAUDI_QUEUE_ID_TPC_2_3:
4531                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4532                 break;
4533
4534         case GAUDI_QUEUE_ID_TPC_3_0:
4535                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4536                 break;
4537
4538         case GAUDI_QUEUE_ID_TPC_3_1:
4539                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4540                 break;
4541
4542         case GAUDI_QUEUE_ID_TPC_3_2:
4543                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4544                 break;
4545
4546         case GAUDI_QUEUE_ID_TPC_3_3:
4547                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4548                 break;
4549
4550         case GAUDI_QUEUE_ID_TPC_4_0:
4551                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4552                 break;
4553
4554         case GAUDI_QUEUE_ID_TPC_4_1:
4555                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4556                 break;
4557
4558         case GAUDI_QUEUE_ID_TPC_4_2:
4559                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4560                 break;
4561
4562         case GAUDI_QUEUE_ID_TPC_4_3:
4563                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4564                 break;
4565
4566         case GAUDI_QUEUE_ID_TPC_5_0:
4567                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4568                 break;
4569
4570         case GAUDI_QUEUE_ID_TPC_5_1:
4571                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4572                 break;
4573
4574         case GAUDI_QUEUE_ID_TPC_5_2:
4575                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4576                 break;
4577
4578         case GAUDI_QUEUE_ID_TPC_5_3:
4579                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4580                 break;
4581
4582         case GAUDI_QUEUE_ID_TPC_6_0:
4583                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4584                 break;
4585
4586         case GAUDI_QUEUE_ID_TPC_6_1:
4587                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4588                 break;
4589
4590         case GAUDI_QUEUE_ID_TPC_6_2:
4591                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4592                 break;
4593
4594         case GAUDI_QUEUE_ID_TPC_6_3:
4595                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4596                 break;
4597
4598         case GAUDI_QUEUE_ID_TPC_7_0:
4599                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4600                 break;
4601
4602         case GAUDI_QUEUE_ID_TPC_7_1:
4603                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4604                 break;
4605
4606         case GAUDI_QUEUE_ID_TPC_7_2:
4607                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4608                 break;
4609
4610         case GAUDI_QUEUE_ID_TPC_7_3:
4611                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4612                 break;
4613
4614         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4615                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4616                         invalid_queue = true;
4617
4618                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4619                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4620                 break;
4621
4622         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4623                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4624                         invalid_queue = true;
4625
4626                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4627                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4628                 break;
4629
4630         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4631                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4632                         invalid_queue = true;
4633
4634                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4635                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4636                 break;
4637
4638         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4639                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4640                         invalid_queue = true;
4641
4642                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4643                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4644                 break;
4645
4646         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4647                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4648                         invalid_queue = true;
4649
4650                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4651                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4652                 break;
4653
4654         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4655                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4656                         invalid_queue = true;
4657
4658                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4659                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4660                 break;
4661
4662         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4663                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4664                         invalid_queue = true;
4665
4666                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4667                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4668                 break;
4669
4670         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4671                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4672                         invalid_queue = true;
4673
4674                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4675                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4676                 break;
4677
4678         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4679                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4680                         invalid_queue = true;
4681
4682                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4683                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4684                 break;
4685
4686         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4687                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4688                         invalid_queue = true;
4689
4690                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4691                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4692                 break;
4693
4694         default:
4695                 invalid_queue = true;
4696         }
4697
4698         if (invalid_queue) {
4699                 /* Should never get here */
4700                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4701                         hw_queue_id);
4702                 return;
4703         }
4704
4705         db_value = pi;
4706
4707         /* ring the doorbell */
4708         WREG32(db_reg_offset, db_value);
4709
4710         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4711                 /* make sure device CPU will read latest data from host */
4712                 mb();
4713
4714                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4715                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4716                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4717
4718                 WREG32(irq_handler_offset,
4719                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4720         }
4721 }
4722
4723 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4724                                 struct hl_bd *bd)
4725 {
4726         __le64 *pbd = (__le64 *) bd;
4727
4728         /* The QMANs are in host memory so a simple copy suffices */
4729         pqe[0] = pbd[0];
4730         pqe[1] = pbd[1];
4731 }
4732
4733 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4734                                         dma_addr_t *dma_handle, gfp_t flags)
4735 {
4736         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4737                                                 dma_handle, flags);
4738
4739         /* Shift to the device's base physical address of host memory */
4740         if (kernel_addr)
4741                 *dma_handle += HOST_PHYS_BASE;
4742
4743         return kernel_addr;
4744 }
4745
4746 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4747                 void *cpu_addr, dma_addr_t dma_handle)
4748 {
4749         /* Cancel the device's base physical address of host memory */
4750         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4751
4752         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4753 }
4754
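/*
 * Scrub all of HBM using the DMA cores in memset mode: split the range into
 * chunks of up to 2GB, issue one chunk per DMA channel in parallel, then poll
 * every channel for idle before moving on to the next batch.
 */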
4755 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4756 {
4757         struct asic_fixed_properties *prop = &hdev->asic_prop;
4758         u64  cur_addr = DRAM_BASE_ADDR_USER;
4759         u32 val;
4760         u32 chunk_size;
4761         int rc, dma_id;
4762
4763         while (cur_addr < prop->dram_end_address) {
4764                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4765                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4766
4767                         chunk_size =
4768                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4769
4770                         dev_dbg(hdev->dev,
4771                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4772                                 cur_addr, cur_addr + chunk_size);
4773
4774                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
4775                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
4776                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4777                                                 lower_32_bits(cur_addr));
4778                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4779                                                 upper_32_bits(cur_addr));
4780                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4781                                         chunk_size);
4782                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4783                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4784                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4785
4786                         cur_addr += chunk_size;
4787
4788                         if (cur_addr == prop->dram_end_address)
4789                                 break;
4790                 }
4791
4792                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4793                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4794
4795                         rc = hl_poll_timeout(
4796                                 hdev,
4797                                 mmDMA0_CORE_STS0 + dma_offset,
4798                                 val,
4799                                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4800                                 1000,
4801                                 HBM_SCRUBBING_TIMEOUT_US);
4802
4803                         if (rc) {
4804                                 dev_err(hdev->dev,
4805                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4806                                         dma_id);
4807                                 return -EIO;
4808                         }
4809                 }
4810         }
4811
4812         return 0;
4813 }
4814
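/*
 * Full-device scrub (addr == 0 && size == 0): wait for the device to go idle,
 * memset SRAM with a fixed pattern, then scrub HBM with clock gating disabled
 * around the DMA work.
 */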
4815 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4816 {
4817         struct asic_fixed_properties *prop = &hdev->asic_prop;
4818         struct gaudi_device *gaudi = hdev->asic_specific;
4819         int rc = 0;
4820         u64 val = 0;
4821
4822         if (!hdev->memory_scrub)
4823                 return 0;
4824
4825         if (!addr && !size) {
4826                 /* Wait till device is idle */
4827                 rc = hl_poll_timeout(
4828                                 hdev,
4829                                 mmDMA0_CORE_STS0/* dummy */,
4830                                 val/* dummy */,
4831                                 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4832                                                 0, NULL)),
4833                                                 1000,
4834                                                 HBM_SCRUBBING_TIMEOUT_US);
4835                 if (rc) {
4836                         dev_err(hdev->dev, "waiting for idle timeout\n");
4837                         return -EIO;
4838                 }
4839
4840                 /* Scrub SRAM */
4841                 addr = prop->sram_user_base_address;
4842                 size = hdev->pldm ? 0x10000 :
4843                                 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4844                 val = 0x7777777777777777ull;
4845
4846                 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4847                 if (rc) {
4848                         dev_err(hdev->dev,
4849                                 "Failed to clear SRAM in mem scrub all\n");
4850                         return rc;
4851                 }
4852
4853                 mutex_lock(&gaudi->clk_gate_mutex);
4854                 hdev->asic_funcs->disable_clock_gating(hdev);
4855
4856                 /* Scrub HBM using all DMA channels in parallel */
4857                 rc = gaudi_hbm_scrubbing(hdev);
4858                 if (rc)
4859                         dev_err(hdev->dev,
4860                                 "Failed to clear HBM in mem scrub all\n");
4861
4862                 hdev->asic_funcs->set_clock_gating(hdev);
4863                 mutex_unlock(&gaudi->clk_gate_mutex);
4864         }
4865
4866         return rc;
4867 }
4868
4869 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4870                                 u32 queue_id, dma_addr_t *dma_handle,
4871                                 u16 *queue_len)
4872 {
4873         struct gaudi_device *gaudi = hdev->asic_specific;
4874         struct gaudi_internal_qman_info *q;
4875
4876         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4877                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4878                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4879                 return NULL;
4880         }
4881
4882         q = &gaudi->internal_qmans[queue_id];
4883         *dma_handle = q->pq_dma_addr;
4884         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4885
4886         return q->pq_kernel_addr;
4887 }
4888
4889 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4890                                 u16 len, u32 timeout, u64 *result)
4891 {
4892         struct gaudi_device *gaudi = hdev->asic_specific;
4893
4894         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4895                 if (result)
4896                         *result = 0;
4897                 return 0;
4898         }
4899
4900         if (!timeout)
4901                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4902
4903         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4904                                                 timeout, result);
4905 }
4906
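/*
 * Sanity-test an external queue: send a MSG_PROT packet that writes a fence
 * value to host memory and poll that memory until the value shows up or the
 * timeout expires.
 */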
4907 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4908 {
4909         struct packet_msg_prot *fence_pkt;
4910         dma_addr_t pkt_dma_addr;
4911         u32 fence_val, tmp, timeout_usec;
4912         dma_addr_t fence_dma_addr;
4913         u32 *fence_ptr;
4914         int rc;
4915
4916         if (hdev->pldm)
4917                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4918         else
4919                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4920
4921         fence_val = GAUDI_QMAN0_FENCE_VAL;
4922
4923         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4924                                                         &fence_dma_addr);
4925         if (!fence_ptr) {
4926                 dev_err(hdev->dev,
4927                         "Failed to allocate memory for H/W queue %d testing\n",
4928                         hw_queue_id);
4929                 return -ENOMEM;
4930         }
4931
4932         *fence_ptr = 0;
4933
4934         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4935                                         sizeof(struct packet_msg_prot),
4936                                         GFP_KERNEL, &pkt_dma_addr);
4937         if (!fence_pkt) {
4938                 dev_err(hdev->dev,
4939                         "Failed to allocate packet for H/W queue %d testing\n",
4940                         hw_queue_id);
4941                 rc = -ENOMEM;
4942                 goto free_fence_ptr;
4943         }
4944
4945         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4946         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4947         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4948
4949         fence_pkt->ctl = cpu_to_le32(tmp);
4950         fence_pkt->value = cpu_to_le32(fence_val);
4951         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4952
4953         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4954                                         sizeof(struct packet_msg_prot),
4955                                         pkt_dma_addr);
4956         if (rc) {
4957                 dev_err(hdev->dev,
4958                         "Failed to send fence packet to H/W queue %d\n",
4959                         hw_queue_id);
4960                 goto free_pkt;
4961         }
4962
4963         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4964                                         1000, timeout_usec, true);
4965
4966         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4967
4968         if (rc == -ETIMEDOUT) {
4969                 dev_err(hdev->dev,
4970                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4971                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4972                 rc = -EIO;
4973         }
4974
4975 free_pkt:
4976         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4977                                         pkt_dma_addr);
4978 free_fence_ptr:
4979         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4980                                         fence_dma_addr);
4981         return rc;
4982 }
4983
4984 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4985 {
4986         struct gaudi_device *gaudi = hdev->asic_specific;
4987
4988         /*
4989          * Check the capability here since send_cpu_message() won't update
4990          * the result value when the CPU queue capability isn't set.
4991          */
4992         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4993                 return 0;
4994
4995         return hl_fw_test_cpu_queue(hdev);
4996 }
4997
4998 static int gaudi_test_queues(struct hl_device *hdev)
4999 {
5000         int i, rc, ret_val = 0;
5001
5002         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
5003                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
5004                         rc = gaudi_test_queue(hdev, i);
5005                         if (rc)
5006                                 ret_val = -EINVAL;
5007                 }
5008         }
5009
5010         rc = gaudi_test_cpu_queue(hdev);
5011         if (rc)
5012                 ret_val = -EINVAL;
5013
5014         return ret_val;
5015 }
5016
5017 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5018                 gfp_t mem_flags, dma_addr_t *dma_handle)
5019 {
5020         void *kernel_addr;
5021
5022         if (size > GAUDI_DMA_POOL_BLK_SIZE)
5023                 return NULL;
5024
5025         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5026
5027         /* Shift to the device's base physical address of host memory */
5028         if (kernel_addr)
5029                 *dma_handle += HOST_PHYS_BASE;
5030
5031         return kernel_addr;
5032 }
5033
5034 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
5035                         dma_addr_t dma_addr)
5036 {
5037         /* Cancel the device's base physical address of host memory */
5038         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5039
5040         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5041 }
5042
5043 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
5044                                         size_t size, dma_addr_t *dma_handle)
5045 {
5046         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5047 }
5048
5049 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
5050                                                 size_t size, void *vaddr)
5051 {
5052         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5053 }
5054
5055 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5056                         int nents, enum dma_data_direction dir)
5057 {
5058         struct scatterlist *sg;
5059         int i;
5060
5061         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5062                 return -ENOMEM;
5063
5064         /* Shift to the device's base physical address of host memory */
5065         for_each_sg(sgl, sg, nents, i)
5066                 sg->dma_address += HOST_PHYS_BASE;
5067
5068         return 0;
5069 }
5070
5071 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5072                         int nents, enum dma_data_direction dir)
5073 {
5074         struct scatterlist *sg;
5075         int i;
5076
5077         /* Cancel the device's base physical address of host memory */
5078         for_each_sg(sgl, sg, nents, i)
5079                 sg->dma_address -= HOST_PHYS_BASE;
5080
5081         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5082 }
5083
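/*
 * Compute the space needed in the patched CB for a host SG list: merge
 * physically contiguous entries as long as the combined size fits in
 * DMA_MAX_TRANSFER_SIZE, and account for one LIN_DMA packet per resulting
 * descriptor.
 */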
5084 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5085                                         struct sg_table *sgt)
5086 {
5087         struct scatterlist *sg, *sg_next_iter;
5088         u32 count, dma_desc_cnt;
5089         u64 len, len_next;
5090         dma_addr_t addr, addr_next;
5091
5092         dma_desc_cnt = 0;
5093
5094         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5095
5096                 len = sg_dma_len(sg);
5097                 addr = sg_dma_address(sg);
5098
5099                 if (len == 0)
5100                         break;
5101
5102                 while ((count + 1) < sgt->nents) {
5103                         sg_next_iter = sg_next(sg);
5104                         len_next = sg_dma_len(sg_next_iter);
5105                         addr_next = sg_dma_address(sg_next_iter);
5106
5107                         if (len_next == 0)
5108                                 break;
5109
5110                         if ((addr + len == addr_next) &&
5111                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5112                                 len += len_next;
5113                                 count++;
5114                                 sg = sg_next_iter;
5115                         } else {
5116                                 break;
5117                         }
5118                 }
5119
5120                 dma_desc_cnt++;
5121         }
5122
5123         return dma_desc_cnt * sizeof(struct packet_lin_dma);
5124 }
5125
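/*
 * Pin the user buffer referenced by a LIN_DMA packet (unless it is already
 * pinned for this job), DMA-map it and add the size of the resulting
 * descriptor list to the patched CB size.
 */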
5126 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5127                                 struct hl_cs_parser *parser,
5128                                 struct packet_lin_dma *user_dma_pkt,
5129                                 u64 addr, enum dma_data_direction dir)
5130 {
5131         struct hl_userptr *userptr;
5132         int rc;
5133
5134         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5135                         parser->job_userptr_list, &userptr))
5136                 goto already_pinned;
5137
5138         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5139         if (!userptr)
5140                 return -ENOMEM;
5141
5142         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5143                                 userptr);
5144         if (rc)
5145                 goto free_userptr;
5146
5147         list_add_tail(&userptr->job_node, parser->job_userptr_list);
5148
5149         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5150                                         userptr->sgt->nents, dir);
5151         if (rc) {
5152                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5153                 goto unpin_memory;
5154         }
5155
5156         userptr->dma_mapped = true;
5157         userptr->dir = dir;
5158
5159 already_pinned:
5160         parser->patched_cb_size +=
5161                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5162
5163         return 0;
5164
5165 unpin_memory:
5166         list_del(&userptr->job_node);
5167         hl_unpin_host_memory(hdev, userptr);
5168 free_userptr:
5169         kfree(userptr);
5170         return rc;
5171 }
5172
5173 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5174                                 struct hl_cs_parser *parser,
5175                                 struct packet_lin_dma *user_dma_pkt,
5176                                 bool src_in_host)
5177 {
5178         enum dma_data_direction dir;
5179         bool skip_host_mem_pin = false, user_memset;
5180         u64 addr;
5181         int rc = 0;
5182
5183         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5184                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5185                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5186
5187         if (src_in_host) {
5188                 if (user_memset)
5189                         skip_host_mem_pin = true;
5190
5191                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5192                 dir = DMA_TO_DEVICE;
5193                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5194         } else {
5195                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5196                 dir = DMA_FROM_DEVICE;
5197                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5198                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5199                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5200         }
5201
5202         if (skip_host_mem_pin)
5203                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5204         else
5205                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5206                                                 addr, dir);
5207
5208         return rc;
5209 }
5210
5211 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5212                                 struct hl_cs_parser *parser,
5213                                 struct packet_lin_dma *user_dma_pkt)
5214 {
5215         bool src_in_host = false;
5216         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5217                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5218                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5219
5220         dev_dbg(hdev->dev, "DMA packet details:\n");
5221         dev_dbg(hdev->dev, "source == 0x%llx\n",
5222                                 le64_to_cpu(user_dma_pkt->src_addr));
5223         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5224         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5225
5226         /*
5227          * Special handling for DMA with size 0. Bypass all validations
5228          * because no transactions will be done except for WR_COMP, which
5229          * is not a security issue
5230          */
5231         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5232                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5233                 return 0;
5234         }
5235
5236         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5237                 src_in_host = true;
5238
5239         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5240                                                 src_in_host);
5241 }
5242
5243 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5244                                         struct hl_cs_parser *parser,
5245                                         struct packet_load_and_exe *user_pkt)
5246 {
5247         u32 cfg;
5248
5249         cfg = le32_to_cpu(user_pkt->cfg);
5250
5251         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5252                 dev_err(hdev->dev,
5253                         "User not allowed to use Load and Execute\n");
5254                 return -EPERM;
5255         }
5256
5257         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5258
5259         return 0;
5260 }
5261
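/*
 * First pass over the user CB: walk it packet by packet, reject packets
 * the user may not submit (MSG_PROT, CP_DMA, STOP, WREG_BULK and
 * LOAD_AND_EXE with the DST configuration bit set) and accumulate
 * parser->patched_cb_size so the patched CB can be allocated up-front.
 * With the MMU enabled a LIN_DMA packet only contributes its own size;
 * without it, the referenced host memory is pinned and the size of its
 * eventual descriptor list is accounted for. Two extra MSG_PROT packets
 * are reserved at the end when a completion is requested.
 */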
5262 static int gaudi_validate_cb(struct hl_device *hdev,
5263                         struct hl_cs_parser *parser, bool is_mmu)
5264 {
5265         u32 cb_parsed_length = 0;
5266         int rc = 0;
5267
5268         parser->patched_cb_size = 0;
5269
5270         /* user_cb_size is more than 0 so the loop will always be executed */
5271         while (cb_parsed_length < parser->user_cb_size) {
5272                 enum packet_id pkt_id;
5273                 u16 pkt_size;
5274                 struct gaudi_packet *user_pkt;
5275
5276                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5277
5278                 pkt_id = (enum packet_id) (
5279                                 (le64_to_cpu(user_pkt->header) &
5280                                 PACKET_HEADER_PACKET_ID_MASK) >>
5281                                         PACKET_HEADER_PACKET_ID_SHIFT);
5282
5283                 if (!validate_packet_id(pkt_id)) {
5284                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5285                         rc = -EINVAL;
5286                         break;
5287                 }
5288
5289                 pkt_size = gaudi_packet_sizes[pkt_id];
5290                 cb_parsed_length += pkt_size;
5291                 if (cb_parsed_length > parser->user_cb_size) {
5292                         dev_err(hdev->dev,
5293                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5294                         rc = -EINVAL;
5295                         break;
5296                 }
5297
5298                 switch (pkt_id) {
5299                 case PACKET_MSG_PROT:
5300                         dev_err(hdev->dev,
5301                                 "User not allowed to use MSG_PROT\n");
5302                         rc = -EPERM;
5303                         break;
5304
5305                 case PACKET_CP_DMA:
5306                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5307                         rc = -EPERM;
5308                         break;
5309
5310                 case PACKET_STOP:
5311                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5312                         rc = -EPERM;
5313                         break;
5314
5315                 case PACKET_WREG_BULK:
5316                         dev_err(hdev->dev,
5317                                 "User not allowed to use WREG_BULK\n");
5318                         rc = -EPERM;
5319                         break;
5320
5321                 case PACKET_LOAD_AND_EXE:
5322                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5323                                 (struct packet_load_and_exe *) user_pkt);
5324                         break;
5325
5326                 case PACKET_LIN_DMA:
5327                         parser->contains_dma_pkt = true;
5328                         if (is_mmu)
5329                                 parser->patched_cb_size += pkt_size;
5330                         else
5331                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5332                                         (struct packet_lin_dma *) user_pkt);
5333                         break;
5334
5335                 case PACKET_WREG_32:
5336                 case PACKET_MSG_LONG:
5337                 case PACKET_MSG_SHORT:
5338                 case PACKET_REPEAT:
5339                 case PACKET_FENCE:
5340                 case PACKET_NOP:
5341                 case PACKET_ARB_POINT:
5342                         parser->patched_cb_size += pkt_size;
5343                         break;
5344
5345                 default:
5346                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5347                                 pkt_id);
5348                         rc = -EINVAL;
5349                         break;
5350                 }
5351
5352                 if (rc)
5353                         break;
5354         }
5355
5356         /*
5357          * The new CB should have space at the end for two MSG_PROT packets:
5358          * 1. A packet that will act as a completion packet
5359          * 2. A packet that will generate MSI-X interrupt
5360          */
5361         if (parser->completion)
5362                 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5363
5364         return rc;
5365 }
5366
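/*
 * Second pass for a single LIN_DMA packet in the no-MMU flow: replace the
 * user's host virtual address with the DMA addresses of the pinned pages.
 * Each merged SG chunk becomes its own packet_lin_dma; the engine-barrier
 * bit is kept only on the first descriptor, WR_COMP is suppressed on all
 * of them and then restored on the last descriptor exactly as the user
 * requested it.
 */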
5367 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5368                                 struct hl_cs_parser *parser,
5369                                 struct packet_lin_dma *user_dma_pkt,
5370                                 struct packet_lin_dma *new_dma_pkt,
5371                                 u32 *new_dma_pkt_size)
5372 {
5373         struct hl_userptr *userptr;
5374         struct scatterlist *sg, *sg_next_iter;
5375         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5376         u64 len, len_next;
5377         dma_addr_t dma_addr, dma_addr_next;
5378         u64 device_memory_addr, addr;
5379         enum dma_data_direction dir;
5380         struct sg_table *sgt;
5381         bool src_in_host = false;
5382         bool skip_host_mem_pin = false;
5383         bool user_memset;
5384
5385         ctl = le32_to_cpu(user_dma_pkt->ctl);
5386
5387         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5388                 src_in_host = true;
5389
5390         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5391                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5392
5393         if (src_in_host) {
5394                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5395                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5396                 dir = DMA_TO_DEVICE;
5397                 if (user_memset)
5398                         skip_host_mem_pin = true;
5399         } else {
5400                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5401                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5402                 dir = DMA_FROM_DEVICE;
5403         }
5404
5405         if ((!skip_host_mem_pin) &&
5406                 (!hl_userptr_is_pinned(hdev, addr,
5407                                         le32_to_cpu(user_dma_pkt->tsize),
5408                                         parser->job_userptr_list, &userptr))) {
5409                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5410                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5411                 return -EFAULT;
5412         }
5413
5414         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5415                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5416                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5417                 return 0;
5418         }
5419
5420         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5421
5422         sgt = userptr->sgt;
5423         dma_desc_cnt = 0;
5424
5425         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5426                 len = sg_dma_len(sg);
5427                 dma_addr = sg_dma_address(sg);
5428
5429                 if (len == 0)
5430                         break;
5431
5432                 while ((count + 1) < sgt->nents) {
5433                         sg_next_iter = sg_next(sg);
5434                         len_next = sg_dma_len(sg_next_iter);
5435                         dma_addr_next = sg_dma_address(sg_next_iter);
5436
5437                         if (len_next == 0)
5438                                 break;
5439
5440                         if ((dma_addr + len == dma_addr_next) &&
5441                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5442                                 len += len_next;
5443                                 count++;
5444                                 sg = sg_next_iter;
5445                         } else {
5446                                 break;
5447                         }
5448                 }
5449
5450                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5451                 if (likely(dma_desc_cnt))
5452                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5453                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5454                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5455                 new_dma_pkt->tsize = cpu_to_le32(len);
5456
5457                 if (dir == DMA_TO_DEVICE) {
5458                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5459                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5460                 } else {
5461                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5462                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5463                 }
5464
5465                 if (!user_memset)
5466                         device_memory_addr += len;
5467                 dma_desc_cnt++;
5468                 new_dma_pkt++;
5469         }
5470
5471         if (!dma_desc_cnt) {
5472                 dev_err(hdev->dev,
5473                         "No SG entries found when patching DMA packet\n");
5474                 return -EFAULT;
5475         }
5476
5477         /* Fix the last dma packet - wrcomp must be as user set it */
5478         new_dma_pkt--;
5479         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5480
5481         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5482
5483         return 0;
5484 }
5485
5486 static int gaudi_patch_cb(struct hl_device *hdev,
5487                                 struct hl_cs_parser *parser)
5488 {
5489         u32 cb_parsed_length = 0;
5490         u32 cb_patched_cur_length = 0;
5491         int rc = 0;
5492
5493         /* user_cb_size is more than 0 so the loop will always be executed */
5494         while (cb_parsed_length < parser->user_cb_size) {
5495                 enum packet_id pkt_id;
5496                 u16 pkt_size;
5497                 u32 new_pkt_size = 0;
5498                 struct gaudi_packet *user_pkt, *kernel_pkt;
5499
5500                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5501                 kernel_pkt = parser->patched_cb->kernel_address +
5502                                         cb_patched_cur_length;
5503
5504                 pkt_id = (enum packet_id) (
5505                                 (le64_to_cpu(user_pkt->header) &
5506                                 PACKET_HEADER_PACKET_ID_MASK) >>
5507                                         PACKET_HEADER_PACKET_ID_SHIFT);
5508
5509                 if (!validate_packet_id(pkt_id)) {
5510                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5511                         rc = -EINVAL;
5512                         break;
5513                 }
5514
5515                 pkt_size = gaudi_packet_sizes[pkt_id];
5516                 cb_parsed_length += pkt_size;
5517                 if (cb_parsed_length > parser->user_cb_size) {
5518                         dev_err(hdev->dev,
5519                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5520                         rc = -EINVAL;
5521                         break;
5522                 }
5523
5524                 switch (pkt_id) {
5525                 case PACKET_LIN_DMA:
5526                         rc = gaudi_patch_dma_packet(hdev, parser,
5527                                         (struct packet_lin_dma *) user_pkt,
5528                                         (struct packet_lin_dma *) kernel_pkt,
5529                                         &new_pkt_size);
5530                         cb_patched_cur_length += new_pkt_size;
5531                         break;
5532
5533                 case PACKET_MSG_PROT:
5534                         dev_err(hdev->dev,
5535                                 "User not allowed to use MSG_PROT\n");
5536                         rc = -EPERM;
5537                         break;
5538
5539                 case PACKET_CP_DMA:
5540                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5541                         rc = -EPERM;
5542                         break;
5543
5544                 case PACKET_STOP:
5545                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5546                         rc = -EPERM;
5547                         break;
5548
5549                 case PACKET_WREG_32:
5550                 case PACKET_WREG_BULK:
5551                 case PACKET_MSG_LONG:
5552                 case PACKET_MSG_SHORT:
5553                 case PACKET_REPEAT:
5554                 case PACKET_FENCE:
5555                 case PACKET_NOP:
5556                 case PACKET_ARB_POINT:
5557                 case PACKET_LOAD_AND_EXE:
5558                         memcpy(kernel_pkt, user_pkt, pkt_size);
5559                         cb_patched_cur_length += pkt_size;
5560                         break;
5561
5562                 default:
5563                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5564                                 pkt_id);
5565                         rc = -EINVAL;
5566                         break;
5567                 }
5568
5569                 if (rc)
5570                         break;
5571         }
5572
5573         return rc;
5574 }
5575
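/*
 * CS parsing has two flavors. With the MMU enabled, the user CB is copied
 * into a kernel-owned patched CB (with room for the two trailing MSG_PROT
 * packets when a completion is requested) and validated in place, since
 * user addresses are virtual and get translated by the MMU at execution
 * time. Without the MMU, the CB is validated first and then patched so
 * every LIN_DMA packet ends up carrying real DMA addresses of pinned host
 * memory.
 */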
5576 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5577                 struct hl_cs_parser *parser)
5578 {
5579         u64 patched_cb_handle;
5580         u32 patched_cb_size;
5581         struct hl_cb *user_cb;
5582         int rc;
5583
5584         /*
5585          * The new CB should have space at the end for two MSG_PROT packets:
5586          * 1. A packet that will act as a completion packet
5587          * 2. A packet that will generate MSI interrupt
5588          */
5589         if (parser->completion)
5590                 parser->patched_cb_size = parser->user_cb_size +
5591                                 sizeof(struct packet_msg_prot) * 2;
5592         else
5593                 parser->patched_cb_size = parser->user_cb_size;
5594
5595         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5596                                 parser->patched_cb_size, false, false,
5597                                 &patched_cb_handle);
5598
5599         if (rc) {
5600                 dev_err(hdev->dev,
5601                         "Failed to allocate patched CB for DMA CS %d\n",
5602                         rc);
5603                 return rc;
5604         }
5605
5606         patched_cb_handle >>= PAGE_SHIFT;
5607         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5608                                 (u32) patched_cb_handle);
5609         /* hl_cb_get should never fail */
5610         if (!parser->patched_cb) {
5611                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5612                         (u32) patched_cb_handle);
5613                 rc = -EFAULT;
5614                 goto out;
5615         }
5616
5617         /*
5618          * The check that parser->user_cb_size <= parser->user_cb->size was done
5619          * in validate_queue_index().
5620          */
5621         memcpy(parser->patched_cb->kernel_address,
5622                 parser->user_cb->kernel_address,
5623                 parser->user_cb_size);
5624
5625         patched_cb_size = parser->patched_cb_size;
5626
5627         /* Validate patched CB instead of user CB */
5628         user_cb = parser->user_cb;
5629         parser->user_cb = parser->patched_cb;
5630         rc = gaudi_validate_cb(hdev, parser, true);
5631         parser->user_cb = user_cb;
5632
5633         if (rc) {
5634                 hl_cb_put(parser->patched_cb);
5635                 goto out;
5636         }
5637
5638         if (patched_cb_size != parser->patched_cb_size) {
5639                 dev_err(hdev->dev, "user CB size mismatch\n");
5640                 hl_cb_put(parser->patched_cb);
5641                 rc = -EINVAL;
5642                 goto out;
5643         }
5644
5645 out:
5646         /*
5647          * Always call cb destroy here because we still have 1 reference
5648          * to it from the earlier cb_get. After the job is completed,
5649          * cb_put will release it, but here we want to remove it from the
5650          * idr.
5651          */
5652         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5653                                         patched_cb_handle << PAGE_SHIFT);
5654
5655         return rc;
5656 }
5657
5658 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5659                 struct hl_cs_parser *parser)
5660 {
5661         u64 patched_cb_handle;
5662         int rc;
5663
5664         rc = gaudi_validate_cb(hdev, parser, false);
5665
5666         if (rc)
5667                 goto free_userptr;
5668
5669         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5670                                 parser->patched_cb_size, false, false,
5671                                 &patched_cb_handle);
5672         if (rc) {
5673                 dev_err(hdev->dev,
5674                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5675                 goto free_userptr;
5676         }
5677
5678         patched_cb_handle >>= PAGE_SHIFT;
5679         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5680                                 (u32) patched_cb_handle);
5681         /* hl_cb_get should never fail here */
5682         if (!parser->patched_cb) {
5683                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5684                                 (u32) patched_cb_handle);
5685                 rc = -EFAULT;
5686                 goto out;
5687         }
5688
5689         rc = gaudi_patch_cb(hdev, parser);
5690
5691         if (rc)
5692                 hl_cb_put(parser->patched_cb);
5693
5694 out:
5695         /*
5696          * Always call cb destroy here because we still have 1 reference
5697          * to it from the earlier cb_get. After the job is completed,
5698          * cb_put will release it, but here we want to remove it from the
5699          * idr.
5700          */
5701         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5702                                 patched_cb_handle << PAGE_SHIFT);
5703
5704 free_userptr:
5705         if (rc)
5706                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5707         return rc;
5708 }
5709
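/*
 * Jobs for internal queues are not patched; the CB only has to reside in a
 * region the engine can fetch from directly: the user part of SRAM, the
 * user part of DRAM, or the PMMU virtual address range. A NIC queue is
 * additionally rejected if its corresponding HW_CAP_NIC bit was never set,
 * i.e. the port is disabled.
 */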
5710 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5711                                         struct hl_cs_parser *parser)
5712 {
5713         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5714         struct gaudi_device *gaudi = hdev->asic_specific;
5715         u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5716                 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5717
5718         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5719                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5720                         (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5721                 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5722                                 parser->hw_queue_id);
5723                 return -EINVAL;
5724         }
5725
5726         /* For internal queue jobs just check if CB address is valid */
5727         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5728                                         parser->user_cb_size,
5729                                         asic_prop->sram_user_base_address,
5730                                         asic_prop->sram_end_address))
5731                 return 0;
5732
5733         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5734                                         parser->user_cb_size,
5735                                         asic_prop->dram_user_base_address,
5736                                         asic_prop->dram_end_address))
5737                 return 0;
5738
5739         /* PMMU and HPMMU addresses are equal, check only one of them */
5740         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5741                                         parser->user_cb_size,
5742                                         asic_prop->pmmu.start_addr,
5743                                         asic_prop->pmmu.end_addr))
5744                 return 0;
5745
5746         dev_err(hdev->dev,
5747                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5748                 parser->user_cb, parser->user_cb_size);
5749
5750         return -EFAULT;
5751 }
5752
5753 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5754 {
5755         struct gaudi_device *gaudi = hdev->asic_specific;
5756
5757         if (parser->queue_type == QUEUE_TYPE_INT)
5758                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5759
5760         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5761                 return gaudi_parse_cb_mmu(hdev, parser);
5762         else
5763                 return gaudi_parse_cb_no_mmu(hdev, parser);
5764 }
5765
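/*
 * Append the two MSG_PROT packets that terminate an external-queue CB: the
 * first writes cq_val to the completion queue address (optionally with an
 * engine barrier), the second writes 1 to mmPCIE_MSI_INTR_0 + 4 * msi_vec
 * in multi-MSI mode, or to mmPCIE_CORE_MSI_REQ otherwise, which raises the
 * completion interrupt.
 */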
5766 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5767                                         void *kernel_address, u32 len,
5768                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
5769                                         bool eb)
5770 {
5771         struct gaudi_device *gaudi = hdev->asic_specific;
5772         struct packet_msg_prot *cq_pkt;
5773         u64 msi_addr;
5774         u32 tmp;
5775
5776         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5777
5778         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5779         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5780
5781         if (eb)
5782                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5783
5784         cq_pkt->ctl = cpu_to_le32(tmp);
5785         cq_pkt->value = cpu_to_le32(cq_val);
5786         cq_pkt->addr = cpu_to_le64(cq_addr);
5787
5788         cq_pkt++;
5789
5790         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5791         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5792         cq_pkt->ctl = cpu_to_le32(tmp);
5793         cq_pkt->value = cpu_to_le32(1);
5794
5795         if (gaudi->multi_msi_mode)
5796                 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5797         else
5798                 msi_addr = mmPCIE_CORE_MSI_REQ;
5799
5800         cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5801 }
5802
5803 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5804 {
5805         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5806 }
5807
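/*
 * Fill a device memory range with a given value by building a single
 * LIN_DMA packet with the MEMSET bit set (src_addr then carries the fill
 * value rather than a source address) and sending it through QMAN0 of DMA
 * channel 0.
 */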
5808 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5809                                         u32 size, u64 val)
5810 {
5811         struct packet_lin_dma *lin_dma_pkt;
5812         struct hl_cs_job *job;
5813         u32 cb_size, ctl, err_cause;
5814         struct hl_cb *cb;
5815         u64 id;
5816         int rc;
5817
5818         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5819         if (!cb)
5820                 return -EFAULT;
5821
5822         lin_dma_pkt = cb->kernel_address;
5823         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5824         cb_size = sizeof(*lin_dma_pkt);
5825
5826         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5827         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5828         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5829         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5830         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5831
5832         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5833         lin_dma_pkt->src_addr = cpu_to_le64(val);
5834         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5835         lin_dma_pkt->tsize = cpu_to_le32(size);
5836
5837         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5838         if (!job) {
5839                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5840                 rc = -ENOMEM;
5841                 goto release_cb;
5842         }
5843
5844         /* Verify DMA is OK */
5845         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5846         if (err_cause && !hdev->init_done) {
5847                 dev_dbg(hdev->dev,
5848                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5849                         err_cause);
5850                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5851         }
5852
5853         job->id = 0;
5854         job->user_cb = cb;
5855         atomic_inc(&job->user_cb->cs_cnt);
5856         job->user_cb_size = cb_size;
5857         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5858         job->patched_cb = job->user_cb;
5859         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5860
5861         hl_debugfs_add_job(hdev, job);
5862
5863         rc = gaudi_send_job_on_qman0(hdev, job);
5864         hl_debugfs_remove_job(hdev, job);
5865         kfree(job);
5866         atomic_dec(&cb->cs_cnt);
5867
5868         /* Verify DMA is OK */
5869         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5870         if (err_cause) {
5871                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5872                 rc = -EIO;
5873                 if (!hdev->init_done) {
5874                         dev_dbg(hdev->dev,
5875                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5876                                 err_cause);
5877                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5878                 }
5879         }
5880
5881 release_cb:
5882         id = cb->id;
5883         hl_cb_put(cb);
5884         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5885
5886         return rc;
5887 }
5888
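/*
 * Write the same value to num_regs consecutive configuration registers by
 * queueing one MSG_LONG packet per register, plus room for a trailing
 * MSG_PROT. E.g. clearing a full block of NUM_OF_SOB_IN_BLOCK sync objects
 * costs NUM_OF_SOB_IN_BLOCK MSG_LONG packets, which is why the resulting
 * CB size is checked against SZ_2M before allocation.
 */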
5889 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5890                                         u32 num_regs, u32 val)
5891 {
5892         struct packet_msg_long *pkt;
5893         struct hl_cs_job *job;
5894         u32 cb_size, ctl;
5895         struct hl_cb *cb;
5896         int i, rc;
5897
5898         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5899
5900         if (cb_size > SZ_2M) {
5901                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M >> 20);
5902                 return -ENOMEM;
5903         }
5904
5905         cb = hl_cb_kernel_create(hdev, cb_size, false);
5906         if (!cb)
5907                 return -EFAULT;
5908
5909         pkt = cb->kernel_address;
5910
5911         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5912         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5913         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5914         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5915         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5916
5917         for (i = 0; i < num_regs ; i++, pkt++) {
5918                 pkt->ctl = cpu_to_le32(ctl);
5919                 pkt->value = cpu_to_le32(val);
5920                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5921         }
5922
5923         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5924         if (!job) {
5925                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5926                 rc = -ENOMEM;
5927                 goto release_cb;
5928         }
5929
5930         job->id = 0;
5931         job->user_cb = cb;
5932         atomic_inc(&job->user_cb->cs_cnt);
5933         job->user_cb_size = cb_size;
5934         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5935         job->patched_cb = job->user_cb;
5936         job->job_cb_size = cb_size;
5937
5938         hl_debugfs_add_job(hdev, job);
5939
5940         rc = gaudi_send_job_on_qman0(hdev, job);
5941         hl_debugfs_remove_job(hdev, job);
5942         kfree(job);
5943         atomic_dec(&cb->cs_cnt);
5944
5945 release_cb:
5946         hl_cb_put(cb);
5947         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5948
5949         return rc;
5950 }
5951
5952 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5953 {
5954         u64 base_addr;
5955         u32 num_regs;
5956         int rc;
5957
5958         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5959         num_regs = NUM_OF_SOB_IN_BLOCK;
5960         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5961         if (rc) {
5962                 dev_err(hdev->dev, "failed resetting SM registers\n");
5963                 return -ENOMEM;
5964         }
5965
5966         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5967         num_regs = NUM_OF_SOB_IN_BLOCK;
5968         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5969         if (rc) {
5970                 dev_err(hdev->dev, "failed resetting SM registers\n");
5971                 return -ENOMEM;
5972         }
5973
5974         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5975         num_regs = NUM_OF_SOB_IN_BLOCK;
5976         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5977         if (rc) {
5978                 dev_err(hdev->dev, "failed resetting SM registers\n");
5979                 return -ENOMEM;
5980         }
5981
5982         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5983         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5984         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5985         if (rc) {
5986                 dev_err(hdev->dev, "failed resetting SM registers\n");
5987                 return -ENOMEM;
5988         }
5989
5990         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5991         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5992         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5993         if (rc) {
5994                 dev_err(hdev->dev, "failed resetting SM registers\n");
5995                 return -ENOMEM;
5996         }
5997
5998         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5999         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6000         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6001         if (rc) {
6002                 dev_err(hdev->dev, "failed resetting SM registers\n");
6003                 return -ENOMEM;
6004         }
6005
6006         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6007                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6008         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6009         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6010         if (rc) {
6011                 dev_err(hdev->dev, "failed resetting SM registers\n");
6012                 return -ENOMEM;
6013         }
6014
6015         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6016                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6017         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6018         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6019         if (rc) {
6020                 dev_err(hdev->dev, "failed resetting SM registers\n");
6021                 return -ENOMEM;
6022         }
6023
6024         return 0;
6025 }
6026
6027 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6028 {
6029         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6030                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6031         int i;
6032
6033         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6034                 u64 sob_addr = CFG_BASE +
6035                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6036                                 (i * sob_delta);
6037                 u32 dma_offset = i * DMA_CORE_OFFSET;
6038
6039                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6040                                 lower_32_bits(sob_addr));
6041                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6042                                 upper_32_bits(sob_addr));
6043                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6044
6045                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6046                  * modified by the user for SRAM reduction
6047                  */
6048                 if (i > 1)
6049                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6050                                                                 0x00000001);
6051         }
6052 }
6053
6054 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6055 {
6056         u32 qman_offset;
6057         int i;
6058
6059         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6060                 qman_offset = i * DMA_QMAN_OFFSET;
6061                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6062         }
6063
6064         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6065                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6066                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6067         }
6068
6069         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6070                 qman_offset = i * TPC_QMAN_OFFSET;
6071                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6072         }
6073
6074         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6075                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6076                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6077                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6078         }
6079 }
6080
6081 static int gaudi_restore_user_registers(struct hl_device *hdev)
6082 {
6083         int rc;
6084
6085         rc = gaudi_restore_sm_registers(hdev);
6086         if (rc)
6087                 return rc;
6088
6089         gaudi_restore_dma_registers(hdev);
6090         gaudi_restore_qm_registers(hdev);
6091
6092         return 0;
6093 }
6094
6095 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6096 {
6097         return 0;
6098 }
6099
6100 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6101 {
6102         struct asic_fixed_properties *prop = &hdev->asic_prop;
6103         struct gaudi_device *gaudi = hdev->asic_specific;
6104         u64 addr = prop->mmu_pgt_addr;
6105         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6106
6107         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6108                 return 0;
6109
6110         return gaudi_memset_device_memory(hdev, addr, size, 0);
6111 }
6112
6113 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6114 {
6115
6116 }
6117
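/*
 * The debugfs accessors below route a raw device address to the proper
 * aperture: CFG space goes through RREG32/WREG32, SRAM through its PCI
 * BAR, HBM/DRAM through the sliding HBM BAR window (moved to cover the
 * requested address and restored afterwards), and host physical addresses
 * are accessed directly via phys_to_virt(), but only for user-originated
 * requests and only when no IOMMU is present.
 */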
6118 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6119                         bool user_address, u32 *val)
6120 {
6121         struct asic_fixed_properties *prop = &hdev->asic_prop;
6122         u64 hbm_bar_addr, host_phys_end;
6123         int rc = 0;
6124
6125         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6126
6127         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6128
6129                 *val = RREG32(addr - CFG_BASE);
6130
6131         } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6132
6133                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6134
6135         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6136
6137                 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6138
6139                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6140
6141                 if (hbm_bar_addr != U64_MAX) {
6142                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6143                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6144                 }
6145
6146                 if (hbm_bar_addr == U64_MAX)
6147                         rc = -EIO;
6148
6149         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6150                         user_address && !iommu_present(&pci_bus_type)) {
6151
6152                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6153
6154         } else {
6155                 rc = -EFAULT;
6156         }
6157
6158         return rc;
6159 }
6160
6161 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6162                         bool user_address, u32 val)
6163 {
6164         struct asic_fixed_properties *prop = &hdev->asic_prop;
6165         u64 hbm_bar_addr, host_phys_end;
6166         int rc = 0;
6167
6168         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6169
6170         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6171
6172                 WREG32(addr - CFG_BASE, val);
6173
6174         } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6175
6176                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6177
6178         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6179
6180                 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6181
6182                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6183
6184                 if (hbm_bar_addr != U64_MAX) {
6185                         writel(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6186                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6187                 }
6188
6189                 if (hbm_bar_addr == U64_MAX)
6190                         rc = -EIO;
6191
6192         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6193                         user_address && !iommu_present(&pci_bus_type)) {
6194
6195                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6196
6197         } else {
6198                 rc = -EFAULT;
6199         }
6200
6201         return rc;
6202 }
6203
6204 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6205                                 bool user_address, u64 *val)
6206 {
6207         struct asic_fixed_properties *prop = &hdev->asic_prop;
6208         u64 hbm_bar_addr, host_phys_end;
6209         int rc = 0;
6210
6211         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6212
6213         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6214
6215                 u32 val_l = RREG32(addr - CFG_BASE);
6216                 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6217
6218                 *val = (((u64) val_h) << 32) | val_l;
6219
6220         } else if ((addr >= SRAM_BASE_ADDR) &&
6221                         (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6222
6223                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6224
6225         } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6226
6227                 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6228
6229                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6230
6231                 if (hbm_bar_addr != U64_MAX) {
6232                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6233                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6234                 }
6235
6236                 if (hbm_bar_addr == U64_MAX)
6237                         rc = -EIO;
6238
6239         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6240                         user_address && !iommu_present(&pci_bus_type)) {
6241
6242                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6243
6244         } else {
6245                 rc = -EFAULT;
6246         }
6247
6248         return rc;
6249 }
6250
6251 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6252                                 bool user_address, u64 val)
6253 {
6254         struct asic_fixed_properties *prop = &hdev->asic_prop;
6255         u64 hbm_bar_addr, host_phys_end;
6256         int rc = 0;
6257
6258         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6259
6260         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6261
6262                 WREG32(addr - CFG_BASE, lower_32_bits(val));
6263                 WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
6264
6265         } else if ((addr >= SRAM_BASE_ADDR) &&
6266                         (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6267
6268                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6269
6270         } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6271
6272                 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6273
6274                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6275
6276                 if (hbm_bar_addr != U64_MAX) {
6277                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6278                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6279                 }
6280
6281                 if (hbm_bar_addr == U64_MAX)
6282                         rc = -EIO;
6283
6284         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6285                         user_address && !iommu_present(&pci_bus_type)) {
6286
6287                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6288
6289         } else {
6290                 rc = -EFAULT;
6291         }
6292
6293         return rc;
6294 }
6295
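/*
 * Issue a single transfer on a DMA core engine directly through its CORE
 * registers, bypassing the QMAN: program source, destination and size,
 * kick the engine with the LIN commit bit and poll STS0 until the busy bit
 * clears. The debugfs DMA-read path below uses this to move up to SZ_2M
 * per iteration into a coherent kernel buffer.
 */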
6296 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6297                                         u32 size_to_dma, dma_addr_t dma_addr)
6298 {
6299         u32 err_cause, val;
6300         u64 dma_offset;
6301         int rc;
6302
6303         dma_offset = dma_id * DMA_CORE_OFFSET;
6304
6305         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6306         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6307         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6308         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6309         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6310         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6311                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6312
6313         rc = hl_poll_timeout(
6314                 hdev,
6315                 mmDMA0_CORE_STS0 + dma_offset,
6316                 val,
6317                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6318                 0,
6319                 1000000);
6320
6321         if (rc) {
6322                 dev_err(hdev->dev,
6323                         "DMA %d timed out while reading 0x%llx\n",
6324                         dma_id, addr);
6325                 return -EIO;
6326         }
6327
6328         /* Verify DMA is OK */
6329         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6330         if (err_cause) {
6331                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6332                 dev_dbg(hdev->dev,
6333                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6334                         err_cause);
6335                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6336
6337                 return -EIO;
6338         }
6339
6340         return 0;
6341 }
6342
6343 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6344                                 void *blob_addr)
6345 {
6346         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6347         struct gaudi_device *gaudi = hdev->asic_specific;
6348         u32 qm_glbl_sts0, qm_cgm_sts;
6349         u64 dma_offset, qm_offset;
6350         dma_addr_t dma_addr;
6351         void *kernel_addr;
6352         bool is_eng_idle;
6353         int rc = 0, dma_id;
6354
6355         kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6356                                                 hdev, SZ_2M,
6357                                                 &dma_addr,
6358                                                 GFP_KERNEL | __GFP_ZERO);
6359
6360         if (!kernel_addr)
6361                 return -ENOMEM;
6362
6363         mutex_lock(&gaudi->clk_gate_mutex);
6364
6365         hdev->asic_funcs->disable_clock_gating(hdev);
6366
6367         hdev->asic_funcs->hw_queues_lock(hdev);
6368
6369         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6370         dma_offset = dma_id * DMA_CORE_OFFSET;
6371         qm_offset = dma_id * DMA_QMAN_OFFSET;
6372         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6373         qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6374         qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6375         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6376                       IS_DMA_IDLE(dma_core_sts0);
6377
6378         if (!is_eng_idle) {
6379                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6380                 dma_offset = dma_id * DMA_CORE_OFFSET;
6381                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6382                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6383                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6384                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6385                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6386                               IS_DMA_IDLE(dma_core_sts0);
6387
6388                 if (!is_eng_idle) {
6389                         dev_err_ratelimited(hdev->dev,
6390                                 "Can't read via DMA because it is BUSY\n");
6391                         rc = -EAGAIN;
6392                         goto out;
6393                 }
6394         }
6395
6396         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6397         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6398                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6399
6400         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6401          * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6402          * ASID
6403          */
6404         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6405
6406         /* Verify DMA is OK */
6407         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6408         if (err_cause) {
6409                 dev_dbg(hdev->dev,
6410                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6411                         err_cause);
6412                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6413         }
6414
6415         pos = 0;
6416         size_left = size;
6417         size_to_dma = SZ_2M;
6418
6419         while (size_left > 0) {
6420
6421                 if (size_left < SZ_2M)
6422                         size_to_dma = size_left;
6423
6424                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6425                                                 dma_addr);
6426                 if (rc)
6427                         break;
6428
6429                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6430
6431                 if (size_left <= SZ_2M)
6432                         break;
6433
6434                 pos += SZ_2M;
6435                 addr += SZ_2M;
6436                 size_left -= SZ_2M;
6437         }
6438
6439         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6440          * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6441          * ASID
6442          */
6443         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6444                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6445
6446         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6447
6448 out:
6449         hdev->asic_funcs->hw_queues_unlock(hdev);
6450
6451         hdev->asic_funcs->set_clock_gating(hdev);
6452
6453         mutex_unlock(&gaudi->clk_gate_mutex);
6454
6455         hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6456                                                 dma_addr);
6457
6458         return rc;
6459 }
6460
6461 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6462 {
6463         struct gaudi_device *gaudi = hdev->asic_specific;
6464
6465         if (hdev->reset_info.hard_reset_pending)
6466                 return U64_MAX;
6467
6468         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6469                         (addr - gaudi->hbm_bar_cur_addr));
6470 }
6471
6472 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6473 {
6474         struct gaudi_device *gaudi = hdev->asic_specific;
6475
6476         if (hdev->reset_info.hard_reset_pending)
6477                 return;
6478
6479         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6480                         (addr - gaudi->hbm_bar_cur_addr));
6481 }
6482
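/*
 * Program the ASID of an engine's non-secure/user properties register and
 * clear its MMBP (MMU bypass) bit. The two register accesses below are
 * effectively a read-modify-write of the form:
 *
 *     val = (RREG32(reg) & ~0x7FF) | asid;
 *     WREG32(reg, val);
 *
 * i.e. bits [10:0] (the ASID field plus the MMBP bit) are cleared and the
 * new ASID is written into the low bits.
 */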
6483 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6484 {
6485         /* mask to zero the MMBP and ASID bits */
6486         WREG32_AND(reg, ~0x7FF);
6487         WREG32_OR(reg, asid);
6488 }
6489
6490 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6491 {
6492         struct gaudi_device *gaudi = hdev->asic_specific;
6493
6494         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6495                 return;
6496
6497         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6498                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6499                 return;
6500         }
6501
6502         mutex_lock(&gaudi->clk_gate_mutex);
6503
6504         hdev->asic_funcs->disable_clock_gating(hdev);
6505
6506         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6507         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6508         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6509         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6510         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6511
6512         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6513         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6514         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6515         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6516         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6517
6518         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6519         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6520         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6521         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6522         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6523
6524         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6525         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6526         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6527         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6528         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6529
6530         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6531         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6532         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6533         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6534         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6535
6536         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6537         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6538         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6539         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6540         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6541
6542         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6543         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6544         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6545         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6546         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6547
6548         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6549         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6550         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6551         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6552         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6553
6554         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6555         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6556         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6557         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6558         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6559         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6560         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6561         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6562
6563         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6564         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6565         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6566         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6567         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6568         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6569         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6570
6571         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6572         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6573         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6574         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6575         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6576         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6577         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6578
6579         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6580         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6581         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6582         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6583         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6584         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6585         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6586
6587         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6588         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6589         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6590         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6591         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6592         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6593         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6594
6595         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6596         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6597         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6598         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6599         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6600         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6601         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6602
6603         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6604         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6605         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6606         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6607         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6608         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6609         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6610
6611         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6612         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6613         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6614         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6615         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6616         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6617         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6618
6619         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6620         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6621         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6622         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6623         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6624         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6625         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6626
6627         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6628         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6629         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6630         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6631         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6632         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6633         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6634         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6635         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6636         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6637
6638         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6639         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6640         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6641         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6642         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6643         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6644         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6645         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6646         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6647         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6648         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6649         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6650
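        /* NIC QMANs are programmed only for NICs that were actually initialized */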
6651         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6652                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6653                                 asid);
6654                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6655                                 asid);
6656                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6657                                 asid);
6658                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6659                                 asid);
6660                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6661                                 asid);
6662         }
6663
6664         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6665                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6666                                 asid);
6667                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6668                                 asid);
6669                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6670                                 asid);
6671                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6672                                 asid);
6673                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6674                                 asid);
6675         }
6676
6677         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6678                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6679                                 asid);
6680                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6681                                 asid);
6682                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6683                                 asid);
6684                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6685                                 asid);
6686                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6687                                 asid);
6688         }
6689
6690         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6691                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6692                                 asid);
6693                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6694                                 asid);
6695                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6696                                 asid);
6697                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6698                                 asid);
6699                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6700                                 asid);
6701         }
6702
6703         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6704                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6705                                 asid);
6706                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6707                                 asid);
6708                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6709                                 asid);
6710                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6711                                 asid);
6712                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6713                                 asid);
6714         }
6715
6716         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6717                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6718                                 asid);
6719                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6720                                 asid);
6721                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6722                                 asid);
6723                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6724                                 asid);
6725                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6726                                 asid);
6727         }
6728
6729         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6730                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6731                                 asid);
6732                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6733                                 asid);
6734                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6735                                 asid);
6736                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6737                                 asid);
6738                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6739                                 asid);
6740         }
6741
6742         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6743                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6744                                 asid);
6745                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6746                                 asid);
6747                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6748                                 asid);
6749                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6750                                 asid);
6751                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6752                                 asid);
6753         }
6754
6755         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6756                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6757                                 asid);
6758                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6759                                 asid);
6760                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6761                                 asid);
6762                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6763                                 asid);
6764                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6765                                 asid);
6766         }
6767
6768         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6769                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6770                                 asid);
6771                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6772                                 asid);
6773                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6774                                 asid);
6775                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6776                                 asid);
6777                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6778                                 asid);
6779         }
6780
6781         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6782         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6783
6784         hdev->asic_funcs->set_clock_gating(hdev);
6785
6786         mutex_unlock(&gaudi->clk_gate_mutex);
6787 }
6788
6789 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6790                 struct hl_cs_job *job)
6791 {
6792         struct packet_msg_prot *fence_pkt;
6793         u32 *fence_ptr;
6794         dma_addr_t fence_dma_addr;
6795         struct hl_cb *cb;
6796         u32 tmp, timeout, dma_offset;
6797         int rc;
6798
6799         if (hdev->pldm)
6800                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6801         else
6802                 timeout = HL_DEVICE_TIMEOUT_USEC;
6803
6804         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6805                 dev_err_ratelimited(hdev->dev,
6806                         "Can't send driver job on QMAN0 because the device is not idle\n");
6807                 return -EBUSY;
6808         }
6809
6810         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6811                                                         &fence_dma_addr);
6812         if (!fence_ptr) {
6813                 dev_err(hdev->dev,
6814                         "Failed to allocate fence memory for QMAN0\n");
6815                 return -ENOMEM;
6816         }
6817
6818         cb = job->patched_cb;
6819
6820         fence_pkt = cb->kernel_address +
6821                         job->job_cb_size - sizeof(struct packet_msg_prot);
6822
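        /*
         * The patched CB ends with a MSG_PROT packet; set it up to write the
         * fence value to the fence buffer allocated above, so the driver can
         * poll that buffer to detect completion of the job.
         */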
6823         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6824         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6825         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6826
6827         fence_pkt->ctl = cpu_to_le32(tmp);
6828         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6829         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6830
6831         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6832
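        /*
         * Temporarily raise the DMA core protection bit so the driver's job
         * can run on this channel; it is dropped again (leaving only error
         * reporting) in the cleanup path below.
         */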
6833         WREG32(mmDMA0_CORE_PROT + dma_offset,
6834                         BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6835
6836         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6837                                         job->job_cb_size, cb->bus_address);
6838         if (rc) {
6839                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6840                 goto free_fence_ptr;
6841         }
6842
6843         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6844                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6845                                 timeout, true);
6846
6847         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6848
6849         if (rc == -ETIMEDOUT) {
6850                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6851                 goto free_fence_ptr;
6852         }
6853
6854 free_fence_ptr:
6855         WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6856
6857         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6858                                         fence_dma_addr);
6859         return rc;
6860 }
6861
6862 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6863 {
6864         if (event_type >= GAUDI_EVENT_SIZE)
6865                 goto event_not_supported;
6866
6867         if (!gaudi_irq_map_table[event_type].valid)
6868                 goto event_not_supported;
6869
6870                 snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6871
6872         return;
6873
6874 event_not_supported:
6875         snprintf(desc, size, "N/A");
6876 }
6877
6878 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6879                                                         bool is_write, s32 *engine_id_1,
6880                                                         s32 *engine_id_2)
6881 {
6882         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6883
6884         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6885                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6886
6887         switch (x_y) {
6888         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6889         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6890                 dma_id[0] = 0;
6891                 dma_id[1] = 2;
6892                 break;
6893         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6894         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6895                 dma_id[0] = 1;
6896                 dma_id[1] = 3;
6897                 break;
6898         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6899         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6900                 dma_id[0] = 4;
6901                 dma_id[1] = 6;
6902                 break;
6903         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6904         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6905                 dma_id[0] = 5;
6906                 dma_id[1] = 7;
6907                 break;
6908         default:
6909                 goto unknown_initiator;
6910         }
6911
6912         for (i = 0 ; i < 2 ; i++) {
6913                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6914                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6915         }
6916
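        /*
         * Each DMA_IF location is shared by two DMA cores; check which of the
         * two latched the matching HBW read/write error to identify the actual
         * initiator. If both (or neither) did, report the pair of candidates.
         */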
6917         switch (x_y) {
6918         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6919         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6920                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6921                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6922                         return "DMA0";
6923                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6924                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6925                         return "DMA2";
6926                 } else {
6927                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6928                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6929                         return "DMA0 or DMA2";
6930                 }
6931         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6932         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6933                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6934                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6935                         return "DMA1";
6936                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6937                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6938                         return "DMA3";
6939                 } else {
6940                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6941                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6942                         return "DMA1 or DMA3";
6943                 }
6944         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6945         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6946                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6947                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6948                         return "DMA4";
6949                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6950                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6951                         return "DMA6";
6952                 } else {
6953                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6954                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6955                         return "DMA4 or DMA6";
6956                 }
6957         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6958         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6959                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6960                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6961                         return "DMA5";
6962                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6963                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6964                         return "DMA7";
6965                 } else {
6966                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6967                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6968                         return "DMA5 or DMA7";
6969                 }
6970         }
6971
6972 unknown_initiator:
6973         return "unknown initiator";
6974 }
6975
6976 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6977                                                         u32 *engine_id_1, u32 *engine_id_2)
6978 {
6979         u32 val, x_y, axi_id;
6980
6981         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6982                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
6983         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6984                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6985         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6986                         RAZWI_INITIATOR_AXI_ID_SHIFT);
6987
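        /*
         * The X/Y fields identify the initiator's location on the chip; some
         * locations host more than one agent (e.g. a TPC and a NIC), in which
         * case the AXI ID is needed to tell them apart.
         */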
6988         switch (x_y) {
6989         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6990                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6991                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6992                         return "TPC0";
6993                 }
6994                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6995                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6996                         return "NIC0";
6997                 }
6998                 break;
6999         case RAZWI_INITIATOR_ID_X_Y_TPC1:
7000                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
7001                 return "TPC1";
7002         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
7003         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
7004                 *engine_id_1 = GAUDI_ENGINE_ID_MME_0;
7005                 return "MME0";
7006         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
7007         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
7008                 *engine_id_1 = GAUDI_ENGINE_ID_MME_1;
7009                 return "MME1";
7010         case RAZWI_INITIATOR_ID_X_Y_TPC2:
7011                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
7012                 return "TPC2";
7013         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
7014                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
7015                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
7016                         return "TPC3";
7017                 }
7018                 /* PCI, CPU and PSOC do not have an engine id */
7019                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
7020                         return "PCI";
7021                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
7022                         return "CPU";
7023                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
7024                         return "PSOC";
7025                 break;
7026         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7027         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7028         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7029         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7030         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7031         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7032         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7033         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7034                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
7035                                 engine_id_1, engine_id_2);
7036         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7037                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
7038                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
7039                         return "TPC4";
7040                 }
7041                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
7042                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
7043                         return "NIC1";
7044                 }
7045                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
7046                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
7047                         return "NIC2";
7048                 }
7049                 break;
7050         case RAZWI_INITIATOR_ID_X_Y_TPC5:
7051                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
7052                 return "TPC5";
7053         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7054         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7055                 *engine_id_1 = GAUDI_ENGINE_ID_MME_2;
7056                 return "MME2";
7057         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7058         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7059                 *engine_id_1 = GAUDI_ENGINE_ID_MME_3;
7060                 return "MME3";
7061         case RAZWI_INITIATOR_ID_X_Y_TPC6:
7062                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
7063                 return "TPC6";
7064         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7065                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
7066                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
7067                         return "TPC7";
7068                 }
7069                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
7070                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
7071                         return "NIC4";
7072                 }
7073                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
7074                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
7075                         return "NIC5";
7076                 }
7077                 break;
7078         default:
7079                 break;
7080         }
7081
7082         dev_err(hdev->dev,
7083                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7084                 val,
7085                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7086                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7087                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7088                         RAZWI_INITIATOR_AXI_ID_MASK);
7089
7090         return "unknown initiator";
7091 }
7092
7093 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_id_1,
7094                                                 u32 *engine_id_2)
7095 {
7096
7097         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7098                 dev_err_ratelimited(hdev->dev,
7099                         "RAZWI event caused by illegal write of %s\n",
7100                         gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
7101                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7102         }
7103
7104         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7105                 dev_err_ratelimited(hdev->dev,
7106                         "RAZWI event caused by illegal read of %s\n",
7107                         gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
7108                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7109         }
7110 }
7111
7112 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u8 *type)
7113 {
7114         struct gaudi_device *gaudi = hdev->asic_specific;
7115         u32 val;
7116
7117         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7118                 return;
7119
7120         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7121         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7122                 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7123                 *addr <<= 32;
7124                 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7125
7126                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
7127                 *type = HL_RAZWI_PAGE_FAULT;
7128
7129                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7130         }
7131
7132         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7133         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7134                 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7135                 *addr <<= 32;
7136                 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7137
7138                 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
7139                 *type = HL_RAZWI_MMU_ACCESS_ERROR;
7140
7141                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7142         }
7143 }
7144
7145 /*
7146  *  +-------------------+------------------------------------------------------+
7147  *  | Configuration Reg |                     Description                      |
7148  *  |      Address      |                                                      |
7149  *  +-------------------+------------------------------------------------------+
7150  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7151  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7152  *  |                   |0xF34 memory wrappers 63:32                           |
7153  *  |                   |0xF38 memory wrappers 95:64                           |
7154  *  |                   |0xF3C memory wrappers 127:96                          |
7155  *  +-------------------+------------------------------------------------------+
7156  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7157  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7158  *  |                   |0xF44 memory wrappers 63:32                           |
7159  *  |                   |0xF48 memory wrappers 95:64                           |
7160  *  |                   |0xF4C memory wrappers 127:96                          |
7161  *  +-------------------+------------------------------------------------------+
7162  */
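/*
 * Illustrative example: if memory wrapper 70 of a block reports a single-bit
 * error, the set bit is found in the third SERR register (offset 0xF38,
 * wrappers 95:64) at bit position 70 - 64 = 6, so the extracted wrapper index
 * is 6 + 32 * 2 = 70.
 */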
7163 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7164                 struct ecc_info_extract_params *params, u64 *ecc_address,
7165                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7166 {
7167         struct gaudi_device *gaudi = hdev->asic_specific;
7168         u32 i, num_mem_regs, reg, err_bit;
7169         u64 err_addr, err_word = 0;
7170         int rc = 0;
7171
7172         num_mem_regs = params->num_memories / 32 +
7173                         ((params->num_memories % 32) ? 1 : 0);
7174
7175         if (params->block_address >= CFG_BASE)
7176                 params->block_address -= CFG_BASE;
7177
7178         if (params->derr)
7179                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7180         else
7181                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7182
7183         if (params->disable_clock_gating) {
7184                 mutex_lock(&gaudi->clk_gate_mutex);
7185                 hdev->asic_funcs->disable_clock_gating(hdev);
7186         }
7187
7188         /* Set invalid wrapper index */
7189         *memory_wrapper_idx = 0xFF;
7190
7191         /* Iterate through memory wrappers, a single bit must be set */
7192         for (i = 0 ; i < num_mem_regs ; i++) {
7193                 /* per-group status registers sit at consecutive 4-byte offsets */
7194                 err_word = RREG32(err_addr + i * 4);
7195                 if (err_word) {
7196                         err_bit = __ffs(err_word);
7197                         *memory_wrapper_idx = err_bit + (32 * i);
7198                         break;
7199                 }
7200         }
7201
7202         if (*memory_wrapper_idx == 0xFF) {
7203                 dev_err(hdev->dev, "ECC error information cannot be found\n");
7204                 rc = -EINVAL;
7205                 goto enable_clk_gate;
7206         }
7207
7208         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7209                         *memory_wrapper_idx);
7210
7211         *ecc_address =
7212                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7213         *ecc_syndrom =
7214                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7215
7216         /* Clear error indication */
7217         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7218         if (params->derr)
7219                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7220         else
7221                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7222
7223         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7224
7225 enable_clk_gate:
7226         if (params->disable_clock_gating) {
7227                 hdev->asic_funcs->set_clock_gating(hdev);
7228
7229                 mutex_unlock(&gaudi->clk_gate_mutex);
7230         }
7231
7232         return rc;
7233 }
7234
7235 /*
7236  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7237  *
7238  * @idx: the current pi/ci value
7239  * @q_len: the queue length (power of 2)
7240  *
7241  * @return the cyclically decremented index
7242  */
7243 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7244 {
7245         u32 mask = q_len - 1;
7246
7247         /*
7248          * modular decrement is equivalent to adding (q_len - 1);
7249          * we then take the LSBs to make sure the value is in the
7250          * range [0, q_len - 1]
7251          */
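        /*
         * For example, with q_len = 8 and idx = 0:
         * (0 + 8 - 1) & 0x7 = 7, i.e. the index wraps back to the last entry.
         */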
7252         return (idx + q_len - 1) & mask;
7253 }
7254
7255 /**
7256  * gaudi_print_sw_config_stream_data - print SW config stream data
7257  *
7258  * @hdev: pointer to the habanalabs device structure
7259  * @stream: the QMAN's stream
7260  * @qman_base: base address of QMAN registers block
7261  */
7262 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7263                                                 u64 qman_base)
7264 {
7265         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7266         u32 cq_ptr_lo_off, size;
7267
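        /*
         * The per-stream CQ registers are laid out at a fixed stride; derive
         * this stream's PTR_LO/PTR_HI/TSIZE addresses from the QMAN base and
         * the stride between stream 0 and stream 1.
         */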
7268         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7269
7270         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7271                                                 stream * cq_ptr_lo_off;
7272         cq_ptr_hi = cq_ptr_lo +
7273                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7274         cq_tsize = cq_ptr_lo +
7275                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7276
7277         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7278         size = RREG32(cq_tsize);
7279         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
7280                                                         stream, cq_ptr, size);
7281 }
7282
7283 /**
7284  * gaudi_print_last_pqes_on_err - print last PQEs on error
7285  *
7286  * @hdev: pointer to the habanalabs device structure
7287  * @qid_base: first QID of the QMAN (out of 4 streams)
7288  * @stream: the QMAN's stream
7289  * @qman_base: base address of QMAN registers block
7290  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7291  */
7292 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7293                                                 u32 stream, u64 qman_base,
7294                                                 bool pr_sw_conf)
7295 {
7296         u32 ci, qm_ci_stream_off, queue_len;
7297         struct hl_hw_queue *q;
7298         u64 pq_ci;
7299         int i;
7300
7301         q = &hdev->kernel_queues[qid_base + stream];
7302
7303         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7304         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7305                                                 stream * qm_ci_stream_off;
7306
7307         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7308                                         q->int_queue_len : HL_QUEUE_LENGTH;
7309
7310         hdev->asic_funcs->hw_queues_lock(hdev);
7311
7312         if (pr_sw_conf)
7313                 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7314
7315         ci = RREG32(pq_ci);
7316
7317         /* we should start printing from ci - 1 */
7318         ci = gaudi_queue_idx_dec(ci, queue_len);
7319
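        /*
         * Walk backwards from the last consumed entry and print up to
         * PQ_FETCHER_CACHE_SIZE PQEs, stopping early at an uninitialized
         * (zero-length) entry.
         */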
7320         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7321                 struct hl_bd *bd;
7322                 u64 addr;
7323                 u32 len;
7324
7325                 bd = q->kernel_address;
7326                 bd += ci;
7327
7328                 len = le32_to_cpu(bd->len);
7329                 /* len 0 means an uninitialized entry - break */
7330                 if (!len)
7331                         break;
7332
7333                 addr = le64_to_cpu(bd->ptr);
7334
7335                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
7336                                                         stream, ci, addr, len);
7337
7338                 /* get previous ci, wrap if needed */
7339                 ci = gaudi_queue_idx_dec(ci, queue_len);
7340         }
7341
7342         hdev->asic_funcs->hw_queues_unlock(hdev);
7343 }
7344
7345 /**
7346  * print_qman_data_on_err - extract QMAN data on error
7347  *
7348  * @hdev: pointer to the habanalabs device structure
7349  * @qid_base: first QID of the QMAN (out of 4 streams)
7350  * @stream: the QMAN's stream
7351  * @qman_base: base address of QMAN registers block
7352  *
7353  * This function attempts to extract as much data as possible on a QMAN error.
7354  * For an upper CP, print the SW config stream data and the last 8 PQEs.
7355  * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7356  */
7357 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7358                                                 u32 stream, u64 qman_base)
7359 {
7360         u32 i;
7361
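        /* A stream index equal to QMAN_STREAMS denotes the lower CP */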
7362         if (stream != QMAN_STREAMS) {
7363                 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7364                                                                         true);
7365                 return;
7366         }
7367
7368         gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7369
7370         for (i = 0; i < QMAN_STREAMS; i++)
7371                 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7372                                                                         false);
7373 }
7374
7375 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7376                                           const char *qm_name,
7377                                           u64 qman_base,
7378                                           u32 qid_base)
7379 {
7380         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7381         u64 glbl_sts_addr, arb_err_addr;
7382         char reg_desc[32];
7383
7384         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7385         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7386
7387         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7388         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7389                 glbl_sts_clr_val = 0;
7390                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7391
7392                 if (!glbl_sts_val)
7393                         continue;
7394
7395                 if (i == QMAN_STREAMS)
7396                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7397                 else
7398                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7399
7400                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7401                         if (glbl_sts_val & BIT(j)) {
7402                                 dev_err_ratelimited(hdev->dev,
7403                                                 "%s %s. err cause: %s\n",
7404                                                 qm_name, reg_desc,
7405                                                 gaudi_qman_error_cause[j]);
7406                                 glbl_sts_clr_val |= BIT(j);
7407                         }
7408                 }
7409
7410                 /* Write 1 to clear errors */
7411                 if (!hdev->stop_on_err)
7412                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7413                 else
7414                         print_qman_data_on_err(hdev, qid_base, i, qman_base);
7415         }
7416
7417         arb_err_val = RREG32(arb_err_addr);
7418
7419         if (!arb_err_val)
7420                 return;
7421
7422         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7423                 if (arb_err_val & BIT(j)) {
7424                         dev_err_ratelimited(hdev->dev,
7425                                         "%s ARB_ERR. err cause: %s\n",
7426                                         qm_name,
7427                                         gaudi_qman_arb_error_cause[j]);
7428                 }
7429         }
7430 }
7431
7432 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7433                 struct hl_eq_sm_sei_data *sei_data)
7434 {
7435         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7436
7437         /* Flip the bits as the enum is ordered in the opposite way */
7438         index = (index ^ 0x3) & 0x3;
7439
7440         switch (sei_data->sei_cause) {
7441         case SM_SEI_SO_OVERFLOW:
7442                 dev_err_ratelimited(hdev->dev,
7443                         "%s SEI Error: SOB Group %u overflow/underflow",
7444                         gaudi_sync_manager_names[index],
7445                         le32_to_cpu(sei_data->sei_log));
7446                 break;
7447         case SM_SEI_LBW_4B_UNALIGNED:
7448                 dev_err_ratelimited(hdev->dev,
7449                         "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7450                         gaudi_sync_manager_names[index],
7451                         le32_to_cpu(sei_data->sei_log));
7452                 break;
7453         case SM_SEI_AXI_RESPONSE_ERR:
7454                 dev_err_ratelimited(hdev->dev,
7455                         "%s SEI Error: AXI ID %u response error",
7456                         gaudi_sync_manager_names[index],
7457                         le32_to_cpu(sei_data->sei_log));
7458                 break;
7459         default:
7460                 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7461                                 le32_to_cpu(sei_data->sei_log));
7462                 break;
7463         }
7464 }
7465
7466 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7467                 struct hl_eq_ecc_data *ecc_data)
7468 {
7469         struct ecc_info_extract_params params;
7470         u64 ecc_address = 0, ecc_syndrom = 0;
7471         u8 index, memory_wrapper_idx = 0;
7472         bool extract_info_from_fw;
7473         int rc;
7474
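        /*
         * When FW security is enabled the driver cannot read the ECC registers
         * itself, so rely on the information the FW delivered with the event.
         */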
7475         if (hdev->asic_prop.fw_security_enabled) {
7476                 extract_info_from_fw = true;
7477                 goto extract_ecc_info;
7478         }
7479
7480         switch (event_type) {
7481         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7482         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7483                 extract_info_from_fw = true;
7484                 break;
7485         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7486                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7487                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7488                 params.num_memories = 90;
7489                 params.derr = false;
7490                 params.disable_clock_gating = true;
7491                 extract_info_from_fw = false;
7492                 break;
7493         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7494                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7495                 params.block_address =
7496                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7497                 params.num_memories = 90;
7498                 params.derr = true;
7499                 params.disable_clock_gating = true;
7500                 extract_info_from_fw = false;
7501                 break;
7502         case GAUDI_EVENT_MME0_ACC_SERR:
7503         case GAUDI_EVENT_MME1_ACC_SERR:
7504         case GAUDI_EVENT_MME2_ACC_SERR:
7505         case GAUDI_EVENT_MME3_ACC_SERR:
7506                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7507                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7508                 params.num_memories = 128;
7509                 params.derr = false;
7510                 params.disable_clock_gating = true;
7511                 extract_info_from_fw = false;
7512                 break;
7513         case GAUDI_EVENT_MME0_ACC_DERR:
7514         case GAUDI_EVENT_MME1_ACC_DERR:
7515         case GAUDI_EVENT_MME2_ACC_DERR:
7516         case GAUDI_EVENT_MME3_ACC_DERR:
7517                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7518                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7519                 params.num_memories = 128;
7520                 params.derr = true;
7521                 params.disable_clock_gating = true;
7522                 extract_info_from_fw = false;
7523                 break;
7524         case GAUDI_EVENT_MME0_SBAB_SERR:
7525         case GAUDI_EVENT_MME1_SBAB_SERR:
7526         case GAUDI_EVENT_MME2_SBAB_SERR:
7527         case GAUDI_EVENT_MME3_SBAB_SERR:
7528                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7529                 params.block_address =
7530                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7531                 params.num_memories = 33;
7532                 params.derr = false;
7533                 params.disable_clock_gating = true;
7534                 extract_info_from_fw = false;
7535                 break;
7536         case GAUDI_EVENT_MME0_SBAB_DERR:
7537         case GAUDI_EVENT_MME1_SBAB_DERR:
7538         case GAUDI_EVENT_MME2_SBAB_DERR:
7539         case GAUDI_EVENT_MME3_SBAB_DERR:
7540                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7541                 params.block_address =
7542                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7543                 params.num_memories = 33;
7544                 params.derr = true;
7545                 params.disable_clock_gating = true;
7546                 extract_info_from_fw = false;
7547                 break;
7548         default:
7549                 return;
7550         }
7551
7552 extract_ecc_info:
7553         if (extract_info_from_fw) {
7554                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7555                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7556                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7557         } else {
7558                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7559                                 &ecc_syndrom, &memory_wrapper_idx);
7560                 if (rc)
7561                         return;
7562         }
7563
7564         dev_err(hdev->dev,
7565                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7566                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7567 }
7568
7569 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7570 {
7571         u64 qman_base;
7572         char desc[32];
7573         u32 qid_base;
7574         u8 index;
7575
7576         switch (event_type) {
7577         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7578                 index = event_type - GAUDI_EVENT_TPC0_QM;
7579                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7580                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7581                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7582                 break;
7583         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7584                 index = event_type - GAUDI_EVENT_MME0_QM;
7585                 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7586                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7587                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7588                 break;
7589         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7590                 index = event_type - GAUDI_EVENT_DMA0_QM;
7591                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7592                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7593                 if (index > 1)
7594                         qid_base++;
7595                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7596                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7597                 break;
7598         case GAUDI_EVENT_NIC0_QM0:
7599                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7600                 qman_base = mmNIC0_QM0_BASE;
7601                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7602                 break;
7603         case GAUDI_EVENT_NIC0_QM1:
7604                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7605                 qman_base = mmNIC0_QM1_BASE;
7606                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7607                 break;
7608         case GAUDI_EVENT_NIC1_QM0:
7609                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7610                 qman_base = mmNIC1_QM0_BASE;
7611                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7612                 break;
7613         case GAUDI_EVENT_NIC1_QM1:
7614                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7615                 qman_base = mmNIC1_QM1_BASE;
7616                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7617                 break;
7618         case GAUDI_EVENT_NIC2_QM0:
7619                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7620                 qman_base = mmNIC2_QM0_BASE;
7621                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7622                 break;
7623         case GAUDI_EVENT_NIC2_QM1:
7624                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7625                 qman_base = mmNIC2_QM1_BASE;
7626                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7627                 break;
7628         case GAUDI_EVENT_NIC3_QM0:
7629                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7630                 qman_base = mmNIC3_QM0_BASE;
7631                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7632                 break;
7633         case GAUDI_EVENT_NIC3_QM1:
7634                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7635                 qman_base = mmNIC3_QM1_BASE;
7636                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7637                 break;
7638         case GAUDI_EVENT_NIC4_QM0:
7639                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7640                 qman_base = mmNIC4_QM0_BASE;
7641                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7642                 break;
7643         case GAUDI_EVENT_NIC4_QM1:
7644                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7645                 qman_base = mmNIC4_QM1_BASE;
7646                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7647                 break;
7648         default:
7649                 return;
7650         }
7651
7652         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7653 }
7654
7655 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7656                                         bool razwi)
7657 {
7658         u32 engine_id_1, engine_id_2;
7659         char desc[64] = "";
7660         u64 razwi_addr = 0;
7661         u8 razwi_type;
7662         int rc;
7663
7664         /*
7665          * Init the engine ids as not valid; they get a valid value only if the razwi was
7666          * initiated by an engine that has an engine id.
7667          * Init the razwi type to its default; it is changed only if the razwi was caused by a
7668          * page fault or an MMU access error.
7669          */
7670         engine_id_1 = U16_MAX;
7671         engine_id_2 = U16_MAX;
7672         razwi_type = U8_MAX;
7673
7674         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7675         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7676                 event_type, desc);
7677
7678         if (razwi) {
7679                 gaudi_print_and_get_razwi_info(hdev, &engine_id_1, &engine_id_2);
7680                 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
7681
7682                 /* In case it's the first razwi, save its parameters */
7683                 rc = atomic_cmpxchg(&hdev->last_error.razwi_write_disable, 0, 1);
7684                 if (!rc) {
7685                         hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime;
7686                         hdev->last_error.razwi_timestamp = ktime_get();
7687                         hdev->last_error.razwi_addr = razwi_addr;
7688                         hdev->last_error.razwi_engine_id_1 = engine_id_1;
7689                         hdev->last_error.razwi_engine_id_2 = engine_id_2;
7690                         /*
7691                          * If the first engine id holds a non-valid value, the razwi
7692                          * initiator does not have an engine id
7693                          */
7694                         hdev->last_error.razwi_non_engine_initiator = (engine_id_1 == U16_MAX);
7695                         hdev->last_error.razwi_type = razwi_type;
7696
7697                 }
7698         }
7699 }
7700
7701 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7702                                         struct cpucp_pkt_sync_err *sync_err)
7703 {
7704         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7705
7706         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7707                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7708 }
7709
7710 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7711                                         struct hl_eq_fw_alive *fw_alive)
7712 {
7713         dev_err(hdev->dev,
7714                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7715                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7716                 "Minor" : "Critical", fw_alive->process_id,
7717                 fw_alive->thread_id, fw_alive->uptime_seconds);
7718 }
7719
7720 static int gaudi_non_hard_reset_late_init(struct hl_device *hdev)
7721 {
7722         /* GAUDI doesn't support any reset except hard-reset */
7723         return -EPERM;
7724 }
7725
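/*
 * Collect HBM ECC/parity interrupt information for one HBM device. When the
 * firmware reports ECC data (CPU_BOOT_DEV_STS0_HBM_ECC_EN), the FW-provided
 * hbm_ecc_data is decoded and printed. Otherwise, if FW security is disabled,
 * the memory-controller registers of each pseudo-channel are read directly,
 * printed and cleared. Returns -EIO if any error was found in the direct path.
 */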
7726 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7727                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7728 {
7729         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7730         int rc = 0;
7731
7732         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7733                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7734                 if (!hbm_ecc_data) {
7735                         dev_err(hdev->dev, "No FW ECC data");
7736                         return 0;
7737                 }
7738
7739                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7740                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7741                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7742                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7743                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7744                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7745                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7746                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7747                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7748                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7749                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7750                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7751                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7752                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7753
7754                 dev_err(hdev->dev,
7755                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7756                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7757                 dev_err(hdev->dev,
7758                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7759                         device, ch, hbm_ecc_data->first_addr, type,
7760                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7761                         hbm_ecc_data->dec_cnt);
7762                 return 0;
7763         }
7764
7765         if (hdev->asic_prop.fw_security_enabled) {
7766                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7767                 return 0;
7768         }
7769
7770         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7771         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7772                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7773                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7774                 if (val) {
7775                         rc = -EIO;
7776                         dev_err(hdev->dev,
7777                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7778                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7779                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7780                                 (val >> 4) & 0x1);
7781
7782                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7783                         dev_err(hdev->dev,
7784                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7785                                 device, ch * 2,
7786                                 RREG32(base + ch * 0x1000 + 0x064),
7787                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7788                                 (val2 & 0xFF0000) >> 16,
7789                                 (val2 & 0xFF000000) >> 24);
7790                 }
7791
7792                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7793                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7794                 if (val) {
7795                         rc = -EIO;
7796                         dev_err(hdev->dev,
7797                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7798                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7799                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7800                                 (val >> 4) & 0x1);
7801
7802                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7803                         dev_err(hdev->dev,
7804                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7805                                 device, ch * 2 + 1,
7806                                 RREG32(base + ch * 0x1000 + 0x074),
7807                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7808                                 (val2 & 0xFF0000) >> 16,
7809                                 (val2 & 0xFF000000) >> 24);
7810                 }
7811
7812                 /* Clear interrupts */
7813                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7814                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7815                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7816                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7817                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7818                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7819         }
7820
7821         val  = RREG32(base + 0x8F30);
7822         val2 = RREG32(base + 0x8F34);
7823         if (val | val2) {
7824                 rc = -EIO;
7825                 dev_err(hdev->dev,
7826                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7827                         device, val, val2);
7828         }
7829         val  = RREG32(base + 0x8F40);
7830         val2 = RREG32(base + 0x8F44);
7831         if (val | val2) {
7832                 rc = -EIO;
7833                 dev_err(hdev->dev,
7834                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7835                         device, val, val2);
7836         }
7837
7838         return rc;
7839 }
7840
7841 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7842 {
7843         switch (hbm_event_type) {
7844         case GAUDI_EVENT_HBM0_SPI_0:
7845         case GAUDI_EVENT_HBM0_SPI_1:
7846                 return 0;
7847         case GAUDI_EVENT_HBM1_SPI_0:
7848         case GAUDI_EVENT_HBM1_SPI_1:
7849                 return 1;
7850         case GAUDI_EVENT_HBM2_SPI_0:
7851         case GAUDI_EVENT_HBM2_SPI_1:
7852                 return 2;
7853         case GAUDI_EVENT_HBM3_SPI_0:
7854         case GAUDI_EVENT_HBM3_SPI_1:
7855                 return 3;
7856         default:
7857                 break;
7858         }
7859
7860         /* Should never happen */
7861         return 0;
7862 }
7863
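/*
 * Read and clear TPC_INTR_CAUSE for the given TPC and log every asserted cause
 * bit. Clock gating must be disabled around the register access, so this is
 * done by the driver rather than by CPU-CP. Returns true if a soft reset is
 * required (cause bit 15, the QM error).
 */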
7864 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7865                                         char *interrupt_name)
7866 {
7867         struct gaudi_device *gaudi = hdev->asic_specific;
7868         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7869         bool soft_reset_required = false;
7870
7871         /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
7872          * gating, and thus cannot be done in CPU-CP and should be done instead
7873          * by the driver.
7874          */
7875
7876         mutex_lock(&gaudi->clk_gate_mutex);
7877
7878         hdev->asic_funcs->disable_clock_gating(hdev);
7879
7880         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7881                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7882
7883         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7884                 if (tpc_interrupts_cause & BIT(i)) {
7885                         dev_err_ratelimited(hdev->dev,
7886                                         "TPC%d_%s interrupt cause: %s\n",
7887                                         tpc_id, interrupt_name,
7888                                         gaudi_tpc_interrupts_cause[i]);
7889                         /* If this is a QM error, we need to soft-reset */
7890                         if (i == 15)
7891                                 soft_reset_required = true;
7892                 }
7893
7894         /* Clear interrupts */
7895         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7896
7897         hdev->asic_funcs->set_clock_gating(hdev);
7898
7899         mutex_unlock(&gaudi->clk_gate_mutex);
7900
7901         return soft_reset_required;
7902 }
7903
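/*
 * Map TPC event ids back to a TPC index. The arithmetic below assumes the
 * layout of the event map: DEC events appear to be spaced two entries per TPC
 * (hence the shift by 1), while KRN_ERR events appear to be spaced six entries
 * per TPC (hence the division by 6).
 */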
7904 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7905 {
7906         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7907 }
7908
7909 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7910 {
7911         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7912 }
7913
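/*
 * Track clock throttling state under hdev->clk_throttling.lock: a *_ENV_S
 * event sets the matching reason bit (power or thermal), records a start
 * timestamp and clears the end timestamp, while the corresponding *_ENV_E
 * event clears the reason bit and records the end timestamp.
 */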
7914 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7915                                         u16 event_type)
7916 {
7917         ktime_t zero_time = ktime_set(0, 0);
7918
7919         mutex_lock(&hdev->clk_throttling.lock);
7920
7921         switch (event_type) {
7922         case GAUDI_EVENT_FIX_POWER_ENV_S:
7923                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7924                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7925                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7926                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7927                 dev_info_ratelimited(hdev->dev,
7928                         "Clock throttling due to power consumption\n");
7929                 break;
7930
7931         case GAUDI_EVENT_FIX_POWER_ENV_E:
7932                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7933                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7934                 dev_info_ratelimited(hdev->dev,
7935                         "Power envelope is safe, back to optimal clock\n");
7936                 break;
7937
7938         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7939                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7940                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7941                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7942                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7943                 dev_info_ratelimited(hdev->dev,
7944                         "Clock throttling due to overheating\n");
7945                 break;
7946
7947         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7948                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7949                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7950                 dev_info_ratelimited(hdev->dev,
7951                         "Thermal envelope is safe, back to optimal clock\n");
7952                 break;
7953
7954         default:
7955                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7956                         event_type);
7957                 break;
7958         }
7959
7960         mutex_unlock(&hdev->clk_throttling.lock);
7961 }
7962
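/*
 * Main event-queue handler. The event type is extracted from the EQ entry
 * control word, per-event statistics are updated and the event is dispatched:
 * uncorrectable (DERR) ECC and fatal FW/HBM events lead to a device reset with
 * the FW-fatal flag, correctable (SERR) ECC and QMAN/engine errors are logged
 * and the IRQ is unmasked in FW, and out-of-sync / FW-alive / reset-request
 * events also trigger a reset. At the reset_device label a hard reset bypasses
 * the FW request when FW security is enabled; otherwise a hard reset is done
 * only if hard_reset_on_fw_events is set, and the IRQ is merely unmasked if not.
 */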
7963 static void gaudi_handle_eqe(struct hl_device *hdev,
7964                                 struct hl_eq_entry *eq_entry)
7965 {
7966         struct gaudi_device *gaudi = hdev->asic_specific;
7967         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7968         u32 fw_fatal_err_flag = 0;
7969         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7970                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7971         bool reset_required;
7972         u8 cause;
7973         int rc;
7974
7975         if (event_type >= GAUDI_EVENT_SIZE) {
7976                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7977                                 event_type, GAUDI_EVENT_SIZE - 1);
7978                 return;
7979         }
7980
7981         gaudi->events_stat[event_type]++;
7982         gaudi->events_stat_aggregate[event_type]++;
7983
7984         switch (event_type) {
7985         case GAUDI_EVENT_PCIE_CORE_DERR:
7986         case GAUDI_EVENT_PCIE_IF_DERR:
7987         case GAUDI_EVENT_PCIE_PHY_DERR:
7988         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7989         case GAUDI_EVENT_MME0_ACC_DERR:
7990         case GAUDI_EVENT_MME0_SBAB_DERR:
7991         case GAUDI_EVENT_MME1_ACC_DERR:
7992         case GAUDI_EVENT_MME1_SBAB_DERR:
7993         case GAUDI_EVENT_MME2_ACC_DERR:
7994         case GAUDI_EVENT_MME2_SBAB_DERR:
7995         case GAUDI_EVENT_MME3_ACC_DERR:
7996         case GAUDI_EVENT_MME3_SBAB_DERR:
7997         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7998                 fallthrough;
7999         case GAUDI_EVENT_CPU_IF_ECC_DERR:
8000         case GAUDI_EVENT_PSOC_MEM_DERR:
8001         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
8002         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
8003         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
8004         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
8005         case GAUDI_EVENT_MMU_DERR:
8006         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
8007                 gaudi_print_irq_info(hdev, event_type, true);
8008                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8009                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
8010                 goto reset_device;
8011
8012         case GAUDI_EVENT_GIC500:
8013         case GAUDI_EVENT_AXI_ECC:
8014         case GAUDI_EVENT_L2_RAM_ECC:
8015         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
8016                 gaudi_print_irq_info(hdev, event_type, false);
8017                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
8018                 goto reset_device;
8019
8020         case GAUDI_EVENT_HBM0_SPI_0:
8021         case GAUDI_EVENT_HBM1_SPI_0:
8022         case GAUDI_EVENT_HBM2_SPI_0:
8023         case GAUDI_EVENT_HBM3_SPI_0:
8024                 gaudi_print_irq_info(hdev, event_type, false);
8025                 gaudi_hbm_read_interrupts(hdev,
8026                                 gaudi_hbm_event_to_dev(event_type),
8027                                 &eq_entry->hbm_ecc_data);
8028                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
8029                 goto reset_device;
8030
8031         case GAUDI_EVENT_HBM0_SPI_1:
8032         case GAUDI_EVENT_HBM1_SPI_1:
8033         case GAUDI_EVENT_HBM2_SPI_1:
8034         case GAUDI_EVENT_HBM3_SPI_1:
8035                 gaudi_print_irq_info(hdev, event_type, false);
8036                 gaudi_hbm_read_interrupts(hdev,
8037                                 gaudi_hbm_event_to_dev(event_type),
8038                                 &eq_entry->hbm_ecc_data);
8039                 hl_fw_unmask_irq(hdev, event_type);
8040                 break;
8041
8042         case GAUDI_EVENT_TPC0_DEC:
8043         case GAUDI_EVENT_TPC1_DEC:
8044         case GAUDI_EVENT_TPC2_DEC:
8045         case GAUDI_EVENT_TPC3_DEC:
8046         case GAUDI_EVENT_TPC4_DEC:
8047         case GAUDI_EVENT_TPC5_DEC:
8048         case GAUDI_EVENT_TPC6_DEC:
8049         case GAUDI_EVENT_TPC7_DEC:
8050                 gaudi_print_irq_info(hdev, event_type, true);
8051                 reset_required = gaudi_tpc_read_interrupts(hdev,
8052                                         tpc_dec_event_to_tpc_id(event_type),
8053                                         "AXI_SLV_DEC_Error");
8054                 if (reset_required) {
8055                         dev_err(hdev->dev, "reset required due to %s\n",
8056                                 gaudi_irq_map_table[event_type].name);
8057
8058                         hl_device_reset(hdev, 0);
8059                 } else {
8060                         hl_fw_unmask_irq(hdev, event_type);
8061                 }
8062                 break;
8063
8064         case GAUDI_EVENT_TPC0_KRN_ERR:
8065         case GAUDI_EVENT_TPC1_KRN_ERR:
8066         case GAUDI_EVENT_TPC2_KRN_ERR:
8067         case GAUDI_EVENT_TPC3_KRN_ERR:
8068         case GAUDI_EVENT_TPC4_KRN_ERR:
8069         case GAUDI_EVENT_TPC5_KRN_ERR:
8070         case GAUDI_EVENT_TPC6_KRN_ERR:
8071         case GAUDI_EVENT_TPC7_KRN_ERR:
8072                 gaudi_print_irq_info(hdev, event_type, true);
8073                 reset_required = gaudi_tpc_read_interrupts(hdev,
8074                                         tpc_krn_event_to_tpc_id(event_type),
8075                                         "KRN_ERR");
8076                 if (reset_required) {
8077                         dev_err(hdev->dev, "reset required due to %s\n",
8078                                 gaudi_irq_map_table[event_type].name);
8079
8080                         hl_device_reset(hdev, 0);
8081                 } else {
8082                         hl_fw_unmask_irq(hdev, event_type);
8083                 }
8084                 break;
8085
8086         case GAUDI_EVENT_PCIE_CORE_SERR:
8087         case GAUDI_EVENT_PCIE_IF_SERR:
8088         case GAUDI_EVENT_PCIE_PHY_SERR:
8089         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8090         case GAUDI_EVENT_MME0_ACC_SERR:
8091         case GAUDI_EVENT_MME0_SBAB_SERR:
8092         case GAUDI_EVENT_MME1_ACC_SERR:
8093         case GAUDI_EVENT_MME1_SBAB_SERR:
8094         case GAUDI_EVENT_MME2_ACC_SERR:
8095         case GAUDI_EVENT_MME2_SBAB_SERR:
8096         case GAUDI_EVENT_MME3_ACC_SERR:
8097         case GAUDI_EVENT_MME3_SBAB_SERR:
8098         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8099         case GAUDI_EVENT_CPU_IF_ECC_SERR:
8100         case GAUDI_EVENT_PSOC_MEM_SERR:
8101         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8102         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8103         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8104         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8105                 fallthrough;
8106         case GAUDI_EVENT_MMU_SERR:
8107                 gaudi_print_irq_info(hdev, event_type, true);
8108                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8109                 hl_fw_unmask_irq(hdev, event_type);
8110                 break;
8111
8112         case GAUDI_EVENT_PCIE_DEC:
8113         case GAUDI_EVENT_MME0_WBC_RSP:
8114         case GAUDI_EVENT_MME0_SBAB0_RSP:
8115         case GAUDI_EVENT_MME1_WBC_RSP:
8116         case GAUDI_EVENT_MME1_SBAB0_RSP:
8117         case GAUDI_EVENT_MME2_WBC_RSP:
8118         case GAUDI_EVENT_MME2_SBAB0_RSP:
8119         case GAUDI_EVENT_MME3_WBC_RSP:
8120         case GAUDI_EVENT_MME3_SBAB0_RSP:
8121         case GAUDI_EVENT_CPU_AXI_SPLITTER:
8122         case GAUDI_EVENT_PSOC_AXI_DEC:
8123         case GAUDI_EVENT_PSOC_PRSTN_FALL:
8124         case GAUDI_EVENT_MMU_PAGE_FAULT:
8125         case GAUDI_EVENT_MMU_WR_PERM:
8126         case GAUDI_EVENT_RAZWI_OR_ADC:
8127         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8128         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8129         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8130                 fallthrough;
8131         case GAUDI_EVENT_NIC0_QM0:
8132         case GAUDI_EVENT_NIC0_QM1:
8133         case GAUDI_EVENT_NIC1_QM0:
8134         case GAUDI_EVENT_NIC1_QM1:
8135         case GAUDI_EVENT_NIC2_QM0:
8136         case GAUDI_EVENT_NIC2_QM1:
8137         case GAUDI_EVENT_NIC3_QM0:
8138         case GAUDI_EVENT_NIC3_QM1:
8139         case GAUDI_EVENT_NIC4_QM0:
8140         case GAUDI_EVENT_NIC4_QM1:
8141         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8142                 gaudi_print_irq_info(hdev, event_type, true);
8143                 gaudi_handle_qman_err(hdev, event_type);
8144                 hl_fw_unmask_irq(hdev, event_type);
8145                 break;
8146
8147         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8148                 gaudi_print_irq_info(hdev, event_type, true);
8149                 goto reset_device;
8150
8151         case GAUDI_EVENT_TPC0_BMON_SPMU:
8152         case GAUDI_EVENT_TPC1_BMON_SPMU:
8153         case GAUDI_EVENT_TPC2_BMON_SPMU:
8154         case GAUDI_EVENT_TPC3_BMON_SPMU:
8155         case GAUDI_EVENT_TPC4_BMON_SPMU:
8156         case GAUDI_EVENT_TPC5_BMON_SPMU:
8157         case GAUDI_EVENT_TPC6_BMON_SPMU:
8158         case GAUDI_EVENT_TPC7_BMON_SPMU:
8159         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8160                 gaudi_print_irq_info(hdev, event_type, false);
8161                 hl_fw_unmask_irq(hdev, event_type);
8162                 break;
8163
8164         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8165                 gaudi_print_irq_info(hdev, event_type, false);
8166                 gaudi_print_sm_sei_info(hdev, event_type,
8167                                         &eq_entry->sm_sei_data);
8168                 rc = hl_state_dump(hdev);
8169                 if (rc)
8170                         dev_err(hdev->dev,
8171                                 "Error during system state dump %d\n", rc);
8172                 hl_fw_unmask_irq(hdev, event_type);
8173                 break;
8174
8175         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8176                 gaudi_print_clk_change_info(hdev, event_type);
8177                 hl_fw_unmask_irq(hdev, event_type);
8178                 break;
8179
8180         case GAUDI_EVENT_PSOC_GPIO_U16_0:
8181                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8182                 dev_err(hdev->dev,
8183                         "Received high temp H/W interrupt %d (cause %d)\n",
8184                         event_type, cause);
8185                 break;
8186
8187         case GAUDI_EVENT_DEV_RESET_REQ:
8188                 gaudi_print_irq_info(hdev, event_type, false);
8189                 goto reset_device;
8190
8191         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8192                 gaudi_print_irq_info(hdev, event_type, false);
8193                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8194                 goto reset_device;
8195
8196         case GAUDI_EVENT_FW_ALIVE_S:
8197                 gaudi_print_irq_info(hdev, event_type, false);
8198                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8199                 goto reset_device;
8200
8201         default:
8202                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8203                                 event_type);
8204                 break;
8205         }
8206
8207         return;
8208
8209 reset_device:
8210         if (hdev->asic_prop.fw_security_enabled)
8211                 hl_device_reset(hdev, HL_DRV_RESET_HARD
8212                                         | HL_DRV_RESET_BYPASS_REQ_TO_FW
8213                                         | fw_fatal_err_flag);
8214         else if (hdev->hard_reset_on_fw_events)
8215                 hl_device_reset(hdev, HL_DRV_RESET_HARD | fw_fatal_err_flag);
8216         else
8217                 hl_fw_unmask_irq(hdev, event_type);
8218 }
8219
8220 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8221                                         u32 *size)
8222 {
8223         struct gaudi_device *gaudi = hdev->asic_specific;
8224
8225         if (aggregate) {
8226                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8227                 return gaudi->events_stat_aggregate;
8228         }
8229
8230         *size = (u32) sizeof(gaudi->events_stat);
8231         return gaudi->events_stat;
8232 }
8233
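/*
 * Invalidate the whole MMU cache (L0 and L1). The invalidation is kicked by
 * writing the producer index to STLB_CACHE_INV and is considered done once
 * STLB_INV_PS polls back to zero. Skipped when the MMU is not initialized or
 * a hard reset is pending.
 */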
8234 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8235                                         u32 flags)
8236 {
8237         struct gaudi_device *gaudi = hdev->asic_specific;
8238         u32 status, timeout_usec;
8239         int rc;
8240
8241         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8242                 hdev->reset_info.hard_reset_pending)
8243                 return 0;
8244
8245         if (hdev->pldm)
8246                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8247         else
8248                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8249
8250         /* L0 & L1 invalidation */
8251         WREG32(mmSTLB_INV_PS, 3);
8252         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8253         WREG32(mmSTLB_INV_PS, 2);
8254
8255         rc = hl_poll_timeout(
8256                 hdev,
8257                 mmSTLB_INV_PS,
8258                 status,
8259                 !status,
8260                 1000,
8261                 timeout_usec);
8262
8263         WREG32(mmSTLB_INV_SET, 0);
8264
8265         return rc;
8266 }
8267
8268 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8269                                                 bool is_hard, u32 flags,
8270                                                 u32 asid, u64 va, u64 size)
8271 {
8272         /* Treat as invalidate all because there is no range invalidation
8273          * in Gaudi
8274          */
8275         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8276 }
8277
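/*
 * Program the hop-0 page-table physical address for the given ASID and wait
 * for the MMU to acknowledge by polling the busy bit in MMU_BUSY.
 */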
8278 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8279                                         u32 asid, u64 phys_addr)
8280 {
8281         u32 status, timeout_usec;
8282         int rc;
8283
8284         if (hdev->pldm)
8285                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8286         else
8287                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8288
8289         WREG32(MMU_ASID, asid);
8290         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8291         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8292         WREG32(MMU_BUSY, 0x80000000);
8293
8294         rc = hl_poll_timeout(
8295                 hdev,
8296                 MMU_BUSY,
8297                 status,
8298                 !(status & 0x80000000),
8299                 1000,
8300                 timeout_usec);
8301
8302         if (rc) {
8303                 dev_err(hdev->dev,
8304                         "Timeout during MMU hop0 config of asid %d\n", asid);
8305                 return rc;
8306         }
8307
8308         return 0;
8309 }
8310
8311 static int gaudi_send_heartbeat(struct hl_device *hdev)
8312 {
8313         struct gaudi_device *gaudi = hdev->asic_specific;
8314
8315         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8316                 return 0;
8317
8318         return hl_fw_send_heartbeat(hdev);
8319 }
8320
8321 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8322 {
8323         struct gaudi_device *gaudi = hdev->asic_specific;
8324         struct asic_fixed_properties *prop = &hdev->asic_prop;
8325         int rc;
8326
8327         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8328                 return 0;
8329
8330         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8331                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8332                                         mmCPU_BOOT_ERR1);
8333         if (rc)
8334                 return rc;
8335
8336         if (!strlen(prop->cpucp_info.card_name))
8337                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8338                                 CARD_NAME_MAX_LEN);
8339
8340         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8341
8342         set_default_power_values(hdev);
8343
8344         hdev->max_power = prop->max_power_default;
8345
8346         return 0;
8347 }
8348
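/*
 * Check whether all engines are idle. With clock gating disabled, the QMAN,
 * core/config status registers of every DMA, TPC, MME and enabled NIC engine
 * are sampled; odd-numbered MMEs are slaves and have no QMAN of their own, so
 * only their ARCH_STATUS is checked. Busy engines are marked in @mask_arr and
 * an optional human-readable table is emitted to the seq_file @s.
 */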
8349 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8350                                         u8 mask_len, struct seq_file *s)
8351 {
8352         struct gaudi_device *gaudi = hdev->asic_specific;
8353         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8354         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8355         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8356         unsigned long *mask = (unsigned long *)mask_arr;
8357         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8358         bool is_idle = true, is_eng_idle, is_slave;
8359         u64 offset;
8360         int i, dma_id, port;
8361
8362         mutex_lock(&gaudi->clk_gate_mutex);
8363
8364         hdev->asic_funcs->disable_clock_gating(hdev);
8365
8366         if (s)
8367                 seq_puts(s,
8368                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8369                         "---  -------  ------------  ----------  -------------\n");
8370
8371         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8372                 dma_id = gaudi_dma_assignment[i];
8373                 offset = dma_id * DMA_QMAN_OFFSET;
8374
8375                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8376                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8377                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8378                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8379                                 IS_DMA_IDLE(dma_core_sts0);
8380                 is_idle &= is_eng_idle;
8381
8382                 if (mask && !is_eng_idle)
8383                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8384                 if (s)
8385                         seq_printf(s, fmt, dma_id,
8386                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8387                                 qm_cgm_sts, dma_core_sts0);
8388         }
8389
8390         if (s)
8391                 seq_puts(s,
8392                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8393                         "---  -------  ------------  ----------  ----------\n");
8394
8395         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8396                 offset = i * TPC_QMAN_OFFSET;
8397                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8398                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8399                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8400                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8401                                 IS_TPC_IDLE(tpc_cfg_sts);
8402                 is_idle &= is_eng_idle;
8403
8404                 if (mask && !is_eng_idle)
8405                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8406                 if (s)
8407                         seq_printf(s, fmt, i,
8408                                 is_eng_idle ? "Y" : "N",
8409                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8410         }
8411
8412         if (s)
8413                 seq_puts(s,
8414                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8415                         "---  -------  ------------  ----------  -----------\n");
8416
8417         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8418                 offset = i * MME_QMAN_OFFSET;
8419                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8420                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8421
8422                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8423                 is_slave = i % 2;
8424                 if (!is_slave) {
8425                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8426                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8427                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8428                 }
8429
8430                 is_idle &= is_eng_idle;
8431
8432                 if (mask && !is_eng_idle)
8433                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8434                 if (s) {
8435                         if (!is_slave)
8436                                 seq_printf(s, fmt, i,
8437                                         is_eng_idle ? "Y" : "N",
8438                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8439                         else
8440                                 seq_printf(s, mme_slave_fmt, i,
8441                                         is_eng_idle ? "Y" : "N", "-",
8442                                         "-", mme_arch_sts);
8443                 }
8444         }
8445
8446         if (s)
8447                 seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8448                                 "---  -------  ------------  ----------\n");
8449
8450         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8451                 offset = i * NIC_MACRO_QMAN_OFFSET;
8452                 port = 2 * i;
8453                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8454                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8455                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8456                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8457                         is_idle &= is_eng_idle;
8458
8459                         if (mask && !is_eng_idle)
8460                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8461                         if (s)
8462                                 seq_printf(s, nic_fmt, port,
8463                                                 is_eng_idle ? "Y" : "N",
8464                                                 qm_glbl_sts0, qm_cgm_sts);
8465                 }
8466
8467                 port = 2 * i + 1;
8468                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8469                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8470                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8471                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8472                         is_idle &= is_eng_idle;
8473
8474                         if (mask && !is_eng_idle)
8475                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8476                         if (s)
8477                                 seq_printf(s, nic_fmt, port,
8478                                                 is_eng_idle ? "Y" : "N",
8479                                                 qm_glbl_sts0, qm_cgm_sts);
8480                 }
8481         }
8482
8483         if (s)
8484                 seq_puts(s, "\n");
8485
8486         hdev->asic_funcs->set_clock_gating(hdev);
8487
8488         mutex_unlock(&gaudi->clk_gate_mutex);
8489
8490         return is_idle;
8491 }
8492
8493 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8494         __acquires(&gaudi->hw_queues_lock)
8495 {
8496         struct gaudi_device *gaudi = hdev->asic_specific;
8497
8498         spin_lock(&gaudi->hw_queues_lock);
8499 }
8500
8501 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8502         __releases(&gaudi->hw_queues_lock)
8503 {
8504         struct gaudi_device *gaudi = hdev->asic_specific;
8505
8506         spin_unlock(&gaudi->hw_queues_lock);
8507 }
8508
8509 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8510 {
8511         return hdev->pdev->device;
8512 }
8513
8514 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8515                                 size_t max_size)
8516 {
8517         struct gaudi_device *gaudi = hdev->asic_specific;
8518
8519         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8520                 return 0;
8521
8522         return hl_fw_get_eeprom_data(hdev, data, max_size);
8523 }
8524
8525 /*
8526  * This function should be used only during initialization and/or after reset,
8527  * when there are no active users.
8528  */
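/*
 * The sequence below, roughly: program the kernel and icache base addresses
 * (plus a valid LUT pointer), issue an icache invalidate + 64KB prefetch and
 * poll for the vector pipe to become empty, then trigger execution and poll
 * again, and finally wait for the work-queue inflight counter to drain to zero.
 */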
8529 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8530                                 u32 tpc_id)
8531 {
8532         struct gaudi_device *gaudi = hdev->asic_specific;
8533         u64 kernel_timeout;
8534         u32 status, offset;
8535         int rc;
8536
8537         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8538
8539         if (hdev->pldm)
8540                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8541         else
8542                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8543
8544         mutex_lock(&gaudi->clk_gate_mutex);
8545
8546         hdev->asic_funcs->disable_clock_gating(hdev);
8547
8548         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8549                         lower_32_bits(tpc_kernel));
8550         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8551                         upper_32_bits(tpc_kernel));
8552
8553         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8554                         lower_32_bits(tpc_kernel));
8555         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8556                         upper_32_bits(tpc_kernel));
8557         /* set a valid LUT pointer, content is of no significance */
8558         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8559                         lower_32_bits(tpc_kernel));
8560         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8561                         upper_32_bits(tpc_kernel));
8562
8563         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8564                         lower_32_bits(CFG_BASE +
8565                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8566
8567         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8568                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8569                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8570         /* wait a bit for the engine to start executing */
8571         usleep_range(1000, 1500);
8572
8573         /* wait until engine has finished executing */
8574         rc = hl_poll_timeout(
8575                 hdev,
8576                 mmTPC0_CFG_STATUS + offset,
8577                 status,
8578                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8579                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8580                 1000,
8581                 kernel_timeout);
8582
8583         if (rc) {
8584                 dev_err(hdev->dev,
8585                         "Timeout while waiting for TPC%d icache prefetch\n",
8586                         tpc_id);
8587                 hdev->asic_funcs->set_clock_gating(hdev);
8588                 mutex_unlock(&gaudi->clk_gate_mutex);
8589                 return -EIO;
8590         }
8591
8592         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8593                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8594
8595         /* wait a bit for the engine to start executing */
8596         usleep_range(1000, 1500);
8597
8598         /* wait until engine has finished executing */
8599         rc = hl_poll_timeout(
8600                 hdev,
8601                 mmTPC0_CFG_STATUS + offset,
8602                 status,
8603                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8604                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8605                 1000,
8606                 kernel_timeout);
8607
8608         if (rc) {
8609                 dev_err(hdev->dev,
8610                         "Timeout while waiting for TPC%d vector pipe\n",
8611                         tpc_id);
8612                 hdev->asic_funcs->set_clock_gating(hdev);
8613                 mutex_unlock(&gaudi->clk_gate_mutex);
8614                 return -EIO;
8615         }
8616
8617         rc = hl_poll_timeout(
8618                 hdev,
8619                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8620                 status,
8621                 (status == 0),
8622                 1000,
8623                 kernel_timeout);
8624
8625         hdev->asic_funcs->set_clock_gating(hdev);
8626         mutex_unlock(&gaudi->clk_gate_mutex);
8627
8628         if (rc) {
8629                 dev_err(hdev->dev,
8630                         "Timeout while waiting for TPC%d kernel to execute\n",
8631                         tpc_id);
8632                 return -EIO;
8633         }
8634
8635         return 0;
8636 }
8637
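/*
 * Set up the per-context internal command-buffer pool used for collective
 * operations: allocate a coherent host buffer, wrap it in a gen_pool whose
 * minimal allocation order fits one collective CB (five MSG_SHORT packets plus
 * a FENCE), reserve a host virtual-address block and map it through the MMU.
 * All steps are unwound in reverse order on failure.
 */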
8638 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8639                 struct hl_ctx *ctx)
8640 {
8641         struct gaudi_device *gaudi = hdev->asic_specific;
8642         int min_alloc_order, rc, collective_cb_size;
8643
8644         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8645                 return 0;
8646
8647         hdev->internal_cb_pool_virt_addr =
8648                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8649                                         HOST_SPACE_INTERNAL_CB_SZ,
8650                                         &hdev->internal_cb_pool_dma_addr,
8651                                         GFP_KERNEL | __GFP_ZERO);
8652
8653         if (!hdev->internal_cb_pool_virt_addr)
8654                 return -ENOMEM;
8655
8656         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8657                         sizeof(struct packet_fence);
8658         min_alloc_order = ilog2(collective_cb_size);
8659
8660         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8661         if (!hdev->internal_cb_pool) {
8662                 dev_err(hdev->dev,
8663                         "Failed to create internal CB pool\n");
8664                 rc = -ENOMEM;
8665                 goto free_internal_cb_pool;
8666         }
8667
8668         rc = gen_pool_add(hdev->internal_cb_pool,
8669                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8670                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8671         if (rc) {
8672                 dev_err(hdev->dev,
8673                         "Failed to add memory to internal CB pool\n");
8674                 rc = -EFAULT;
8675                 goto destroy_internal_cb_pool;
8676         }
8677
8678         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8679                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8680                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8681
8682         if (!hdev->internal_cb_va_base) {
8683                 rc = -ENOMEM;
8684                 goto destroy_internal_cb_pool;
8685         }
8686
8687         mutex_lock(&ctx->mmu_lock);
8688         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8689                         hdev->internal_cb_pool_dma_addr,
8690                         HOST_SPACE_INTERNAL_CB_SZ);
8691
8692         hdev->asic_funcs->mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8693         mutex_unlock(&ctx->mmu_lock);
8694
8695         if (rc)
8696                 goto unreserve_internal_cb_pool;
8697
8698         return 0;
8699
8700 unreserve_internal_cb_pool:
8701         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8702                         HOST_SPACE_INTERNAL_CB_SZ);
8703 destroy_internal_cb_pool:
8704         gen_pool_destroy(hdev->internal_cb_pool);
8705 free_internal_cb_pool:
8706         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8707                         HOST_SPACE_INTERNAL_CB_SZ,
8708                         hdev->internal_cb_pool_virt_addr,
8709                         hdev->internal_cb_pool_dma_addr);
8710
8711         return rc;
8712 }
8713
8714 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8715                 struct hl_ctx *ctx)
8716 {
8717         struct gaudi_device *gaudi = hdev->asic_specific;
8718
8719         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8720                 return;
8721
8722         mutex_lock(&ctx->mmu_lock);
8723         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8724                         HOST_SPACE_INTERNAL_CB_SZ);
8725         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8726                         HOST_SPACE_INTERNAL_CB_SZ);
8727         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8728         mutex_unlock(&ctx->mmu_lock);
8729
8730         gen_pool_destroy(hdev->internal_cb_pool);
8731
8732         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8733                         HOST_SPACE_INTERNAL_CB_SZ,
8734                         hdev->internal_cb_pool_virt_addr,
8735                         hdev->internal_cb_pool_dma_addr);
8736 }
8737
8738 static int gaudi_ctx_init(struct hl_ctx *ctx)
8739 {
8740         int rc;
8741
8742         if (ctx->asid == HL_KERNEL_ASID_ID)
8743                 return 0;
8744
8745         rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8746         if (rc)
8747                 return rc;
8748
8749         rc = gaudi_restore_user_registers(ctx->hdev);
8750         if (rc)
8751                 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8752
8753         return rc;
8754 }
8755
8756 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8757 {
8758         if (ctx->asid == HL_KERNEL_ASID_ID)
8759                 return;
8760
8761         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8762 }
8763
8764 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8765 {
8766         return gaudi_cq_assignment[cq_idx];
8767 }
8768
8769 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8770 {
8771         return sizeof(struct packet_msg_short) +
8772                         sizeof(struct packet_msg_prot) * 2;
8773 }
8774
8775 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8776 {
8777         return sizeof(struct packet_msg_short) * 4 +
8778                         sizeof(struct packet_fence) +
8779                         sizeof(struct packet_msg_prot) * 2;
8780 }
8781
8782 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8783 {
8784         return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8785 }
8786
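/*
 * Append a signal packet to the CB at offset @size: a single MSG_SHORT that
 * adds 1 to the given sync object (W_S SOB base), with the engine-barrier bit
 * taken from @eb. Returns the new CB size.
 */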
8787 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8788                                 u32 size, bool eb)
8789 {
8790         struct hl_cb *cb = (struct hl_cb *) data;
8791         struct packet_msg_short *pkt;
8792         u32 value, ctl, pkt_size = sizeof(*pkt);
8793
8794         pkt = cb->kernel_address + size;
8795         memset(pkt, 0, pkt_size);
8796
8797         /* Inc by 1, Mode ADD */
8798         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8799         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8800
8801         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8802         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8803         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8804         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8805         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8806         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8807         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8808
8809         pkt->value = cpu_to_le32(value);
8810         pkt->ctl = cpu_to_le32(ctl);
8811
8812         return size + pkt_size;
8813 }
8814
8815 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8816                                         u16 addr)
8817 {
8818         u32 ctl, pkt_size = sizeof(*pkt);
8819
8820         memset(pkt, 0, pkt_size);
8821
8822         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8823         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8824         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8825         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8826         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8827         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8828
8829         pkt->value = cpu_to_le32(value);
8830         pkt->ctl = cpu_to_le32(ctl);
8831
8832         return pkt_size;
8833 }
8834
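/*
 * Build the monitor-arm packet: bind monitor @mon_id to the sync-object group
 * containing @sob_base (groups of 8 SOBs), with the mask derived from
 * @sob_mask, a target value of @sob_val and a "greater or equal" compare mode.
 * The packet address is expressed as an offset from the W_S monitor base.
 */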
8835 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8836                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8837                 u16 sob_val, u16 mon_id)
8838 {
8839         u64 monitor_base;
8840         u32 ctl, value, pkt_size = sizeof(*pkt);
8841         u16 msg_addr_offset;
8842         u8 mask;
8843
8844         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8845                 dev_err(hdev->dev,
8846                         "sob_base %u (mask %#x) is not valid\n",
8847                         sob_base, sob_mask);
8848                 return 0;
8849         }
8850
8851         /*
8852          * monitor_base should be the content of the base0 address registers,
8853          * so it will be added to the msg short offsets
8854          */
8855         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8856
8857         msg_addr_offset =
8858                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8859                                 monitor_base;
8860
8861         memset(pkt, 0, pkt_size);
8862
8863         /* Monitor config packet: bind the monitor to a sync object */
8864         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8865         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8866         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8867                         0); /* GREATER OR EQUAL */
8868         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8869
8870         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8871         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8872         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8873         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8874         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8875         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8876         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8877
8878         pkt->value = cpu_to_le32(value);
8879         pkt->ctl = cpu_to_le32(ctl);
8880
8881         return pkt_size;
8882 }
8883
8884 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8885 {
8886         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8887
8888         memset(pkt, 0, pkt_size);
8889
8890         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8891         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8892         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8893
8894         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8895         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8896         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8897         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8898
8899         pkt->cfg = cpu_to_le32(cfg);
8900         pkt->ctl = cpu_to_le32(ctl);
8901
8902         return pkt_size;
8903 }
8904
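/*
 * Translate a queue id to the address of its CP FENCE2 read-data register.
 * For NIC queues the register is computed from the NIC index: each pair of
 * engines shares a macro block (NIC_MACRO_QMAN_OFFSET) and the odd engine of
 * the pair adds NIC_ENGINE_QMAN_OFFSET on top of it.
 */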
8905 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8906 {
8907         u32 offset, nic_index;
8908
8909         switch (queue_id) {
8910         case GAUDI_QUEUE_ID_DMA_0_0:
8911                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8912                 break;
8913         case GAUDI_QUEUE_ID_DMA_0_1:
8914                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8915                 break;
8916         case GAUDI_QUEUE_ID_DMA_0_2:
8917                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8918                 break;
8919         case GAUDI_QUEUE_ID_DMA_0_3:
8920                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8921                 break;
8922         case GAUDI_QUEUE_ID_DMA_1_0:
8923                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8924                 break;
8925         case GAUDI_QUEUE_ID_DMA_1_1:
8926                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8927                 break;
8928         case GAUDI_QUEUE_ID_DMA_1_2:
8929                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8930                 break;
8931         case GAUDI_QUEUE_ID_DMA_1_3:
8932                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8933                 break;
8934         case GAUDI_QUEUE_ID_DMA_5_0:
8935                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8936                 break;
8937         case GAUDI_QUEUE_ID_DMA_5_1:
8938                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8939                 break;
8940         case GAUDI_QUEUE_ID_DMA_5_2:
8941                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8942                 break;
8943         case GAUDI_QUEUE_ID_DMA_5_3:
8944                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8945                 break;
8946         case GAUDI_QUEUE_ID_TPC_7_0:
8947                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8948                 break;
8949         case GAUDI_QUEUE_ID_TPC_7_1:
8950                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8951                 break;
8952         case GAUDI_QUEUE_ID_TPC_7_2:
8953                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8954                 break;
8955         case GAUDI_QUEUE_ID_TPC_7_3:
8956                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8957                 break;
8958         case GAUDI_QUEUE_ID_NIC_0_0:
8959         case GAUDI_QUEUE_ID_NIC_1_0:
8960         case GAUDI_QUEUE_ID_NIC_2_0:
8961         case GAUDI_QUEUE_ID_NIC_3_0:
8962         case GAUDI_QUEUE_ID_NIC_4_0:
8963         case GAUDI_QUEUE_ID_NIC_5_0:
8964         case GAUDI_QUEUE_ID_NIC_6_0:
8965         case GAUDI_QUEUE_ID_NIC_7_0:
8966         case GAUDI_QUEUE_ID_NIC_8_0:
8967         case GAUDI_QUEUE_ID_NIC_9_0:
8968                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8969                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8970                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8971                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8972                 break;
8973         case GAUDI_QUEUE_ID_NIC_0_1:
8974         case GAUDI_QUEUE_ID_NIC_1_1:
8975         case GAUDI_QUEUE_ID_NIC_2_1:
8976         case GAUDI_QUEUE_ID_NIC_3_1:
8977         case GAUDI_QUEUE_ID_NIC_4_1:
8978         case GAUDI_QUEUE_ID_NIC_5_1:
8979         case GAUDI_QUEUE_ID_NIC_6_1:
8980         case GAUDI_QUEUE_ID_NIC_7_1:
8981         case GAUDI_QUEUE_ID_NIC_8_1:
8982         case GAUDI_QUEUE_ID_NIC_9_1:
8983                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8984                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8985                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8986                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8987                 break;
8988         case GAUDI_QUEUE_ID_NIC_0_2:
8989         case GAUDI_QUEUE_ID_NIC_1_2:
8990         case GAUDI_QUEUE_ID_NIC_2_2:
8991         case GAUDI_QUEUE_ID_NIC_3_2:
8992         case GAUDI_QUEUE_ID_NIC_4_2:
8993         case GAUDI_QUEUE_ID_NIC_5_2:
8994         case GAUDI_QUEUE_ID_NIC_6_2:
8995         case GAUDI_QUEUE_ID_NIC_7_2:
8996         case GAUDI_QUEUE_ID_NIC_8_2:
8997         case GAUDI_QUEUE_ID_NIC_9_2:
8998                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8999                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
9000                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
9001                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
9002                 break;
9003         case GAUDI_QUEUE_ID_NIC_0_3:
9004         case GAUDI_QUEUE_ID_NIC_1_3:
9005         case GAUDI_QUEUE_ID_NIC_2_3:
9006         case GAUDI_QUEUE_ID_NIC_3_3:
9007         case GAUDI_QUEUE_ID_NIC_4_3:
9008         case GAUDI_QUEUE_ID_NIC_5_3:
9009         case GAUDI_QUEUE_ID_NIC_6_3:
9010         case GAUDI_QUEUE_ID_NIC_7_3:
9011         case GAUDI_QUEUE_ID_NIC_8_3:
9012         case GAUDI_QUEUE_ID_NIC_9_3:
9013                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
9014                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
9015                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
9016                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
9017                 break;
9018         default:
9019                 return -EINVAL;
9020         }
9021
9022         *addr = CFG_BASE + offset;
9023
9024         return 0;
9025 }
9026
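/*
 * Build the three MSG_SHORT packets that configure monitor 'mon_id':
 * the low and high halves of 'fence_addr' go into the monitor payload
 * address registers, and the payload data is set to 1 (the value written
 * to the fence when the monitor fires). Returns the packets' size in bytes.
 */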
9027 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
9028 {
9029         u64 monitor_base;
9030         u32 size = 0;
9031         u16 msg_addr_offset;
9032
9033         /*
9034          * monitor_base should be the content of the base0 address registers,
9035          * because the HW adds it to the msg short offsets
9036          */
9037         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
9038
9039         /* First monitor config packet: low address of the sync */
9040         msg_addr_offset =
9041                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
9042                                 monitor_base;
9043
9044         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
9045                                         msg_addr_offset);
9046
9047         /* Second monitor config packet: high address of the sync */
9048         msg_addr_offset =
9049                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
9050                                 monitor_base;
9051
9052         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
9053                                         msg_addr_offset);
9054
9055         /*
9056          * Third monitor config packet: the payload, i.e. what to write when the
9057          * sync triggers
9058          */
9059         msg_addr_offset =
9060                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
9061                                 monitor_base;
9062
9063         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
9064
9065         return size;
9066 }
9067
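/*
 * Fill a wait CB at offset prop->size: the monitor payload packets
 * (pointing at the fence RDATA register of the target queue), an ARM
 * packet for the monitored SOB base/mask/value, and a FENCE packet.
 * Returns the updated CB size, or 0 if the queue ID has no fence address.
 */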
9068 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
9069                                 struct hl_gen_wait_properties *prop)
9070 {
9071         struct hl_cb *cb = (struct hl_cb *) prop->data;
9072         void *buf = cb->kernel_address;
9073         u64 fence_addr = 0;
9074         u32 size = prop->size;
9075
9076         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
9077                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
9078                                 prop->q_idx);
9079                 return 0;
9080         }
9081
9082         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
9083         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
9084                         prop->sob_mask, prop->sob_val, prop->mon_id);
9085         size += gaudi_add_fence_pkt(buf + size);
9086
9087         return size;
9088 }
9089
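/* Clear the SOB value in the sync manager and reinitialize its refcount */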
9090 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
9091 {
9092         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
9093
9094         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
9095                 hw_sob->sob_id);
9096
9097         WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9098                         hw_sob->sob_id * 4, 0);
9099
9100         kref_init(&hw_sob->kref);
9101 }
9102
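/*
 * The FW publishes, via a non-reset scratchpad register, whether the host
 * (a POWER9 machine in this case) supports full 64-bit DMA addressing;
 * otherwise fall back to a 48-bit DMA mask.
 */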
9103 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9104 {
9105         if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
9106                                                         HL_POWER9_HOST_MAGIC) {
9107                 hdev->power9_64bit_dma_enable = 1;
9108                 hdev->dma_mask = 64;
9109         } else {
9110                 hdev->power9_64bit_dma_enable = 0;
9111                 hdev->dma_mask = 48;
9112         }
9113 }
9114
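/*
 * Compose the 64-bit PSOC timestamp from its high and low 32-bit halves.
 * Note that the high word is read first with no rollover check, so a wrap
 * of the low counter between the two reads could in principle be observed.
 */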
9115 static u64 gaudi_get_device_time(struct hl_device *hdev)
9116 {
9117         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9118
9119         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9120 }
9121
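/* Mapping of HW blocks to user-space is not supported on Gaudi */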
9122 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9123                                 u32 *block_size, u32 *block_id)
9124 {
9125         return -EPERM;
9126 }
9127
9128 static int gaudi_block_mmap(struct hl_device *hdev,
9129                                 struct vm_area_struct *vma,
9130                                 u32 block_id, u32 block_size)
9131 {
9132         return -EPERM;
9133 }
9134
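/*
 * Tell the FW it may start sending events, by writing the INTS_REGISTER
 * event ID to the host-interrupt register: either the legacy GIC SETSPI
 * register or the address published in the dynamic registers descriptor.
 */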
9135 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9136 {
9137         struct cpu_dyn_regs *dyn_regs =
9138                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9139         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9140                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
9141                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
9142
9143         WREG32(irq_handler_offset,
9144                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
9145 }
9146
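/*
 * Translate a PLL index from the user/common enumeration (HL_GAUDI_*_PLL)
 * to the FW's internal PLL numbering; returns -EINVAL for an unknown index.
 */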
9147 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9148 {
9149         switch (pll_idx) {
9150         case HL_GAUDI_CPU_PLL: return CPU_PLL;
9151         case HL_GAUDI_PCI_PLL: return PCI_PLL;
9152         case HL_GAUDI_NIC_PLL: return NIC_PLL;
9153         case HL_GAUDI_DMA_PLL: return DMA_PLL;
9154         case HL_GAUDI_MESH_PLL: return MESH_PLL;
9155         case HL_GAUDI_MME_PLL: return MME_PLL;
9156         case HL_GAUDI_TPC_PLL: return TPC_PLL;
9157         case HL_GAUDI_IF_PLL: return IF_PLL;
9158         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9159         case HL_GAUDI_HBM_PLL: return HBM_PLL;
9160         default: return -EINVAL;
9161         }
9162 }
9163
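/*
 * Record one sync-object -> engine mapping. The SO address read from the
 * engine config (minus CFG_BASE) serves as the hash key; values of 0 or
 * 0xffffffff are treated as unconfigured and skipped.
 */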
9164 static int gaudi_add_sync_to_engine_map_entry(
9165         struct hl_sync_to_engine_map *map, u32 reg_value,
9166         enum hl_sync_engine_type engine_type, u32 engine_id)
9167 {
9168         struct hl_sync_to_engine_map_entry *entry;
9169
9170         /* The register value holds a partial address of the sync object
9171          * and is used as a unique identifier, so the (truncated) CFG base
9172          * must be subtracted from it first.
9173          */
9174         if (reg_value == 0 || reg_value == 0xffffffff)
9175                 return 0;
9176         reg_value -= (u32)CFG_BASE;
9177
9178         /* create a new hash entry */
9179         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9180         if (!entry)
9181                 return -ENOMEM;
9182         entry->engine_type = engine_type;
9183         entry->engine_id = engine_id;
9184         entry->sync_id = reg_value;
9185         hash_add(map->tb, &entry->node, reg_value);
9186
9187         return 0;
9188 }
9189
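/*
 * Build the sync-object -> engine map used by the state dump: read the
 * configured SO address of every TPC, MME and DMA engine (with clock
 * gating temporarily disabled where required) and hash each one into the
 * map. On failure the partially built map is freed.
 */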
9190 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9191                                 struct hl_sync_to_engine_map *map)
9192 {
9193         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9194         struct gaudi_device *gaudi = hdev->asic_specific;
9195         int i, j, rc;
9196         u32 reg_value;
9197
9198         /* Iterate over TPC engines */
9199         for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9200                 /* TPC registers must be accessed with clock gating disabled */
9201                 mutex_lock(&gaudi->clk_gate_mutex);
9202                 hdev->asic_funcs->disable_clock_gating(hdev);
9203
9204                 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9205                                         sds->props[SP_NEXT_TPC] * i);
9206
9207                 /* We can reenable clock_gating */
9208                 hdev->asic_funcs->set_clock_gating(hdev);
9209                 mutex_unlock(&gaudi->clk_gate_mutex);
9210
9211                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9212                                                         ENGINE_TPC, i);
9213                 if (rc)
9214                         goto free_sync_to_engine_map;
9215         }
9216
9217         /* Iterate over MME engines */
9218         for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9219                 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9220                         /* MME registers must be accessed with clock gating
9221                          * disabled
9222                          */
9223                         mutex_lock(&gaudi->clk_gate_mutex);
9224                         hdev->asic_funcs->disable_clock_gating(hdev);
9225
9226                         reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9227                                                 sds->props[SP_NEXT_MME] * i +
9228                                                 j * sizeof(u32));
9229
9230                         /* We can reenable clock_gating */
9231                         hdev->asic_funcs->set_clock_gating(hdev);
9232                         mutex_unlock(&gaudi->clk_gate_mutex);
9233
9234                         rc = gaudi_add_sync_to_engine_map_entry(
9235                                 map, reg_value, ENGINE_MME,
9236                                 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9237                         if (rc)
9238                                 goto free_sync_to_engine_map;
9239                 }
9240         }
9241
9242         /* Iterate over DMA engines */
9243         for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9244                 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9245                                         sds->props[SP_DMA_QUEUES_OFFSET] * i);
9246                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9247                                                         ENGINE_DMA, i);
9248                 if (rc)
9249                         goto free_sync_to_engine_map;
9250         }
9251
9252         return 0;
9253
9254 free_sync_to_engine_map:
9255         hl_state_dump_free_sync_to_engine_map(map);
9256
9257         return rc;
9258 }
9259
9260 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9261 {
9262         return FIELD_GET(
9263                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9264                 mon->status);
9265 }
9266
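/*
 * Decode the monitor ARM data into a comma-separated list of monitored
 * SOB IDs: each cleared bit in the 8-bit mask selects one SOB within the
 * group. The loop guard keeps at least max_write bytes free for every
 * snprintf() call.
 */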
9267 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
9268 {
9269         const size_t max_write = 10;
9270         u32 gid, mask, sob;
9271         int i, offset;
9272
9273         /* Each monitored sync object ID is calculated as follows:
9274          * (MONITOR_MAX_SOBS * group_id + index of a cleared bit in the mask)
9275          */
9276         gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9277                         mon->arm_data);
9278         mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9279                         mon->arm_data);
9280
9281         for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
9282                 max_write; mask >>= 1, i++) {
9283                 if (!(mask & 1)) {
9284                         sob = gid * MONITOR_MAX_SOBS + i;
9285
9286                         if (offset > 0)
9287                                 offset += snprintf(sobs + offset, max_write,
9288                                                         ", ");
9289
9290                         offset += snprintf(sobs + offset, max_write, "%u", sob);
9291                 }
9292         }
9293 }
9294
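/*
 * Format a single armed monitor into the resizable dump buffer: the
 * monitored group, mask and target value, the payload write (data and
 * address), the pending status and the decoded list of monitored SOBs.
 */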
9295 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9296                                 struct hl_device *hdev,
9297                                 struct hl_mon_state_dump *mon)
9298 {
9299         const char *name;
9300         char scratch_buf1[BIN_REG_STRING_SIZE],
9301                 scratch_buf2[BIN_REG_STRING_SIZE];
9302         char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9303
9304         name = hl_state_dump_get_monitor_name(hdev, mon);
9305         if (!name)
9306                 name = "";
9307
9308         gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9309
9310         return hl_snprintf_resize(
9311                 buf, size, offset,
9312                 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9313                 mon->id, name,
9314                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9315                                 mon->arm_data),
9316                 hl_format_as_binary(
9317                         scratch_buf1, sizeof(scratch_buf1),
9318                         FIELD_GET(
9319                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9320                                 mon->arm_data)),
9321                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9322                                 mon->arm_data),
9323                 mon->wr_data,
9324                 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9325                 hl_format_as_binary(
9326                         scratch_buf2, sizeof(scratch_buf2),
9327                         FIELD_GET(
9328                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9329                                 mon->status)),
9330                 monitored_sobs);
9331 }
9332
9333
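/*
 * Snapshot the CP status and fence counter registers of one engine's QMAN
 * and print, for every stream with a fence currently in progress, the
 * fence ID, the addresses of its CNT/RDATA registers and the counter
 * value. The temporary snapshot arrays are freed before returning.
 */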
9334 static int gaudi_print_fences_single_engine(
9335         struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9336         enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9337         size_t *size, size_t *offset)
9338 {
9339         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9340         int rc = -ENOMEM, i;
9341         u32 *statuses, *fences;
9342
9343         statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9344                         sizeof(*statuses), GFP_KERNEL);
9345         if (!statuses)
9346                 goto out;
9347
9348         fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9349                                 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9350                          sizeof(*fences), GFP_KERNEL);
9351         if (!fences)
9352                 goto free_status;
9353
9354         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9355                 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9356
9357         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9358                                 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9359                 fences[i] = RREG32(base_offset + i * sizeof(u32));
9360
9361         /* The actual print */
9362         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9363                 u32 fence_id;
9364                 u64 fence_cnt, fence_rdata;
9365                 const char *engine_name;
9366
9367                 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9368                         statuses[i]))
9369                         continue;
9370
9371                 fence_id =
9372                         FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9373                 fence_cnt = base_offset + CFG_BASE +
9374                         sizeof(u32) *
9375                         (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9376                 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9377                                 sds->props[SP_FENCE0_RDATA_OFFSET];
9378                 engine_name = hl_sync_engine_to_string(engine_type);
9379
9380                 rc = hl_snprintf_resize(
9381                         buf, size, offset,
9382                         "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9383                         engine_name, engine_id,
9384                         i, fence_id,
9385                         fence_cnt, engine_name, engine_id, fence_id, i,
9386                         fence_rdata, engine_name, engine_id, fence_id, i,
9387                         fences[fence_id],
9388                         statuses[i]);
9389                 if (rc)
9390                         goto free_fences;
9391         }
9392
9393         rc = 0;
9394
9395 free_fences:
9396         kfree(fences);
9397 free_status:
9398         kfree(statuses);
9399 out:
9400         return rc;
9401 }
9402
9403
9404 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9405         .monitor_valid = gaudi_monitor_valid,
9406         .print_single_monitor = gaudi_print_single_monitor,
9407         .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9408         .print_fences_single_engine = gaudi_print_fences_single_engine,
9409 };
9410
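/*
 * Register Gaudi's state-dump support: hash tables translating SOB and
 * monitor IDs to printable names, the ASIC-specific properties array and
 * the dump callbacks defined above.
 */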
9411 static void gaudi_state_dump_init(struct hl_device *hdev)
9412 {
9413         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9414         int i;
9415
9416         for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9417                 hash_add(sds->so_id_to_str_tb,
9418                         &gaudi_so_id_to_str[i].node,
9419                         gaudi_so_id_to_str[i].id);
9420
9421         for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9422                 hash_add(sds->monitor_id_to_str_tb,
9423                         &gaudi_monitor_id_to_str[i].node,
9424                         gaudi_monitor_id_to_str[i].id);
9425
9426         sds->props = gaudi_state_dump_specs_props;
9427
9428         sds->sync_namager_names = gaudi_sync_manager_names;
9429
9430         sds->funcs = gaudi_state_dump_funcs;
9431 }
9432
9433 static u32 *gaudi_get_stream_master_qid_arr(void)
9434 {
9435         return gaudi_stream_master;
9436 }
9437
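/*
 * ASIC function table through which the common habanalabs code drives the
 * Gaudi-specific implementation; installed by gaudi_set_asic_funcs().
 */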
9438 static const struct hl_asic_funcs gaudi_funcs = {
9439         .early_init = gaudi_early_init,
9440         .early_fini = gaudi_early_fini,
9441         .late_init = gaudi_late_init,
9442         .late_fini = gaudi_late_fini,
9443         .sw_init = gaudi_sw_init,
9444         .sw_fini = gaudi_sw_fini,
9445         .hw_init = gaudi_hw_init,
9446         .hw_fini = gaudi_hw_fini,
9447         .halt_engines = gaudi_halt_engines,
9448         .suspend = gaudi_suspend,
9449         .resume = gaudi_resume,
9450         .mmap = gaudi_mmap,
9451         .ring_doorbell = gaudi_ring_doorbell,
9452         .pqe_write = gaudi_pqe_write,
9453         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9454         .asic_dma_free_coherent = gaudi_dma_free_coherent,
9455         .scrub_device_mem = gaudi_scrub_device_mem,
9456         .get_int_queue_base = gaudi_get_int_queue_base,
9457         .test_queues = gaudi_test_queues,
9458         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9459         .asic_dma_pool_free = gaudi_dma_pool_free,
9460         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9461         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9462         .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9463         .cs_parser = gaudi_cs_parser,
9464         .asic_dma_map_sg = gaudi_dma_map_sg,
9465         .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9466         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9467         .update_eq_ci = gaudi_update_eq_ci,
9468         .context_switch = gaudi_context_switch,
9469         .restore_phase_topology = gaudi_restore_phase_topology,
9470         .debugfs_read32 = gaudi_debugfs_read32,
9471         .debugfs_write32 = gaudi_debugfs_write32,
9472         .debugfs_read64 = gaudi_debugfs_read64,
9473         .debugfs_write64 = gaudi_debugfs_write64,
9474         .debugfs_read_dma = gaudi_debugfs_read_dma,
9475         .add_device_attr = hl_add_device_attr,
9476         .handle_eqe = gaudi_handle_eqe,
9477         .set_pll_profile = hl_set_pll_profile,
9478         .get_events_stat = gaudi_get_events_stat,
9479         .read_pte = gaudi_read_pte,
9480         .write_pte = gaudi_write_pte,
9481         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9482         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9483         .send_heartbeat = gaudi_send_heartbeat,
9484         .set_clock_gating = gaudi_set_clock_gating,
9485         .disable_clock_gating = gaudi_disable_clock_gating,
9486         .debug_coresight = gaudi_debug_coresight,
9487         .is_device_idle = gaudi_is_device_idle,
9488         .non_hard_reset_late_init = gaudi_non_hard_reset_late_init,
9489         .hw_queues_lock = gaudi_hw_queues_lock,
9490         .hw_queues_unlock = gaudi_hw_queues_unlock,
9491         .get_pci_id = gaudi_get_pci_id,
9492         .get_eeprom_data = gaudi_get_eeprom_data,
9493         .send_cpu_message = gaudi_send_cpu_message,
9494         .pci_bars_map = gaudi_pci_bars_map,
9495         .init_iatu = gaudi_init_iatu,
9496         .rreg = hl_rreg,
9497         .wreg = hl_wreg,
9498         .halt_coresight = gaudi_halt_coresight,
9499         .ctx_init = gaudi_ctx_init,
9500         .ctx_fini = gaudi_ctx_fini,
9501         .get_clk_rate = hl_get_clk_rate,
9502         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9503         .load_firmware_to_device = gaudi_load_firmware_to_device,
9504         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9505         .get_signal_cb_size = gaudi_get_signal_cb_size,
9506         .get_wait_cb_size = gaudi_get_wait_cb_size,
9507         .gen_signal_cb = gaudi_gen_signal_cb,
9508         .gen_wait_cb = gaudi_gen_wait_cb,
9509         .reset_sob = gaudi_reset_sob,
9510         .reset_sob_group = gaudi_reset_sob_group,
9511         .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9512         .get_device_time = gaudi_get_device_time,
9513         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9514         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9515         .scramble_addr = hl_mmu_scramble_addr,
9516         .descramble_addr = hl_mmu_descramble_addr,
9517         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9518         .get_hw_block_id = gaudi_get_hw_block_id,
9519         .hw_block_mmap = gaudi_block_mmap,
9520         .enable_events_from_fw = gaudi_enable_events_from_fw,
9521         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9522         .init_firmware_loader = gaudi_init_firmware_loader,
9523         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9524         .state_dump_init = gaudi_state_dump_init,
9525         .get_sob_addr = gaudi_get_sob_addr,
9526         .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9527         .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
9528 };
9529
9530 /**
9531  * gaudi_set_asic_funcs - set GAUDI function pointers
9532  *
9533  * @hdev: pointer to hl_device structure
9534  *
9535  */
9536 void gaudi_set_asic_funcs(struct hl_device *hdev)
9537 {
9538         hdev->asic_funcs = &gaudi_funcs;
9539 }