habanalabs: adding indication of boot fit loaded
drivers/misc/habanalabs/gaudi/gaudi.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2021 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43  *                      because of TDMA (tensor DMA). Hence, WREG is never
44  *                      secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
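/*
 * A minimal sketch (not driver code; the helper names are hypothetical) of
 * the secured-DMA flow described above (cf. gaudi_send_job_on_qman0(),
 * declared further down, which submits such jobs on QMAN 0):
 *
 *	if (!device_is_idle(hdev))
 *		return -EBUSY;
 *	secure_pci_dma_ch0(hdev, true);		// switch to driver-only mode
 *	rc = run_dma_job(hdev, job);		// e.g. SRAM clear, MMU PGT clear
 *	secure_pci_dma_ch0(hdev, false);	// back to non-secured
 */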
61
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000         /* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN            20
86
87 #define GAUDI_CB_POOL_CB_CNT            512
88 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
97
98 #define GAUDI_ARB_WDT_TIMEOUT           0x1000000
99
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
101                 BIT(GAUDI_ENGINE_ID_MME_0) |\
102                 BIT(GAUDI_ENGINE_ID_MME_2) |\
103                 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
104
105 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
106
107 #define GAUDI_PLL_MAX 10
108
109 #define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")
110
111 #define MONITOR_SOB_STRING_SIZE         256
112
113 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
114         GAUDI_QUEUE_ID_DMA_0_0,
115         GAUDI_QUEUE_ID_DMA_0_1,
116         GAUDI_QUEUE_ID_DMA_0_2,
117         GAUDI_QUEUE_ID_DMA_0_3,
118         GAUDI_QUEUE_ID_DMA_1_0,
119         GAUDI_QUEUE_ID_DMA_1_1,
120         GAUDI_QUEUE_ID_DMA_1_2,
121         GAUDI_QUEUE_ID_DMA_1_3
122 };
123
124 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
125                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
126                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
127                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
128                 "gaudi cpu eq"
129 };
130
131 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
132         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
133         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
134         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
135         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
136         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
137         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
138         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
139         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
140 };
141
142 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
143         [0] = GAUDI_QUEUE_ID_DMA_0_0,
144         [1] = GAUDI_QUEUE_ID_DMA_0_1,
145         [2] = GAUDI_QUEUE_ID_DMA_0_2,
146         [3] = GAUDI_QUEUE_ID_DMA_0_3,
147         [4] = GAUDI_QUEUE_ID_DMA_1_0,
148         [5] = GAUDI_QUEUE_ID_DMA_1_1,
149         [6] = GAUDI_QUEUE_ID_DMA_1_2,
150         [7] = GAUDI_QUEUE_ID_DMA_1_3,
151 };
152
153 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
154         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
155         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
156         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
157         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
158         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
159         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
160         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
161         [PACKET_FENCE]          = sizeof(struct packet_fence),
162         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
163         [PACKET_NOP]            = sizeof(struct packet_nop),
164         [PACKET_STOP]           = sizeof(struct packet_stop),
165         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
166         [PACKET_WAIT]           = sizeof(struct packet_wait),
167         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
168 };
169
170 static inline bool validate_packet_id(enum packet_id id)
171 {
172         switch (id) {
173         case PACKET_WREG_32:
174         case PACKET_WREG_BULK:
175         case PACKET_MSG_LONG:
176         case PACKET_MSG_SHORT:
177         case PACKET_CP_DMA:
178         case PACKET_REPEAT:
179         case PACKET_MSG_PROT:
180         case PACKET_FENCE:
181         case PACKET_LIN_DMA:
182         case PACKET_NOP:
183         case PACKET_STOP:
184         case PACKET_ARB_POINT:
185         case PACKET_WAIT:
186         case PACKET_LOAD_AND_EXE:
187                 return true;
188         default:
189                 return false;
190         }
191 }
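/*
 * Hedged usage sketch (the real CB parser appears later in this file and may
 * differ): a parser would extract the packet id from the packet header and
 * reject anything validate_packet_id() does not recognize. The
 * PACKET_HEADER_PACKET_ID_MASK/SHIFT names are assumed here to be the
 * header-field macros used elsewhere in the driver:
 *
 *	pkt_id = (enum packet_id) ((le64_to_cpu(user_pkt->header) &
 *			PACKET_HEADER_PACKET_ID_MASK) >>
 *				PACKET_HEADER_PACKET_ID_SHIFT);
 *	if (!validate_packet_id(pkt_id))
 *		return -EINVAL;
 */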
192
193 static const char * const
194 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
195         "tpc_address_exceed_slm",
196         "tpc_div_by_0",
197         "tpc_spu_mac_overflow",
198         "tpc_spu_addsub_overflow",
199         "tpc_spu_abs_overflow",
200         "tpc_spu_fp_dst_nan_inf",
201         "tpc_spu_fp_dst_denorm",
202         "tpc_vpu_mac_overflow",
203         "tpc_vpu_addsub_overflow",
204         "tpc_vpu_abs_overflow",
205         "tpc_vpu_fp_dst_nan_inf",
206         "tpc_vpu_fp_dst_denorm",
207         "tpc_assertions",
208         "tpc_illegal_instruction",
209         "tpc_pc_wrap_around",
210         "tpc_qm_sw_err",
211         "tpc_hbw_rresp_err",
212         "tpc_hbw_bresp_err",
213         "tpc_lbw_rresp_err",
214         "tpc_lbw_bresp_err"
215 };
216
217 static const char * const
218 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
219         "PQ AXI HBW error",
220         "CQ AXI HBW error",
221         "CP AXI HBW error",
222         "CP error due to undefined OPCODE",
223         "CP encountered STOP OPCODE",
224         "CP AXI LBW error",
225         "CP WRREG32 or WRBULK returned error",
226         "N/A",
227         "FENCE 0 inc over max value and clipped",
228         "FENCE 1 inc over max value and clipped",
229         "FENCE 2 inc over max value and clipped",
230         "FENCE 3 inc over max value and clipped",
231         "FENCE 0 dec under min value and clipped",
232         "FENCE 1 dec under min value and clipped",
233         "FENCE 2 dec under min value and clipped",
234         "FENCE 3 dec under min value and clipped"
235 };
236
237 static const char * const
238 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
239         "Choice push while full error",
240         "Choice Q watchdog error",
241         "MSG AXI LBW returned with error"
242 };
243
244 enum gaudi_sm_sei_cause {
245         GAUDI_SM_SEI_SO_OVERFLOW,
246         GAUDI_SM_SEI_LBW_4B_UNALIGNED,
247         GAUDI_SM_SEI_AXI_RESPONSE_ERR
248 };
249
250 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
251         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
252         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
253         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
254         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
255         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
256         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
257         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
258         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
259         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
349         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
350         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
351         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
352         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
353         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
354         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
355         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
356         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
357         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
358         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
359         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
360         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
361         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
362         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
363         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
364 };
365
366 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
367         { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
368         { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
369         { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
370         { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
371         { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
372         { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
373         { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
374         { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
375         { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
376         { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
377         { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
378         { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
379         { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
380         { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
381         { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
382         { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
383         { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
384         { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
385         { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
386         { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
387         { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
388         { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
389         { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
390         { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
391         { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
392         { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
393         { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
394 };
395
396 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
397         { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
398         { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
399         { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
400         { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
401         { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
402         { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
403         { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
404         { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
405         { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
406         { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
407         { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
408 };
409
410 static s64 gaudi_state_dump_specs_props[] = {
411         [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
412         [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
413         [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
414         [SP_MON_OBJ_WR_ADDR_LOW] =
415                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
416         [SP_MON_OBJ_WR_ADDR_HIGH] =
417                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
418         [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
419         [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
420         [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
421         [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
422         [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
423         [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
424         [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
425         [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
426         [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
427         [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
428         [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
429         [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
430         [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
431         [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
432         [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
433         [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
434         [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
435         [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
436         [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
437         [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
438         [SP_FENCE0_CNT_OFFSET] =
439                 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
440         [SP_FENCE0_RDATA_OFFSET] =
441                 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
442         [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
443         [SP_NUM_CORES] = 1,
444 };
445
446 /* The order here is opposite to the order of the indexing in the h/w.
447  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
448  */
449 static const char * const gaudi_sync_manager_names[] = {
450         "SYNC_MGR_E_N",
451         "SYNC_MGR_W_N",
452         "SYNC_MGR_E_S",
453         "SYNC_MGR_W_S",
454         NULL
455 };
456
457 struct ecc_info_extract_params {
458         u64 block_address;
459         u32 num_memories;
460         bool derr;
461         bool disable_clock_gating;
462 };
463
464 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
465                                                                 u64 phys_addr);
466 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
467                                         struct hl_cs_job *job);
468 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
469                                         u32 size, u64 val);
470 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
471                                         u32 num_regs, u32 val);
472 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
473                                 u32 tpc_id);
474 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
475 static int gaudi_cpucp_info_get(struct hl_device *hdev);
476 static void gaudi_disable_clock_gating(struct hl_device *hdev);
477 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
478 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
479                                 u32 size, bool eb);
480 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
481                                 struct hl_gen_wait_properties *prop);
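/*
 * get_collective_mode() - classify a queue's role in collective operations:
 * external (PCI DMA) queues act as collective masters, the DMA5, TPC7 and
 * NIC queues act as collective slaves, and any other queue does not take
 * part in collective operations.
 */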
482 static inline enum hl_collective_mode
483 get_collective_mode(struct hl_device *hdev, u32 queue_id)
484 {
485         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
486                 return HL_COLLECTIVE_MASTER;
487
488         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
489                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
490                 return HL_COLLECTIVE_SLAVE;
491
492         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
493                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
494                 return HL_COLLECTIVE_SLAVE;
495
496         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
497                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
498                 return HL_COLLECTIVE_SLAVE;
499
500         return HL_COLLECTIVE_NOT_SUPPORTED;
501 }
502
503 static inline void set_default_power_values(struct hl_device *hdev)
504 {
505         struct asic_fixed_properties *prop = &hdev->asic_prop;
506
507         if (hdev->card_type == cpucp_card_type_pmc) {
508                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
509
510                 if (prop->fw_security_enabled)
511                         prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
512                 else
513                         prop->dc_power_default = DC_POWER_DEFAULT_PMC;
514         } else {
515                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
516                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
517         }
518 }
519
520 static int gaudi_set_fixed_properties(struct hl_device *hdev)
521 {
522         struct asic_fixed_properties *prop = &hdev->asic_prop;
523         u32 num_sync_stream_queues = 0;
524         int i;
525
526         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
527         prop->hw_queues_props = kcalloc(prop->max_queues,
528                         sizeof(struct hw_queue_properties),
529                         GFP_KERNEL);
530
531         if (!prop->hw_queues_props)
532                 return -ENOMEM;
533
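        /*
         * Per-queue properties are derived from gaudi_queue_type[]: external
         * (PCI DMA) queues support sync streams and use kernel-allocated CBs,
         * the CPU queue is driver-only, and internal (engine) queues use
         * user-allocated CBs.
         */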
534         for (i = 0 ; i < prop->max_queues ; i++) {
535                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
536                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
537                         prop->hw_queues_props[i].driver_only = 0;
538                         prop->hw_queues_props[i].supports_sync_stream = 1;
539                         prop->hw_queues_props[i].cb_alloc_flags =
540                                 CB_ALLOC_KERNEL;
541                         num_sync_stream_queues++;
542                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
543                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
544                         prop->hw_queues_props[i].driver_only = 1;
545                         prop->hw_queues_props[i].supports_sync_stream = 0;
546                         prop->hw_queues_props[i].cb_alloc_flags =
547                                 CB_ALLOC_KERNEL;
548                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
549                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
550                         prop->hw_queues_props[i].driver_only = 0;
551                         prop->hw_queues_props[i].supports_sync_stream = 0;
552                         prop->hw_queues_props[i].cb_alloc_flags =
553                                 CB_ALLOC_USER;
554
555                 }
556                 prop->hw_queues_props[i].collective_mode =
557                                                 get_collective_mode(hdev, i);
558         }
559
560         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
561         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
562         prop->collective_first_sob = 0;
563         prop->collective_first_mon = 0;
564
565         /* 2 SOBs per internal queue stream are reserved for collective */
566         prop->sync_stream_first_sob =
567                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
568                         * QMAN_STREAMS * HL_RSVD_SOBS;
569
570         /* 1 monitor per internal queue stream is reserved for collective
571          * 2 monitors per external queue stream are reserved for collective
572          */
573         prop->sync_stream_first_mon =
574                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
575                         (NUMBER_OF_EXT_HW_QUEUES * 2);
576
577         prop->dram_base_address = DRAM_PHYS_BASE;
578         prop->dram_size = GAUDI_HBM_SIZE_32GB;
579         prop->dram_end_address = prop->dram_base_address +
580                                         prop->dram_size;
581         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
582
583         prop->sram_base_address = SRAM_BASE_ADDR;
584         prop->sram_size = SRAM_SIZE;
585         prop->sram_end_address = prop->sram_base_address +
586                                         prop->sram_size;
587         prop->sram_user_base_address = prop->sram_base_address +
588                                         SRAM_USER_BASE_OFFSET;
589
590         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
591         if (hdev->pldm)
592                 prop->mmu_pgt_size = 0x800000; /* 8MB */
593         else
594                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
595         prop->mmu_pte_size = HL_PTE_SIZE;
596         prop->mmu_hop_table_size = HOP_TABLE_SIZE;
597         prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
598         prop->dram_page_size = PAGE_SIZE_2MB;
599         prop->dram_supports_virtual_memory = false;
600
601         prop->pmmu.hop0_shift = HOP0_SHIFT;
602         prop->pmmu.hop1_shift = HOP1_SHIFT;
603         prop->pmmu.hop2_shift = HOP2_SHIFT;
604         prop->pmmu.hop3_shift = HOP3_SHIFT;
605         prop->pmmu.hop4_shift = HOP4_SHIFT;
606         prop->pmmu.hop0_mask = HOP0_MASK;
607         prop->pmmu.hop1_mask = HOP1_MASK;
608         prop->pmmu.hop2_mask = HOP2_MASK;
609         prop->pmmu.hop3_mask = HOP3_MASK;
610         prop->pmmu.hop4_mask = HOP4_MASK;
611         prop->pmmu.start_addr = VA_HOST_SPACE_START;
612         prop->pmmu.end_addr =
613                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
614         prop->pmmu.page_size = PAGE_SIZE_4KB;
615         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
616         prop->pmmu.last_mask = LAST_MASK;
617
618         /* PMMU and HPMMU are the same except for the page size */
619         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
620         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
621
622         /* shifts and masks are the same in PMMU and DMMU */
623         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
624         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
625         prop->dmmu.end_addr = VA_HOST_SPACE_END;
626         prop->dmmu.page_size = PAGE_SIZE_2MB;
627
628         prop->cfg_size = CFG_SIZE;
629         prop->max_asid = MAX_ASID;
630         prop->num_of_events = GAUDI_EVENT_SIZE;
631         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
632
633         set_default_power_values(hdev);
634
635         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
636         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
637
638         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
639         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
640
641         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
642                                         CARD_NAME_MAX_LEN);
643
644         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
645
646         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
647                         prop->sync_stream_first_sob +
648                         (num_sync_stream_queues * HL_RSVD_SOBS);
649         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
650                         prop->sync_stream_first_mon +
651                         (num_sync_stream_queues * HL_RSVD_MONS);
652
653         prop->first_available_user_msix_interrupt = USHRT_MAX;
654
655         for (i = 0 ; i < HL_MAX_DCORES ; i++)
656                 prop->first_available_cq[i] = USHRT_MAX;
657
658         prop->fw_cpu_boot_dev_sts0_valid = false;
659         prop->fw_cpu_boot_dev_sts1_valid = false;
660         prop->hard_reset_done_by_fw = false;
661         prop->gic_interrupts_enable = true;
662
663         prop->server_type = HL_SERVER_TYPE_UNKNOWN;
664
665         prop->clk_pll_index = HL_GAUDI_MME_PLL;
666         prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
667
668         return 0;
669 }
670
671 static int gaudi_pci_bars_map(struct hl_device *hdev)
672 {
673         static const char * const name[] = {"SRAM", "CFG", "HBM"};
674         bool is_wc[3] = {false, false, true};
675         int rc;
676
677         rc = hl_pci_bars_map(hdev, name, is_wc);
678         if (rc)
679                 return rc;
680
681         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
682                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
683
684         return 0;
685 }
686
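/*
 * gaudi_set_hbm_bar_base() - move the HBM BAR window to point at @addr.
 * Returns the previous window base so the caller can restore it later (the
 * returned value equals @addr if no move was needed), or U64_MAX if the
 * region could not be set or the iATU is owned by firmware.
 */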
687 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
688 {
689         struct gaudi_device *gaudi = hdev->asic_specific;
690         struct hl_inbound_pci_region pci_region;
691         u64 old_addr = addr;
692         int rc;
693
694         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
695                 return old_addr;
696
697         if (hdev->asic_prop.iatu_done_by_fw)
698                 return U64_MAX;
699
700         /* Inbound Region 2 - Bar 4 - Point to HBM */
701         pci_region.mode = PCI_BAR_MATCH_MODE;
702         pci_region.bar = HBM_BAR_ID;
703         pci_region.addr = addr;
704         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
705         if (rc)
706                 return U64_MAX;
707
708         if (gaudi) {
709                 old_addr = gaudi->hbm_bar_cur_addr;
710                 gaudi->hbm_bar_cur_addr = addr;
711         }
712
713         return old_addr;
714 }
715
716 static int gaudi_init_iatu(struct hl_device *hdev)
717 {
718         struct hl_inbound_pci_region inbound_region;
719         struct hl_outbound_pci_region outbound_region;
720         int rc;
721
722         if (hdev->asic_prop.iatu_done_by_fw)
723                 return 0;
724
725         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
726         inbound_region.mode = PCI_BAR_MATCH_MODE;
727         inbound_region.bar = SRAM_BAR_ID;
728         inbound_region.addr = SRAM_BASE_ADDR;
729         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
730         if (rc)
731                 goto done;
732
733         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
734         inbound_region.mode = PCI_BAR_MATCH_MODE;
735         inbound_region.bar = CFG_BAR_ID;
736         inbound_region.addr = SPI_FLASH_BASE_ADDR;
737         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
738         if (rc)
739                 goto done;
740
741         /* Inbound Region 2 - Bar 4 - Point to HBM */
742         inbound_region.mode = PCI_BAR_MATCH_MODE;
743         inbound_region.bar = HBM_BAR_ID;
744         inbound_region.addr = DRAM_PHYS_BASE;
745         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
746         if (rc)
747                 goto done;
748
749         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
750
751         /* Outbound Region 0 - Point to Host */
752         outbound_region.addr = HOST_PHYS_BASE;
753         outbound_region.size = HOST_PHYS_SIZE;
754         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
755
756 done:
757         return rc;
758 }
759
760 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
761 {
762         return RREG32(mmHW_STATE);
763 }
764
765 static int gaudi_early_init(struct hl_device *hdev)
766 {
767         struct asic_fixed_properties *prop = &hdev->asic_prop;
768         struct pci_dev *pdev = hdev->pdev;
769         u32 fw_boot_status;
770         int rc;
771
772         rc = gaudi_set_fixed_properties(hdev);
773         if (rc) {
774                 dev_err(hdev->dev, "Failed setting fixed properties\n");
775                 return rc;
776         }
777
778         /* Check BAR sizes */
779         if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
780                 dev_err(hdev->dev,
781                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
782                         SRAM_BAR_ID,
783                         (unsigned long long) pci_resource_len(pdev,
784                                                         SRAM_BAR_ID),
785                         SRAM_BAR_SIZE);
786                 rc = -ENODEV;
787                 goto free_queue_props;
788         }
789
790         if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
791                 dev_err(hdev->dev,
792                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
793                         CFG_BAR_ID,
794                         (unsigned long long) pci_resource_len(pdev,
795                                                                 CFG_BAR_ID),
796                         CFG_BAR_SIZE);
797                 rc = -ENODEV;
798                 goto free_queue_props;
799         }
800
801         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
802         hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
803
804         /* If FW security is enabled at this point it means no access to ELBI */
805         if (hdev->asic_prop.fw_security_enabled) {
806                 hdev->asic_prop.iatu_done_by_fw = true;
807
808                 /*
809                  * The GIC security bit can ONLY be set by CPUCP, so at this
810                  * stage the decision can only be based on the PCI ID security.
811                  */
812                 hdev->asic_prop.gic_interrupts_enable = false;
813                 goto pci_init;
814         }
815
816         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
817                                 &fw_boot_status);
818         if (rc)
819                 goto free_queue_props;
820
821         /* Check whether FW is configuring iATU */
822         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
823                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
824                 hdev->asic_prop.iatu_done_by_fw = true;
825
826 pci_init:
827         rc = hl_pci_init(hdev);
828         if (rc)
829                 goto free_queue_props;
830
831         /* Before continuing with the initialization, we need to read the preboot
832          * version to determine whether we are running with security-enabled firmware.
833          */
834         rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
835                                         mmCPU_BOOT_DEV_STS0,
836                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
837                                         mmCPU_BOOT_ERR1,
838                                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
839         if (rc) {
840                 if (hdev->reset_on_preboot_fail)
841                         hdev->asic_funcs->hw_fini(hdev, true, false);
842                 goto pci_fini;
843         }
844
845         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
846                 dev_info(hdev->dev,
847                         "H/W state is dirty, must reset before initializing\n");
848                 hdev->asic_funcs->hw_fini(hdev, true, false);
849         }
850
851         return 0;
852
853 pci_fini:
854         hl_pci_fini(hdev);
855 free_queue_props:
856         kfree(hdev->asic_prop.hw_queues_props);
857         return rc;
858 }
859
860 static int gaudi_early_fini(struct hl_device *hdev)
861 {
862         kfree(hdev->asic_prop.hw_queues_props);
863         hl_pci_fini(hdev);
864
865         return 0;
866 }
867
868 /**
869  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
870  *
871  * @hdev: pointer to hl_device structure
872  *
873  */
874 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
875 {
876         struct asic_fixed_properties *prop = &hdev->asic_prop;
877         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
878         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
879         int rc;
880
881         if (hdev->asic_prop.fw_security_enabled) {
882                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
883
884                 if (rc)
885                         return rc;
886
887                 freq = pll_freq_arr[2];
888         } else {
889                 /* Backward compatibility */
890                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
891                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
892                 nr = RREG32(mmPSOC_CPU_PLL_NR);
893                 nf = RREG32(mmPSOC_CPU_PLL_NF);
894                 od = RREG32(mmPSOC_CPU_PLL_OD);
895
896                 if (div_sel == DIV_SEL_REF_CLK ||
897                                 div_sel == DIV_SEL_DIVIDED_REF) {
898                         if (div_sel == DIV_SEL_REF_CLK)
899                                 freq = PLL_REF_CLK;
900                         else
901                                 freq = PLL_REF_CLK / (div_fctr + 1);
902                 } else if (div_sel == DIV_SEL_PLL_CLK ||
903                         div_sel == DIV_SEL_DIVIDED_PLL) {
904                         pll_clk = PLL_REF_CLK * (nf + 1) /
905                                         ((nr + 1) * (od + 1));
906                         if (div_sel == DIV_SEL_PLL_CLK)
907                                 freq = pll_clk;
908                         else
909                                 freq = pll_clk / (div_fctr + 1);
910                 } else {
911                         dev_warn(hdev->dev,
912                                 "Received invalid div select value: %d",
913                                 div_sel);
914                         freq = 0;
915                 }
916         }
917
918         prop->psoc_timestamp_frequency = freq;
919         prop->psoc_pci_pll_nr = nr;
920         prop->psoc_pci_pll_nf = nf;
921         prop->psoc_pci_pll_od = od;
922         prop->psoc_pci_pll_div_factor = div_fctr;
923
924         return 0;
925 }
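/*
 * Worked example for the legacy (register-based) path above, using made-up
 * values purely for illustration: taking PLL_REF_CLK as 50 (MHz), nr = 0,
 * nf = 49 and od = 1 give pll_clk = 50 * (49 + 1) / ((0 + 1) * (1 + 1)) =
 * 1250, and with div_sel == DIV_SEL_DIVIDED_PLL and div_fctr = 1 the
 * reported timestamp frequency is 1250 / (1 + 1) = 625.
 */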
926
927 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
928                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
929 {
930         struct asic_fixed_properties *prop = &hdev->asic_prop;
931         struct packet_lin_dma *init_tpc_mem_pkt;
932         struct hl_cs_job *job;
933         struct hl_cb *cb;
934         u64 dst_addr;
935         u32 cb_size, ctl;
936         u8 tpc_id;
937         int rc;
938
939         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
940         if (!cb)
941                 return -EFAULT;
942
943         init_tpc_mem_pkt = cb->kernel_address;
944         cb_size = sizeof(*init_tpc_mem_pkt);
945         memset(init_tpc_mem_pkt, 0, cb_size);
946
947         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
948
949         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
950         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
951         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
952         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
953
954         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
955
956         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
957         dst_addr = (prop->sram_user_base_address &
958                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
959                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
960         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
961
962         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
963         if (!job) {
964                 dev_err(hdev->dev, "Failed to allocate a new job\n");
965                 rc = -ENOMEM;
966                 goto release_cb;
967         }
968
969         job->id = 0;
970         job->user_cb = cb;
971         atomic_inc(&job->user_cb->cs_cnt);
972         job->user_cb_size = cb_size;
973         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
974         job->patched_cb = job->user_cb;
975         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
976
977         hl_debugfs_add_job(hdev, job);
978
979         rc = gaudi_send_job_on_qman0(hdev, job);
980
981         if (rc)
982                 goto free_job;
983
984         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
985                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
986                 if (rc)
987                         break;
988         }
989
990 free_job:
991         hl_userptr_delete_list(hdev, &job->userptr_list);
992         hl_debugfs_remove_job(hdev, job);
993         kfree(job);
994         atomic_dec(&cb->cs_cnt);
995
996 release_cb:
997         hl_cb_put(cb);
998         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
999
1000         return rc;
1001 }
1002
1003 /*
1004  * gaudi_init_tpc_mem() - Initialize TPC memories.
1005  * @hdev: Pointer to hl_device structure.
1006  *
1007  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1008  *
1009  * Return: 0 for success, negative value for error.
1010  */
1011 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1012 {
1013         const struct firmware *fw;
1014         size_t fw_size;
1015         void *cpu_addr;
1016         dma_addr_t dma_handle;
1017         int rc, count = 5;
1018
1019 again:
1020         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1021         if (rc == -EINTR && count-- > 0) {
1022                 msleep(50);
1023                 goto again;
1024         }
1025
1026         if (rc) {
1027                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1028                                 GAUDI_TPC_FW_FILE);
1029                 goto out;
1030         }
1031
1032         fw_size = fw->size;
1033         cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1034                         &dma_handle, GFP_KERNEL | __GFP_ZERO);
1035         if (!cpu_addr) {
1036                 dev_err(hdev->dev,
1037                         "Failed to allocate %zu of dma memory for TPC kernel\n",
1038                         fw_size);
1039                 rc = -ENOMEM;
1040                 goto out;
1041         }
1042
1043         memcpy(cpu_addr, fw->data, fw_size);
1044
1045         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1046
1047         hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1048                         dma_handle);
1049
1050 out:
1051         release_firmware(fw);
1052         return rc;
1053 }
1054
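/*
 * gaudi_collective_map_sobs() - map the SOBs of the stream's current SOB
 * group onto the collective queues: each NIC queue of the stream gets its
 * own SOB, while the DMA5 and TPC7 queues (only one of which takes part in
 * the reduction) share the SOB that follows the NIC ones.
 */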
1055 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1056 {
1057         struct gaudi_device *gaudi = hdev->asic_specific;
1058         struct gaudi_collective_properties *prop = &gaudi->collective_props;
1059         struct hl_hw_queue *q;
1060         u32 i, sob_id, sob_group_id, queue_id;
1061
1062         /* Iterate through SOB groups and assign a SOB for each slave queue */
1063         sob_group_id =
1064                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1065         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1066
1067         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1068         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1069                 q = &hdev->kernel_queues[queue_id + (4 * i)];
1070                 q->sync_stream_prop.collective_sob_id = sob_id + i;
1071         }
1072
1073         /* Both DMA5 and TPC7 use the same resources since only a single
1074          * engine needs to participate in the reduction process
1075          */
1076         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1077         q = &hdev->kernel_queues[queue_id];
1078         q->sync_stream_prop.collective_sob_id =
1079                         sob_id + NIC_NUMBER_OF_ENGINES;
1080
1081         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1082         q = &hdev->kernel_queues[queue_id];
1083         q->sync_stream_prop.collective_sob_id =
1084                         sob_id + NIC_NUMBER_OF_ENGINES;
1085 }
1086
1087 static void gaudi_sob_group_hw_reset(struct kref *ref)
1088 {
1089         struct gaudi_hw_sob_group *hw_sob_group =
1090                 container_of(ref, struct gaudi_hw_sob_group, kref);
1091         struct hl_device *hdev = hw_sob_group->hdev;
1092         int i;
1093
1094         for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1095                 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1096                         (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1097
1098         kref_init(&hw_sob_group->kref);
1099 }
1100
1101 static void gaudi_sob_group_reset_error(struct kref *ref)
1102 {
1103         struct gaudi_hw_sob_group *hw_sob_group =
1104                 container_of(ref, struct gaudi_hw_sob_group, kref);
1105         struct hl_device *hdev = hw_sob_group->hdev;
1106
1107         dev_crit(hdev->dev,
1108                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1109                 hw_sob_group->base_sob_id);
1110 }
1111
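/*
 * gaudi_collective_mstr_sob_mask_set() - build the collective master monitor
 * SOB masks: one bit per initialized NIC engine, packed
 * HL_MAX_SOBS_PER_MONITOR bits per mask word, plus one extra bit for the
 * collective (reduction) engine.
 */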
1112 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1113 {
1114         struct gaudi_collective_properties *prop;
1115         int i;
1116
1117         prop = &gaudi->collective_props;
1118
1119         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1120
1121         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1122                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1123                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1124                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1125         /* Set collective engine bit */
1126         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1127                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1128 }
1129
1130 static int gaudi_collective_init(struct hl_device *hdev)
1131 {
1132         u32 i, sob_id, reserved_sobs_per_group;
1133         struct gaudi_collective_properties *prop;
1134         struct gaudi_device *gaudi;
1135
1136         gaudi = hdev->asic_specific;
1137         prop = &gaudi->collective_props;
1138         sob_id = hdev->asic_prop.collective_first_sob;
1139
1140         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1141         reserved_sobs_per_group =
1142                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1143
1144         /* Init SOB groups */
1145         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1146                 prop->hw_sob_group[i].hdev = hdev;
1147                 prop->hw_sob_group[i].base_sob_id = sob_id;
1148                 sob_id += reserved_sobs_per_group;
1149                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1150         }
1151
1152         for (i = 0 ; i < QMAN_STREAMS; i++) {
1153                 prop->next_sob_group_val[i] = 1;
1154                 prop->curr_sob_group_idx[i] = 0;
1155                 gaudi_collective_map_sobs(hdev, i);
1156         }
1157
1158         gaudi_collective_mstr_sob_mask_set(gaudi);
1159
1160         return 0;
1161 }
1162
1163 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1164 {
1165         struct gaudi_device *gaudi = hdev->asic_specific;
1166         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1167
1168         kref_put(&cprop->hw_sob_group[sob_group].kref,
1169                                         gaudi_sob_group_hw_reset);
1170 }
1171
1172 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1173                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1174 {
1175         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1176         struct gaudi_collective_properties *cprop;
1177         struct hl_gen_wait_properties wait_prop;
1178         struct hl_sync_stream_properties *prop;
1179         struct gaudi_device *gaudi;
1180
1181         gaudi = hdev->asic_specific;
1182         cprop = &gaudi->collective_props;
1183         queue_id = job->hw_queue_id;
1184         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1185
1186         master_sob_base =
1187                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1188         master_monitor = prop->collective_mstr_mon_id[0];
1189
1190         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1191
1192         dev_dbg(hdev->dev,
1193                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1194                 master_sob_base, cprop->mstr_sob_mask[0],
1195                 cprop->next_sob_group_val[stream],
1196                 master_monitor, queue_id);
1197
1198         wait_prop.data = (void *) job->patched_cb;
1199         wait_prop.sob_base = master_sob_base;
1200         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1201         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1202         wait_prop.mon_id = master_monitor;
1203         wait_prop.q_idx = queue_id;
1204         wait_prop.size = cb_size;
1205         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1206
1207         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1208         master_monitor = prop->collective_mstr_mon_id[1];
1209
1210         dev_dbg(hdev->dev,
1211                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1212                 master_sob_base, cprop->mstr_sob_mask[1],
1213                 cprop->next_sob_group_val[stream],
1214                 master_monitor, queue_id);
1215
1216         wait_prop.sob_base = master_sob_base;
1217         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1218         wait_prop.mon_id = master_monitor;
1219         wait_prop.size = cb_size;
1220         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1221 }
1222
1223 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1224                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1225 {
1226         struct hl_gen_wait_properties wait_prop;
1227         struct hl_sync_stream_properties *prop;
1228         u32 queue_id, cb_size = 0;
1229
1230         queue_id = job->hw_queue_id;
1231         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1232
1233         if (job->cs->encaps_signals) {
1234                 /* use the encaps signal handle stored earlier in the flow
1235                  * and set the SOB information from the encaps
1236                  * signals handle
1237                  */
1238                 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1239                                                 cs_cmpl);
1240
1241                 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1242                                 job->cs->sequence,
1243                                 cs_cmpl->hw_sob->sob_id,
1244                                 cs_cmpl->sob_val);
1245         }
1246
1247         /* Add to wait CBs using slave monitor */
1248         wait_prop.data = (void *) job->user_cb;
1249         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1250         wait_prop.sob_mask = 0x1;
1251         wait_prop.sob_val = cs_cmpl->sob_val;
1252         wait_prop.mon_id = prop->collective_slave_mon_id;
1253         wait_prop.q_idx = queue_id;
1254         wait_prop.size = cb_size;
1255
1256         dev_dbg(hdev->dev,
1257                 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1258                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1259                 prop->collective_slave_mon_id, queue_id);
1260
1261         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1262
1263         dev_dbg(hdev->dev,
1264                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1265                 prop->collective_sob_id, queue_id);
1266
1267         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1268                         prop->collective_sob_id, cb_size, false);
1269 }
1270
1271 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1272 {
1273         struct hl_cs_compl *signal_cs_cmpl =
1274                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1275         struct hl_cs_compl *cs_cmpl =
1276                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1277         struct gaudi_collective_properties *cprop;
1278         u32 stream, queue_id, sob_group_offset;
1279         struct gaudi_device *gaudi;
1280         struct hl_device *hdev;
1281         struct hl_cs_job *job;
1282         struct hl_ctx *ctx;
1283
1284         ctx = cs->ctx;
1285         hdev = ctx->hdev;
1286         gaudi = hdev->asic_specific;
1287         cprop = &gaudi->collective_props;
1288
1289         /* In the encaps signals case, the SOB info will be retrieved from
1290          * the handle in gaudi_collective_slave_init_job.
1291          */
1292         if (!cs->encaps_signals) {
1293                 /* copy the SOB id and value of the signal CS */
1294                 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1295                 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1296         }
1297
1298         /* Check again if the signal cs already completed.
1299          * If yes, don't send any wait cs since the hw_sob
1300          * could already be in reset. If the signal is not completed,
1301          * then get a refcount to the hw_sob to prevent resetting the sob
1302          * while the wait cs is not yet submitted.
1303          * Note that this check is protected by two locks,
1304          * the hw queue lock and the completion object lock,
1305          * and the same completion object lock also protects
1306          * the hw_sob reset handler function.
1307          * The hw_queue lock prevents the hw_sob refcount value,
1308          * changed by the signal/wait flows, from going out of sync.
1309          */
1310         spin_lock(&signal_cs_cmpl->lock);
1311
1312         if (completion_done(&cs->signal_fence->completion)) {
1313                 spin_unlock(&signal_cs_cmpl->lock);
1314                 return -EINVAL;
1315         }
1316         /* Increment kref since all slave queues are now waiting on it */
1317         kref_get(&cs_cmpl->hw_sob->kref);
1318
1319         spin_unlock(&signal_cs_cmpl->lock);
1320
1321         /* Calculate the stream from collective master queue (1st job) */
1322         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1323         stream = job->hw_queue_id % 4;
1324         sob_group_offset =
1325                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1326
1327         list_for_each_entry(job, &cs->job_list, cs_node) {
1328                 queue_id = job->hw_queue_id;
1329
1330                 if (hdev->kernel_queues[queue_id].collective_mode ==
1331                                 HL_COLLECTIVE_MASTER)
1332                         gaudi_collective_master_init_job(hdev, job, stream,
1333                                                 sob_group_offset);
1334                 else
1335                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1336         }
1337
1338         cs_cmpl->sob_group = sob_group_offset;
1339
1340         /* Handle sob group kref and wraparound */
1341         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1342         cprop->next_sob_group_val[stream]++;
1343
1344         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1345                 /*
1346                  * Decrement as we reached the max value.
1347                  * The release function won't be called here as we've
1348                  * just incremented the refcount.
1349                  */
1350                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1351                                 gaudi_sob_group_reset_error);
1352                 cprop->next_sob_group_val[stream] = 1;
1353                 /* only two SOBs are currently in use */
1354                 cprop->curr_sob_group_idx[stream] =
1355                         (cprop->curr_sob_group_idx[stream] + 1) &
1356                                                         (HL_RSVD_SOBS - 1);
1357
1358                 gaudi_collective_map_sobs(hdev, stream);
1359
1360                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1361                                 cprop->curr_sob_group_idx[stream], stream);
1362         }
1363
1364         mb();
1365         hl_fence_put(cs->signal_fence);
1366         cs->signal_fence = NULL;
1367
1368         return 0;
1369 }
1370
1371 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1372                 struct hl_ctx *ctx, struct hl_cs *cs,
1373                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1374                 u32 encaps_signal_offset)
1375 {
1376         struct hw_queue_properties *hw_queue_prop;
1377         struct hl_cs_counters_atomic *cntr;
1378         struct hl_cs_job *job;
1379         struct hl_cb *cb;
1380         u32 cb_size;
1381         bool patched_cb;
1382
1383         cntr = &hdev->aggregated_cs_counters;
1384
1385         if (mode == HL_COLLECTIVE_MASTER) {
1386                 /* CB size of collective master queue contains
1387                  * 4 msg short packets for monitor 1 configuration
1388                  * 1 fence packet
1389                  * 4 msg short packets for monitor 2 configuration
1390                  * 1 fence packet
1391                  * 2 msg prot packets for completion and MSI-X
1392                  */
1393                 cb_size = sizeof(struct packet_msg_short) * 8 +
1394                                 sizeof(struct packet_fence) * 2 +
1395                                 sizeof(struct packet_msg_prot) * 2;
1396                 patched_cb = true;
1397         } else {
1398                 /* CB size of collective slave queues contains
1399                  * 4 msg short packets for monitor configuration
1400                  * 1 fence packet
1401                  * 1 additional msg short packet for sob signal
1402                  */
1403                 cb_size = sizeof(struct packet_msg_short) * 5 +
1404                                 sizeof(struct packet_fence);
1405                 patched_cb = false;
1406         }
1407
1408         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1409         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1410         if (!job) {
1411                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1412                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1413                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1414                 return -ENOMEM;
1415         }
1416
1417         /* Allocate internal mapped CB for non-patched CBs */
1418         cb = hl_cb_kernel_create(hdev, cb_size,
1419                         hdev->mmu_enable && !patched_cb);
1420         if (!cb) {
1421                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1422                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1423                 kfree(job);
1424                 return -EFAULT;
1425         }
1426
1427         job->id = 0;
1428         job->cs = cs;
1429         job->user_cb = cb;
1430         atomic_inc(&job->user_cb->cs_cnt);
1431         job->user_cb_size = cb_size;
1432         job->hw_queue_id = queue_id;
1433
1434         /* since the collective wait cs is guaranteed to have only one chunk,
1435          * we can use this chunk to set the encapsulated signal offset
1436          * in the jobs.
1437          */
1438         if (cs->encaps_signals)
1439                 job->encaps_sig_wait_offset = encaps_signal_offset;
1440
1441         /*
1442          * No need for parsing, the user CB is already the patched CB.
1443          * We call hl_cb_destroy() for two reasons: we no longer need
1444          * the CB in the CB idr, and we must decrement its refcount as
1445          * it was incremented inside hl_cb_kernel_create().
1446          */
1447         if (patched_cb)
1448                 job->patched_cb = job->user_cb;
1449         else
1450                 job->patched_cb = NULL;
1451
1452         job->job_cb_size = job->user_cb_size;
1453         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1454
1455         /* increment refcount as for external queues we get completion */
1456         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1457                 cs_get(cs);
1458
1459         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1460
1461         list_add_tail(&job->cs_node, &cs->job_list);
1462
1463         hl_debugfs_add_job(hdev, job);
1464
1465         return 0;
1466 }
1467
1468 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1469                 struct hl_ctx *ctx, struct hl_cs *cs,
1470                 u32 wait_queue_id, u32 collective_engine_id,
1471                 u32 encaps_signal_offset)
1472 {
1473         struct gaudi_device *gaudi = hdev->asic_specific;
1474         struct hw_queue_properties *hw_queue_prop;
1475         u32 queue_id, collective_queue, num_jobs;
1476         u32 stream, nic_queue, nic_idx = 0;
1477         bool skip;
1478         int i, rc = 0;
1479
1480         /* Verify wait queue id is configured as master */
1481         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1482         if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1483                 dev_err(hdev->dev,
1484                         "Queue %d is not configured as collective master\n",
1485                         wait_queue_id);
1486                 return -EINVAL;
1487         }
1488
1489         /* Verify engine id is supported */
1490         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1491                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1492                 dev_err(hdev->dev,
1493                         "Collective wait does not support engine %u\n",
1494                         collective_engine_id);
1495                 return -EINVAL;
1496         }
1497
1498         stream = wait_queue_id % 4;
1499
1500         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1501                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1502         else
1503                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1504
1505         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1506         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1507
1508         /* First job goes to the collective master queue, it will wait for
1509          * the collective slave queues to finish execution.
1510          * The synchronization is done using two monitors:
1511          * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1512          * reduction engine (DMA5/TPC7).
1513          *
1514          * The rest of the jobs go to the collective slave queues, which will
1515          * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1516          */
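             /*
              * Illustrative layout of the resulting job list, assuming the usual
              * Gaudi configuration of ten NIC engines plus one reduction engine
              * per SOB group (i.e. num_jobs == 12):
              *   job 0      - collective master queue (wait_queue_id)
              *   jobs 1-10  - NIC 0-9 slave queues on this stream (jobs for
              *                disabled NICs are skipped)
              *   last job   - DMA5 or TPC7 slave queue (the reduction engine)
              */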
1517         for (i = 0 ; i < num_jobs ; i++) {
1518                 if (i == 0) {
1519                         queue_id = wait_queue_id;
1520                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1521                                 HL_COLLECTIVE_MASTER, queue_id,
1522                                 wait_queue_id, encaps_signal_offset);
1523                 } else {
1524                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1525                                 if (gaudi->hw_cap_initialized &
1526                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1527                                         skip = false;
1528                                 else
1529                                         skip = true;
1530
1531                                 queue_id = nic_queue;
1532                                 nic_queue += 4;
1533                                 nic_idx++;
1534
1535                                 if (skip)
1536                                         continue;
1537                         } else {
1538                                 queue_id = collective_queue;
1539                         }
1540
1541                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1542                                 HL_COLLECTIVE_SLAVE, queue_id,
1543                                 wait_queue_id, encaps_signal_offset);
1544                 }
1545
1546                 if (rc)
1547                         return rc;
1548         }
1549
1550         return rc;
1551 }
1552
1553 static int gaudi_late_init(struct hl_device *hdev)
1554 {
1555         struct gaudi_device *gaudi = hdev->asic_specific;
1556         int rc;
1557
1558         rc = gaudi->cpucp_info_get(hdev);
1559         if (rc) {
1560                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1561                 return rc;
1562         }
1563
1564         if ((hdev->card_type == cpucp_card_type_pci) &&
1565                         (hdev->nic_ports_mask & 0x3)) {
1566                 dev_info(hdev->dev,
1567                         "PCI card detected, only 8 ports are enabled\n");
1568                 hdev->nic_ports_mask &= ~0x3;
1569
1570                 /* Stop and disable unused NIC QMANs */
1571                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1572                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1573                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1574
1575                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1576                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1577                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1578
1579                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1580                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1581
1582                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1583         }
1584
1585         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1586         if (rc) {
1587                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1588                 return rc;
1589         }
1590
1591         /* Scrub both SRAM and DRAM */
1592         rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
1593         if (rc)
1594                 goto disable_pci_access;
1595
1596         rc = gaudi_fetch_psoc_frequency(hdev);
1597         if (rc) {
1598                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1599                 goto disable_pci_access;
1600         }
1601
1602         rc = gaudi_mmu_clear_pgt_range(hdev);
1603         if (rc) {
1604                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1605                 goto disable_pci_access;
1606         }
1607
1608         rc = gaudi_init_tpc_mem(hdev);
1609         if (rc) {
1610                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1611                 goto disable_pci_access;
1612         }
1613
1614         rc = gaudi_collective_init(hdev);
1615         if (rc) {
1616                 dev_err(hdev->dev, "Failed to init collective\n");
1617                 goto disable_pci_access;
1618         }
1619
1620         /* We only support a single ASID for the user, so for the sake of optimization, just
1621          * initialize the ASID one time during device initialization with the fixed value of 1
1622          */
1623         gaudi_mmu_prepare(hdev, 1);
1624
1625         return 0;
1626
1627 disable_pci_access:
1628         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1629
1630         return rc;
1631 }
1632
1633 static void gaudi_late_fini(struct hl_device *hdev)
1634 {
1635         const struct hwmon_channel_info **channel_info_arr;
1636         int i = 0;
1637
1638         if (!hdev->hl_chip_info->info)
1639                 return;
1640
1641         channel_info_arr = hdev->hl_chip_info->info;
1642
1643         while (channel_info_arr[i]) {
1644                 kfree(channel_info_arr[i]->config);
1645                 kfree(channel_info_arr[i]);
1646                 i++;
1647         }
1648
1649         kfree(channel_info_arr);
1650
1651         hdev->hl_chip_info->info = NULL;
1652 }
1653
1654 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1655 {
1656         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1657         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1658         int i, j, rc = 0;
1659
1660         /*
1661          * The device CPU works with 40-bit addresses, and bit 39 must be set
1662          * to '1' when accessing the host.
1663          * Bits 49:39 of the full host address are saved for a later
1664          * configuration of the HW to perform extension to 50 bits.
1665          * Because there is a single HW register that holds the extension bits,
1666          * these bits must be identical across the entire allocated range.
1667          */
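             /*
              * For illustration: bits 49:39 change every 512GB of host address
              * space, so if an allocation happens to straddle a 512GB-aligned
              * boundary, GAUDI_CPU_PCI_MSB_ADDR() of its first and last bytes
              * will differ and the loop below retries. Once the whole range
              * falls inside a single 512GB window, the MSBs match and the
              * allocation is kept.
              */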
1668
1669         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1670                 virt_addr_arr[i] =
1671                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1672                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1673                                                 &dma_addr_arr[i],
1674                                                 GFP_KERNEL | __GFP_ZERO);
1675                 if (!virt_addr_arr[i]) {
1676                         rc = -ENOMEM;
1677                         goto free_dma_mem_arr;
1678                 }
1679
1680                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1681                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1682                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1683                         break;
1684         }
1685
1686         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1687                 dev_err(hdev->dev,
1688                         "MSB of CPU accessible DMA memory is not identical across the allocated range\n");
1689                 rc = -EFAULT;
1690                 goto free_dma_mem_arr;
1691         }
1692
1693         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1694         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1695         hdev->cpu_pci_msb_addr =
1696                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1697
1698         if (!hdev->asic_prop.fw_security_enabled)
1699                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1700
1701 free_dma_mem_arr:
1702         for (j = 0 ; j < i ; j++)
1703                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1704                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1705                                                 virt_addr_arr[j],
1706                                                 dma_addr_arr[j]);
1707
1708         return rc;
1709 }
1710
1711 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1712 {
1713         struct gaudi_device *gaudi = hdev->asic_specific;
1714         struct gaudi_internal_qman_info *q;
1715         u32 i;
1716
1717         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1718                 q = &gaudi->internal_qmans[i];
1719                 if (!q->pq_kernel_addr)
1720                         continue;
1721                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1722                                                         q->pq_kernel_addr,
1723                                                         q->pq_dma_addr);
1724         }
1725 }
1726
1727 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1728 {
1729         struct gaudi_device *gaudi = hdev->asic_specific;
1730         struct gaudi_internal_qman_info *q;
1731         int rc, i;
1732
1733         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1734                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1735                         continue;
1736
1737                 q = &gaudi->internal_qmans[i];
1738
1739                 switch (i) {
1740                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1741                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1742                         break;
1743                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1744                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1745                         break;
1746                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1747                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1748                         break;
1749                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1750                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1751                         break;
1752                 default:
1753                         dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1754                         rc = -EINVAL;
1755                         goto free_internal_qmans_pq_mem;
1756                 }
1757
1758                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1759                                                 hdev, q->pq_size,
1760                                                 &q->pq_dma_addr,
1761                                                 GFP_KERNEL | __GFP_ZERO);
1762                 if (!q->pq_kernel_addr) {
1763                         rc = -ENOMEM;
1764                         goto free_internal_qmans_pq_mem;
1765                 }
1766         }
1767
1768         return 0;
1769
1770 free_internal_qmans_pq_mem:
1771         gaudi_free_internal_qmans_pq_mem(hdev);
1772         return rc;
1773 }
1774
1775 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1776 {
1777         struct asic_fixed_properties *prop = &hdev->asic_prop;
1778         struct pci_mem_region *region;
1779
1780         /* CFG */
1781         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1782         region->region_base = CFG_BASE;
1783         region->region_size = CFG_SIZE;
1784         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1785         region->bar_size = CFG_BAR_SIZE;
1786         region->bar_id = CFG_BAR_ID;
1787         region->used = 1;
1788
1789         /* SRAM */
1790         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1791         region->region_base = SRAM_BASE_ADDR;
1792         region->region_size = SRAM_SIZE;
1793         region->offset_in_bar = 0;
1794         region->bar_size = SRAM_BAR_SIZE;
1795         region->bar_id = SRAM_BAR_ID;
1796         region->used = 1;
1797
1798         /* DRAM */
1799         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1800         region->region_base = DRAM_PHYS_BASE;
1801         region->region_size = hdev->asic_prop.dram_size;
1802         region->offset_in_bar = 0;
1803         region->bar_size = prop->dram_pci_bar_size;
1804         region->bar_id = HBM_BAR_ID;
1805         region->used = 1;
1806
1807         /* SP SRAM */
1808         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1809         region->region_base = PSOC_SCRATCHPAD_ADDR;
1810         region->region_size = PSOC_SCRATCHPAD_SIZE;
1811         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1812         region->bar_size = CFG_BAR_SIZE;
1813         region->bar_id = CFG_BAR_ID;
1814         region->used = 1;
1815 }
1816
1817 static int gaudi_sw_init(struct hl_device *hdev)
1818 {
1819         struct gaudi_device *gaudi;
1820         u32 i, event_id = 0;
1821         int rc;
1822
1823         /* Allocate device structure */
1824         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1825         if (!gaudi)
1826                 return -ENOMEM;
1827
1828         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1829                 if (gaudi_irq_map_table[i].valid) {
1830                         if (event_id == GAUDI_EVENT_SIZE) {
1831                                 dev_err(hdev->dev,
1832                                         "Event array exceeds the limit of %u events\n",
1833                                         GAUDI_EVENT_SIZE);
1834                                 rc = -EINVAL;
1835                                 goto free_gaudi_device;
1836                         }
1837
1838                         gaudi->events[event_id++] =
1839                                         gaudi_irq_map_table[i].fc_id;
1840                 }
1841         }
1842
1843         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1844
1845         hdev->asic_specific = gaudi;
1846
1847         /* Create DMA pool for small allocations */
1848         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1849                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1850         if (!hdev->dma_pool) {
1851                 dev_err(hdev->dev, "failed to create DMA pool\n");
1852                 rc = -ENOMEM;
1853                 goto free_gaudi_device;
1854         }
1855
1856         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1857         if (rc)
1858                 goto free_dma_pool;
1859
1860         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1861         if (!hdev->cpu_accessible_dma_pool) {
1862                 dev_err(hdev->dev,
1863                         "Failed to create CPU accessible DMA pool\n");
1864                 rc = -ENOMEM;
1865                 goto free_cpu_dma_mem;
1866         }
1867
1868         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1869                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1870                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1871         if (rc) {
1872                 dev_err(hdev->dev,
1873                         "Failed to add memory to CPU accessible DMA pool\n");
1874                 rc = -EFAULT;
1875                 goto free_cpu_accessible_dma_pool;
1876         }
1877
1878         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1879         if (rc)
1880                 goto free_cpu_accessible_dma_pool;
1881
1882         spin_lock_init(&gaudi->hw_queues_lock);
1883         mutex_init(&gaudi->clk_gate_mutex);
1884
1885         hdev->supports_sync_stream = true;
1886         hdev->supports_coresight = true;
1887         hdev->supports_staged_submission = true;
1888         hdev->supports_wait_for_multi_cs = true;
1889
1890         hdev->asic_funcs->set_pci_memory_regions(hdev);
1891         hdev->stream_master_qid_arr =
1892                                 hdev->asic_funcs->get_stream_master_qid_arr();
1893         hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1894
1895         return 0;
1896
1897 free_cpu_accessible_dma_pool:
1898         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1899 free_cpu_dma_mem:
1900         if (!hdev->asic_prop.fw_security_enabled)
1901                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1902                                         hdev->cpu_pci_msb_addr);
1903         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1904                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1905                         hdev->cpu_accessible_dma_mem,
1906                         hdev->cpu_accessible_dma_address);
1907 free_dma_pool:
1908         dma_pool_destroy(hdev->dma_pool);
1909 free_gaudi_device:
1910         kfree(gaudi);
1911         return rc;
1912 }
1913
1914 static int gaudi_sw_fini(struct hl_device *hdev)
1915 {
1916         struct gaudi_device *gaudi = hdev->asic_specific;
1917
1918         gaudi_free_internal_qmans_pq_mem(hdev);
1919
1920         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1921
1922         if (!hdev->asic_prop.fw_security_enabled)
1923                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1924                                         hdev->cpu_pci_msb_addr);
1925
1926         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1927                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1928                         hdev->cpu_accessible_dma_mem,
1929                         hdev->cpu_accessible_dma_address);
1930
1931         dma_pool_destroy(hdev->dma_pool);
1932
1933         mutex_destroy(&gaudi->clk_gate_mutex);
1934
1935         kfree(gaudi);
1936
1937         return 0;
1938 }
1939
1940 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1941 {
1942         struct hl_device *hdev = arg;
1943         int i;
1944
1945         if (hdev->disabled)
1946                 return IRQ_HANDLED;
1947
1948         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1949                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1950
1951         hl_irq_handler_eq(irq, &hdev->event_queue);
1952
1953         return IRQ_HANDLED;
1954 }
1955
1956 /*
1957  * For backward compatibility, new MSI interrupts should be set after the
1958  * existing CPU and NIC interrupts.
1959  */
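     /*
      * For illustration (assuming the current GAUDI_EVENT_QUEUE_MSI_IDX and
      * NIC_NUMBER_OF_ENGINES values): completion queue interrupts 0..N-1 map
      * directly to MSI vectors 0..N-1, the CPU event queue always uses the
      * fixed GAUDI_EVENT_QUEUE_MSI_IDX vector, and any other interrupt with
      * nr >= GAUDI_EVENT_QUEUE_MSI_IDX is shifted past the CPU and NIC
      * vectors, i.e. mapped to vector (nr + NIC_NUMBER_OF_ENGINES + 1).
      */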
1960 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1961                                 bool cpu_eq)
1962 {
1963         int msi_vec;
1964
1965         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1966                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1967                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1968
1969         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1970                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1971
1972         return pci_irq_vector(hdev->pdev, msi_vec);
1973 }
1974
1975 static int gaudi_enable_msi_single(struct hl_device *hdev)
1976 {
1977         int rc, irq;
1978
1979         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1980
1981         irq = gaudi_pci_irq_vector(hdev, 0, false);
1982         rc = request_irq(irq, gaudi_irq_handler_single, 0,
1983                         "gaudi single msi", hdev);
1984         if (rc)
1985                 dev_err(hdev->dev,
1986                         "Failed to request single MSI IRQ\n");
1987
1988         return rc;
1989 }
1990
1991 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1992 {
1993         int cq_cnt = hdev->asic_prop.completion_queues_count;
1994         int rc, i, irq_cnt_init, irq;
1995
1996         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1997                 irq = gaudi_pci_irq_vector(hdev, i, false);
1998                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1999                                 &hdev->completion_queue[i]);
2000                 if (rc) {
2001                         dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2002                         goto free_irqs;
2003                 }
2004         }
2005
2006         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2007         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2008                                 &hdev->event_queue);
2009         if (rc) {
2010                 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2011                 goto free_irqs;
2012         }
2013
2014         return 0;
2015
2016 free_irqs:
2017         for (i = 0 ; i < irq_cnt_init ; i++)
2018                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
2019                                 &hdev->completion_queue[i]);
2020         return rc;
2021 }
2022
2023 static int gaudi_enable_msi(struct hl_device *hdev)
2024 {
2025         struct gaudi_device *gaudi = hdev->asic_specific;
2026         int rc;
2027
2028         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2029                 return 0;
2030
2031         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2032         if (rc < 0) {
2033                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2034                 return rc;
2035         }
2036
2037         if (rc < NUMBER_OF_INTERRUPTS) {
2038                 gaudi->multi_msi_mode = false;
2039                 rc = gaudi_enable_msi_single(hdev);
2040         } else {
2041                 gaudi->multi_msi_mode = true;
2042                 rc = gaudi_enable_msi_multi(hdev);
2043         }
2044
2045         if (rc)
2046                 goto free_pci_irq_vectors;
2047
2048         gaudi->hw_cap_initialized |= HW_CAP_MSI;
2049
2050         return 0;
2051
2052 free_pci_irq_vectors:
2053         pci_free_irq_vectors(hdev->pdev);
2054         return rc;
2055 }
2056
2057 static void gaudi_sync_irqs(struct hl_device *hdev)
2058 {
2059         struct gaudi_device *gaudi = hdev->asic_specific;
2060         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2061
2062         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2063                 return;
2064
2065         /* Wait for all pending IRQs to be finished */
2066         if (gaudi->multi_msi_mode) {
2067                 for (i = 0 ; i < cq_cnt ; i++)
2068                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2069
2070                 synchronize_irq(gaudi_pci_irq_vector(hdev,
2071                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
2072                                                 true));
2073         } else {
2074                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2075         }
2076 }
2077
2078 static void gaudi_disable_msi(struct hl_device *hdev)
2079 {
2080         struct gaudi_device *gaudi = hdev->asic_specific;
2081         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2082
2083         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2084                 return;
2085
2086         gaudi_sync_irqs(hdev);
2087
2088         if (gaudi->multi_msi_mode) {
2089                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2090                                                 true);
2091                 free_irq(irq, &hdev->event_queue);
2092
2093                 for (i = 0 ; i < cq_cnt ; i++) {
2094                         irq = gaudi_pci_irq_vector(hdev, i, false);
2095                         free_irq(irq, &hdev->completion_queue[i]);
2096                 }
2097         } else {
2098                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2099         }
2100
2101         pci_free_irq_vectors(hdev->pdev);
2102
2103         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2104 }
2105
2106 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2107 {
2108         struct gaudi_device *gaudi = hdev->asic_specific;
2109
2110         if (hdev->asic_prop.fw_security_enabled)
2111                 return;
2112
2113         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2114                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2115                 return;
2116
2117         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2118                 return;
2119
2120         if (!hdev->sram_scrambler_enable)
2121                 return;
2122
2123         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2124                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2125         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2126                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2127         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2128                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2129         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2130                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2131         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2132                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2133         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2134                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2135         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2136                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2137         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2138                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2139
2140         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2141                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2142         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2143                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2144         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2145                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2146         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2147                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2148         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2149                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2150         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2151                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2152         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2153                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2154         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2155                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2156
2157         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2158                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2159         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2160                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2161         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2162                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2163         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2164                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2165         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2166                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2167         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2168                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2169         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2170                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2171         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2172                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2173
2174         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2175 }
2176
2177 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2178 {
2179         struct gaudi_device *gaudi = hdev->asic_specific;
2180
2181         if (hdev->asic_prop.fw_security_enabled)
2182                 return;
2183
2184         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2185                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2186                 return;
2187
2188         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2189                 return;
2190
2191         if (!hdev->dram_scrambler_enable)
2192                 return;
2193
2194         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2195                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2196         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2197                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2198         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2199                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2200         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2201                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2202         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2203                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2204         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2205                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2206         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2207                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2208         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2209                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2210
2211         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2212                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2213         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2214                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2215         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2216                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2217         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2218                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2219         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2220                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2221         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2222                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2223         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2224                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2225         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2226                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2227
2228         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2229                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2230         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2231                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2232         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2233                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2234         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2235                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2236         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2237                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2238         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2239                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2240         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2241                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2242         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2243                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2244
2245         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2246 }
2247
2248 static void gaudi_init_e2e(struct hl_device *hdev)
2249 {
2250         if (hdev->asic_prop.fw_security_enabled)
2251                 return;
2252
2253         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2254                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2255                 return;
2256
2257         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2258         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2259         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2260         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2261
2262         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2263         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2264         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2265         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2266
2267         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2268         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2269         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2270         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2271
2272         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2273         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2274         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2275         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2276
2277         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2278         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2279         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2280         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2281
2282         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2283         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2284         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2285         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2286
2287         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2288         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2289         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2290         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2291
2292         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2293         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2294         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2295         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2296
2297         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2298         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2299         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2300         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2301
2302         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2303         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2304         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2305         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2306
2307         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2308         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2309         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2310         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2311
2312         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2313         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2314         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2315         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2316
2317         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2318         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2319         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2320         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2321
2322         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2323         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2324         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2325         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2326
2327         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2328         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2329         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2330         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2331
2332         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2333         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2334         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2335         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2336
2337         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2338         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2339         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2340         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2341
2342         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2343         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2344         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2345         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2346
2347         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2348         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2349         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2350         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2351
2352         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2353         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2354         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2355         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2356
2357         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2358         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2359         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2360         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2361
2362         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2363         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2364         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2365         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2366
2367         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2368         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2369         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2370         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2371
2372         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2373         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2374         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2375         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2376
2377         if (!hdev->dram_scrambler_enable) {
2378                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2379                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2380                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2381                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2382
2383                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2384                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2385                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2386                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2387
2388                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2389                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2390                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2391                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2392
2393                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2394                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2395                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2396                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2397
2398                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2399                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2400                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2401                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2402
2403                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2404                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2405                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2406                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2407
2408                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2409                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2410                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2411                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2412
2413                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2414                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2415                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2416                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2417
2418                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2419                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2420                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2421                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2422
2423                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2424                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2425                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2426                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2427
2428                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2429                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2430                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2431                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2432
2433                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2434                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2435                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2436                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2437
2438                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2439                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2440                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2441                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2442
2443                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2444                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2445                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2446                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2447
2448                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2449                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2450                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2451                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2452
2453                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2454                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2455                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2456                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2457
2458                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2459                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2460                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2461                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2462
2463                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2464                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2465                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2466                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2467
2468                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2469                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2470                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2471                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2472
2473                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2474                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2475                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2476                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2477
2478                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2479                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2480                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2481                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2482
2483                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2484                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2485                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2486                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2487
2488                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2489                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2490                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2491                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2492
2493                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2494                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2495                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2496                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2497         }
2498
2499         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2500                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2501         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2502                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2503
2504         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2505                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2506         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2507                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2508
2509         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2510                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2511         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2512                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2513
2514         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2515                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2516         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2517                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2518
2519         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2520                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2521         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2522                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2523
2524         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2525                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2526         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2527                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2528
2529         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2530                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2531         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2532                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2533
2534         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2535                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2536         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2537                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2538
2539         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2540                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2541         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2542                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2543
2544         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2545                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2546         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2547                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2548
2549         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2550                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2551         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2552                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2553
2554         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2555                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2556         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2557                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2558
2559         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2560                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2561         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2562                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2563
2564         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2565                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2566         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2567                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2568
2569         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2570                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2571         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2572                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2573
2574         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2575                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2576         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2577                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2578
2579         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2580                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2581         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2582                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2583
2584         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2585                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2586         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2587                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2588
2589         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2590                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2591         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2592                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2593
2594         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2595                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2596         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2597                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2598
2599         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2600                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2601         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2602                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2603
2604         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2605                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2606         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2607                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2608
2609         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2610                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2611         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2612                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2613
2614         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2615                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2616         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2617                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2618 }
2619
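/*
 * Program the HBM read/write credit counters of the four DMA_IF quadrants
 * (E_N, E_S, W_N, W_S) and enable credit-based arbitration for both HBM
 * channels. This is skipped when the firmware is secured or when boot-fit
 * reports that it has already configured the HBM credits.
 */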
2620 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2621 {
2622         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2623
2624         if (hdev->asic_prop.fw_security_enabled)
2625                 return;
2626
2627         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2628                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2629                 return;
2630
2631         hbm0_wr = 0x33333333;
2632         hbm0_rd = 0x77777777;
2633         hbm1_wr = 0x55555555;
2634         hbm1_rd = 0xDDDDDDDD;
2635
2636         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2637         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2638         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2639         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2640
2641         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2642         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2643         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2644         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2645
2646         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2647         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2648         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2649         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2650
2651         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2652         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2653         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2654         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2655
2656         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2657                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2658                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2659         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2660                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2661                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2662         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2663                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2664                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2665         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2666                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2667                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2668
2669         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2670                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2671                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2672         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2673                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2674                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2675         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2676                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2677                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2678         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2679                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2680                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2681 }
2682
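/*
 * One-time "golden" register setup: E2E and HBM credits, TPC arithmetic
 * interrupt masking and I-cache fetch configuration, clearing of the SRAM
 * area used by Tensor DMA, and the MME rollup count.
 */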
2683 static void gaudi_init_golden_registers(struct hl_device *hdev)
2684 {
2685         u32 tpc_offset;
2686         int tpc_id, i;
2687
2688         gaudi_init_e2e(hdev);
2689         gaudi_init_hbm_cred(hdev);
2690
2691         for (tpc_id = 0, tpc_offset = 0;
2692                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2693                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2694                 /* Mask all arithmetic interrupts from TPC */
2695                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2696                 /* Set 16 cache lines */
2697                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2698                                 ICACHE_FETCH_LINE_NUM, 2);
2699         }
2700
2701         /* Make sure the first 128 bytes in SRAM are 0 for Tensor DMA */
2702         for (i = 0 ; i < 128 ; i += 8)
2703                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2704
2705         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2706         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2707         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2708         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2709 }
2710
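/*
 * Configure a single stream (qman_id) of a PCI DMA QMAN: PQ base and size,
 * LDMA offsets and sync-manager message base addresses. The per-stream
 * register instance is selected by q_off = dma_qm_offset + qman_id * 4.
 * The RAZWI/error IRQ routing, arbitration error reporting and ARB watchdog
 * timeout are configured only once per QMAN, on stream 0.
 */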
2711 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2712                                         int qman_id, dma_addr_t qman_pq_addr)
2713 {
2714         struct cpu_dyn_regs *dyn_regs =
2715                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2716         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2717         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2718         u32 q_off, dma_qm_offset;
2719         u32 dma_qm_err_cfg, irq_handler_offset;
2720
2721         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2722
2723         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2724                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2725         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2726                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2727         so_base_en_lo = lower_32_bits(CFG_BASE +
2728                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2729         so_base_en_hi = upper_32_bits(CFG_BASE +
2730                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2731         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2732                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2733         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2734                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2735         so_base_ws_lo = lower_32_bits(CFG_BASE +
2736                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2737         so_base_ws_hi = upper_32_bits(CFG_BASE +
2738                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2739
2740         q_off = dma_qm_offset + qman_id * 4;
2741
2742         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2743         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2744
2745         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2746         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2747         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2748
2749         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2750         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2751                                                         QMAN_LDMA_SRC_OFFSET);
2752         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2753                                                         QMAN_LDMA_DST_OFFSET);
2754
2755         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2756         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2757         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2758         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2759         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2760         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2761         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2762         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2763
2764         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2765
2766         /* The following configuration is needed only once per QMAN */
2767         if (qman_id == 0) {
2768                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2769                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2770                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2771
2772                 /* Configure RAZWI IRQ */
2773                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2774                 if (hdev->stop_on_err)
2775                         dma_qm_err_cfg |=
2776                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2777
2778                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2779
2780                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2781                         lower_32_bits(CFG_BASE + irq_handler_offset));
2782                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2783                         upper_32_bits(CFG_BASE + irq_handler_offset));
2784
2785                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2786                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2787                                                                         dma_id);
2788
2789                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2790                                 QM_ARB_ERR_MSG_EN_MASK);
2791
2792                 /* Increase ARB WDT to support streams architecture */
2793                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2794                                 GAUDI_ARB_WDT_TIMEOUT);
2795
2796                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2797                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2798
2799                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2800         }
2801 }
2802
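/*
 * Configure the DMA core engine itself (as opposed to its QMAN): maximum
 * outstanding reads, error message routing, protection bits and, finally,
 * enabling the core. Includes the LBW_MAX_OUTSTAND workaround for H3-2116.
 */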
2803 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2804 {
2805         struct cpu_dyn_regs *dyn_regs =
2806                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2807         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2808         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2809         u32 irq_handler_offset;
2810
2811         /* Set to maximum possible according to physical size */
2812         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2813         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2814
2815         /* WA for H/W bug H3-2116 */
2816         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2817
2818         /* STOP_ON means no completion is sent for the operation upon RAZWI */
2819         if (hdev->stop_on_err)
2820                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2821
2822         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2823
2824         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2825                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2826                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2827
2828         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2829                 lower_32_bits(CFG_BASE + irq_handler_offset));
2830         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2831                 upper_32_bits(CFG_BASE + irq_handler_offset));
2832
2833         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2834                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2835         WREG32(mmDMA0_CORE_PROT + dma_offset,
2836                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2837         /* If the channel is secured, it should be in MMU bypass mode */
2838         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2839                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2840         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2841 }
2842
2843 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2844                                 u32 enable_mask)
2845 {
2846         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2847
2848         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2849 }
2850
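/*
 * Initialize all PCI DMA channels: assign a CQ and an MSI vector to each
 * kernel queue, program the QMAN streams, configure the DMA core and enable
 * the QMAN of every channel.
 */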
2851 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2852 {
2853         struct gaudi_device *gaudi = hdev->asic_specific;
2854         struct hl_hw_queue *q;
2855         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2856
2857         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2858                 return;
2859
2860         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2861                 dma_id = gaudi_dma_assignment[i];
2862                 /*
2863                  * For queues after the CPU Q, we need to add 1 to get the
2864                  * correct queue index. In addition, we need to account for
2865                  * the CPU EQ and NIC IRQs to get the correct MSI register.
2866                  */
2867                 if (dma_id > 1) {
2868                         cpu_skip = 1;
2869                         nic_skip = NIC_NUMBER_OF_ENGINES;
2870                 } else {
2871                         cpu_skip = 0;
2872                         nic_skip = 0;
2873                 }
2874
2875                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2876                         q_idx = 4 * dma_id + j + cpu_skip;
2877                         q = &hdev->kernel_queues[q_idx];
2878                         q->cq_id = cq_id++;
2879                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2880                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2881                                                 q->bus_address);
2882                 }
2883
2884                 gaudi_init_dma_core(hdev, dma_id);
2885
2886                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2887         }
2888
2889         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2890 }
2891
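/*
 * Configure one stream of an HBM DMA QMAN. Streams 0-3 (upper CPs) get their
 * PQ base/size and the CPDMA offsets, while stream 4 (lower CP) gets the LDMA
 * offsets plus the RAZWI/error IRQ routing and arbitration settings. DMA5
 * also gets CP_MSG_BASE 2/3 for the sync stream collective.
 */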
2892 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2893                                         int qman_id, u64 qman_base_addr)
2894 {
2895         struct cpu_dyn_regs *dyn_regs =
2896                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2897         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2898         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2899         u32 dma_qm_err_cfg, irq_handler_offset;
2900         u32 q_off, dma_qm_offset;
2901
2902         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2903
2904         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2905                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2906         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2907                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2908         so_base_en_lo = lower_32_bits(CFG_BASE +
2909                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2910         so_base_en_hi = upper_32_bits(CFG_BASE +
2911                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2912         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2913                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2914         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2915                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2916         so_base_ws_lo = lower_32_bits(CFG_BASE +
2917                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2918         so_base_ws_hi = upper_32_bits(CFG_BASE +
2919                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2920
2921         q_off = dma_qm_offset + qman_id * 4;
2922
2923         if (qman_id < 4) {
2924                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2925                                         lower_32_bits(qman_base_addr));
2926                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2927                                         upper_32_bits(qman_base_addr));
2928
2929                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2930                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2931                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2932
2933                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2934                                                         QMAN_CPDMA_SIZE_OFFSET);
2935                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2936                                                         QMAN_CPDMA_SRC_OFFSET);
2937                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2938                                                         QMAN_CPDMA_DST_OFFSET);
2939         } else {
2940                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2941                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2942                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2943
2944                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2945                                                         QMAN_LDMA_SIZE_OFFSET);
2946                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2947                                                         QMAN_LDMA_SRC_OFFSET);
2948                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2949                                                         QMAN_LDMA_DST_OFFSET);
2950
2951                 /* Configure RAZWI IRQ */
2952                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2953                 if (hdev->stop_on_err)
2954                         dma_qm_err_cfg |=
2955                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2956
2957                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2958
2959                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2960                         lower_32_bits(CFG_BASE + irq_handler_offset));
2961                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2962                         upper_32_bits(CFG_BASE + irq_handler_offset));
2963
2964                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2965                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2966                                                                         dma_id);
2967
2968                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2969                                 QM_ARB_ERR_MSG_EN_MASK);
2970
2971                 /* Increase ARB WDT to support streams architecture */
2972                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2973                                 GAUDI_ARB_WDT_TIMEOUT);
2974
2975                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2976                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2977                                 QMAN_INTERNAL_MAKE_TRUSTED);
2978         }
2979
2980         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2981         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2982         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2983         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2984
2985         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2986         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2987                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2988                                 mtr_base_ws_lo);
2989                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2990                                 mtr_base_ws_hi);
2991                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2992                                 so_base_ws_lo);
2993                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2994                                 so_base_ws_hi);
2995         }
2996 }
2997
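/*
 * Initialize all HBM DMA channels: program the four upper-CP streams of each
 * QMAN with their internal PQ addresses, initialize the lower CP, configure
 * the DMA core and enable the QMAN.
 */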
2998 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2999 {
3000         struct gaudi_device *gaudi = hdev->asic_specific;
3001         struct gaudi_internal_qman_info *q;
3002         u64 qman_base_addr;
3003         int i, j, dma_id, internal_q_index;
3004
3005         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
3006                 return;
3007
3008         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
3009                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
3010
3011                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
3012                          /*
3013                           * Add the CPU queue in order to get the correct queue
3014                           * number, as all internal queues are placed after it
3015                           */
3016                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
3017
3018                         q = &gaudi->internal_qmans[internal_q_index];
3019                         qman_base_addr = (u64) q->pq_dma_addr;
3020                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
3021                                                 qman_base_addr);
3022                 }
3023
3024                 /* Initializing lower CP for HBM DMA QMAN */
3025                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
3026
3027                 gaudi_init_dma_core(hdev, dma_id);
3028
3029                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
3030         }
3031
3032         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
3033 }
3034
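/*
 * Configure one stream of an MME QMAN, following the same split as the HBM
 * DMA QMANs: streams 0-3 get PQ and CPDMA settings, stream 4 (lower CP) gets
 * LDMA settings and the RAZWI/error IRQ routing.
 */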
3035 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
3036                                         int qman_id, u64 qman_base_addr)
3037 {
3038         struct cpu_dyn_regs *dyn_regs =
3039                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3040         u32 mtr_base_lo, mtr_base_hi;
3041         u32 so_base_lo, so_base_hi;
3042         u32 irq_handler_offset;
3043         u32 q_off, mme_id;
3044         u32 mme_qm_err_cfg;
3045
3046         mtr_base_lo = lower_32_bits(CFG_BASE +
3047                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3048         mtr_base_hi = upper_32_bits(CFG_BASE +
3049                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3050         so_base_lo = lower_32_bits(CFG_BASE +
3051                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3052         so_base_hi = upper_32_bits(CFG_BASE +
3053                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3054
3055         q_off = mme_offset + qman_id * 4;
3056
3057         if (qman_id < 4) {
3058                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3059                                         lower_32_bits(qman_base_addr));
3060                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3061                                         upper_32_bits(qman_base_addr));
3062
3063                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3064                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3065                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3066
3067                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3068                                                         QMAN_CPDMA_SIZE_OFFSET);
3069                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3070                                                         QMAN_CPDMA_SRC_OFFSET);
3071                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3072                                                         QMAN_CPDMA_DST_OFFSET);
3073         } else {
3074                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3075                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3076                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3077
3078                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3079                                                         QMAN_LDMA_SIZE_OFFSET);
3080                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3081                                                         QMAN_LDMA_SRC_OFFSET);
3082                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3083                                                         QMAN_LDMA_DST_OFFSET);
3084
3085                 /* Configure RAZWI IRQ */
3086                 mme_id = mme_offset /
3087                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3088
3089                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3090                 if (hdev->stop_on_err)
3091                         mme_qm_err_cfg |=
3092                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3093
3094                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3095
3096                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3097                         lower_32_bits(CFG_BASE + irq_handler_offset));
3098                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3099                         upper_32_bits(CFG_BASE + irq_handler_offset));
3100
3101                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3102                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3103                                                                         mme_id);
3104
3105                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3106                                 QM_ARB_ERR_MSG_EN_MASK);
3107
3108                 /* Increase ARB WDT to support streams architecture */
3109                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3110                                 GAUDI_ARB_WDT_TIMEOUT);
3111
3112                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3113                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3114                                 QMAN_INTERNAL_MAKE_TRUSTED);
3115         }
3116
3117         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3118         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3119         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3120         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3121 }
3122
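/*
 * Initialize both MME masters: the four streams of the north-west MME
 * (mmMME2_QM) and the south-west MME (mmMME0_QM), their lower CPs, and
 * finally enable both QMANs.
 */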
3123 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3124 {
3125         struct gaudi_device *gaudi = hdev->asic_specific;
3126         struct gaudi_internal_qman_info *q;
3127         u64 qman_base_addr;
3128         u32 mme_offset;
3129         int i, internal_q_index;
3130
3131         if (gaudi->hw_cap_initialized & HW_CAP_MME)
3132                 return;
3133
3134         /*
3135          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3136          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3137          */
3138
3139         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3140
3141         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3142                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3143                 q = &gaudi->internal_qmans[internal_q_index];
3144                 qman_base_addr = (u64) q->pq_dma_addr;
3145                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3146                                         qman_base_addr);
3147                 if (i == 3)
3148                         mme_offset = 0;
3149         }
3150
3151         /* Initializing lower CP for MME QMANs */
3152         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3153         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3154         gaudi_init_mme_qman(hdev, 0, 4, 0);
3155
3156         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3157         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3158
3159         gaudi->hw_cap_initialized |= HW_CAP_MME;
3160 }
3161
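/*
 * Configure one stream of a TPC QMAN. Streams 0-3 get PQ and CPDMA settings,
 * stream 4 (lower CP) gets LDMA settings and the RAZWI/error IRQ routing.
 * The TPC used for the sync stream collective also gets CP_MSG_BASE 2/3.
 */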
3162 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3163                                 int qman_id, u64 qman_base_addr)
3164 {
3165         struct cpu_dyn_regs *dyn_regs =
3166                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3167         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3168         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3169         u32 tpc_qm_err_cfg, irq_handler_offset;
3170         u32 q_off, tpc_id;
3171
3172         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3173                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3174         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3175                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3176         so_base_en_lo = lower_32_bits(CFG_BASE +
3177                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3178         so_base_en_hi = upper_32_bits(CFG_BASE +
3179                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3180         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3181                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3182         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3183                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3184         so_base_ws_lo = lower_32_bits(CFG_BASE +
3185                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3186         so_base_ws_hi = upper_32_bits(CFG_BASE +
3187                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3188
3189         q_off = tpc_offset + qman_id * 4;
3190
3191         tpc_id = tpc_offset /
3192                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3193
3194         if (qman_id < 4) {
3195                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3196                                         lower_32_bits(qman_base_addr));
3197                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3198                                         upper_32_bits(qman_base_addr));
3199
3200                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3201                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3202                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3203
3204                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3205                                                         QMAN_CPDMA_SIZE_OFFSET);
3206                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3207                                                         QMAN_CPDMA_SRC_OFFSET);
3208                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3209                                                         QMAN_CPDMA_DST_OFFSET);
3210         } else {
3211                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3212                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3213                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3214
3215                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3216                                                         QMAN_LDMA_SIZE_OFFSET);
3217                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3218                                                         QMAN_LDMA_SRC_OFFSET);
3219                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3220                                                         QMAN_LDMA_DST_OFFSET);
3221
3222                 /* Configure RAZWI IRQ */
3223                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3224                 if (hdev->stop_on_err)
3225                         tpc_qm_err_cfg |=
3226                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3227
3228                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3229
3230                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3231                         lower_32_bits(CFG_BASE + irq_handler_offset));
3232                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3233                         upper_32_bits(CFG_BASE + irq_handler_offset));
3234
3235                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3236                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3237                                                                         tpc_id);
3238
3239                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3240                                 QM_ARB_ERR_MSG_EN_MASK);
3241
3242                 /* Increase ARB WDT to support streams architecture */
3243                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3244                                 GAUDI_ARB_WDT_TIMEOUT);
3245
3246                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3247                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3248                                 QMAN_INTERNAL_MAKE_TRUSTED);
3249         }
3250
3251         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3252         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3253         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3254         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3255
3256         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3257         if (tpc_id == 6) {
3258                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3259                                 mtr_base_ws_lo);
3260                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3261                                 mtr_base_ws_hi);
3262                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3263                                 so_base_ws_lo);
3264                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3265                                 so_base_ws_hi);
3266         }
3267 }
3268
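/*
 * Initialize the QMANs of all TPC engines: program the four streams of each
 * engine, initialize its lower CP, enable the QMAN and set the sync-manager
 * base address used by the engine.
 */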
3269 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3270 {
3271         struct gaudi_device *gaudi = hdev->asic_specific;
3272         struct gaudi_internal_qman_info *q;
3273         u64 qman_base_addr;
3274         u32 so_base_hi, tpc_offset = 0;
3275         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3276                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3277         int i, tpc_id, internal_q_index;
3278
3279         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3280                 return;
3281
3282         so_base_hi = upper_32_bits(CFG_BASE +
3283                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3284
3285         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3286                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3287                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3288                                                 tpc_id * QMAN_STREAMS + i;
3289                         q = &gaudi->internal_qmans[internal_q_index];
3290                         qman_base_addr = (u64) q->pq_dma_addr;
3291                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3292                                                 qman_base_addr);
3293
3294                         if (i == 3) {
3295                                 /* Initializing lower CP for TPC QMAN */
3296                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3297
3298                                 /* Enable the QMAN and TPC channel */
3299                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3300                                                 QMAN_TPC_ENABLE);
3301                         }
3302                 }
3303
3304                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3305                                 so_base_hi);
3306
3307                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3308
3309                 gaudi->hw_cap_initialized |=
3310                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3311         }
3312 }
3313
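/*
 * Configure one stream of a NIC QMAN: PQ base/size, LDMA offsets and
 * sync-manager message bases, including CP_MSG_BASE 2/3 for the sync stream
 * collective. Stream 0 also carries the per-QMAN RAZWI/error IRQ routing and
 * arbitration settings.
 */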
3314 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3315                                 int qman_id, u64 qman_base_addr, int nic_id)
3316 {
3317         struct cpu_dyn_regs *dyn_regs =
3318                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3319         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3320         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3321         u32 nic_qm_err_cfg, irq_handler_offset;
3322         u32 q_off;
3323
3324         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3325                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3326         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3327                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3328         so_base_en_lo = lower_32_bits(CFG_BASE +
3329                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3330         so_base_en_hi = upper_32_bits(CFG_BASE +
3331                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3332         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3333                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3334         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3335                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3336         so_base_ws_lo = lower_32_bits(CFG_BASE +
3337                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3338         so_base_ws_hi = upper_32_bits(CFG_BASE +
3339                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3340
3341         q_off = nic_offset + qman_id * 4;
3342
3343         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3344         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3345
3346         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3347         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3348         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3349
3350         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3351                                                         QMAN_LDMA_SIZE_OFFSET);
3352         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3353                                                         QMAN_LDMA_SRC_OFFSET);
3354         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3355                                                         QMAN_LDMA_DST_OFFSET);
3356
3357         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3358         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3359         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3360         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3361
3362         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3363         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3364         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3365         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3366         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3367
3368         if (qman_id == 0) {
3369                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3370                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3371                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3372
3373                 /* Configure RAZWI IRQ */
3374                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3375                 if (hdev->stop_on_err)
3376                         nic_qm_err_cfg |=
3377                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3378
3379                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3380
3381                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3382                         lower_32_bits(CFG_BASE + irq_handler_offset));
3383                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3384                         upper_32_bits(CFG_BASE + irq_handler_offset));
3385
3386                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3387                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3388                                                                         nic_id);
3389
3390                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3391                                 QM_ARB_ERR_MSG_EN_MASK);
3392
3393                 /* Increase ARB WDT to support streams architecture */
3394                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3395                                 GAUDI_ARB_WDT_TIMEOUT);
3396
3397                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3398                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3399                                 QMAN_INTERNAL_MAKE_TRUSTED);
3400         }
3401 }
3402
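/*
 * Initialize the QMANs of all enabled NIC ports, skipping ports that are
 * masked out in hdev->nic_ports_mask. The register offset advances by one
 * QMAN per port and jumps to the next NIC macro after every pair of ports.
 */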
3403 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3404 {
3405         struct gaudi_device *gaudi = hdev->asic_specific;
3406         struct gaudi_internal_qman_info *q;
3407         u64 qman_base_addr;
3408         u32 nic_offset = 0;
3409         u32 nic_delta_between_qmans =
3410                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3411         u32 nic_delta_between_nics =
3412                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3413         int i, nic_id, internal_q_index;
3414
3415         if (!hdev->nic_ports_mask)
3416                 return;
3417
3418         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3419                 return;
3420
3421         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3422
3423         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3424                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3425                         nic_offset += nic_delta_between_qmans;
3426                         if (nic_id & 1) {
3427                                 nic_offset -= (nic_delta_between_qmans * 2);
3428                                 nic_offset += nic_delta_between_nics;
3429                         }
3430                         continue;
3431                 }
3432
3433                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3434                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3435                                                 nic_id * QMAN_STREAMS + i;
3436                         q = &gaudi->internal_qmans[internal_q_index];
3437                         qman_base_addr = (u64) q->pq_dma_addr;
3438                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3439                                                 qman_base_addr, nic_id);
3440                 }
3441
3442                 /* Enable the QMAN */
3443                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3444
3445                 nic_offset += nic_delta_between_qmans;
3446                 if (nic_id & 1) {
3447                         nic_offset -= (nic_delta_between_qmans * 2);
3448                         nic_offset += nic_delta_between_nics;
3449                 }
3450
3451                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3452         }
3453 }
3454
3455 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3456 {
3457         struct gaudi_device *gaudi = hdev->asic_specific;
3458
3459         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3460                 return;
3461
3462         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3463         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3464         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3465 }
3466
3467 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3468 {
3469         struct gaudi_device *gaudi = hdev->asic_specific;
3470
3471         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3472                 return;
3473
3474         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3475         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3476         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3477         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3478         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3479 }
3480
3481 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3482 {
3483         struct gaudi_device *gaudi = hdev->asic_specific;
3484
3485         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3486                 return;
3487
3488         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3489         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3490 }
3491
3492 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3493 {
3494         struct gaudi_device *gaudi = hdev->asic_specific;
3495         u32 tpc_offset = 0;
3496         int tpc_id;
3497
3498         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3499                 return;
3500
3501         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3502                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3503                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3504         }
3505 }
3506
3507 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3508 {
3509         struct gaudi_device *gaudi = hdev->asic_specific;
3510         u32 nic_mask, nic_offset = 0;
3511         u32 nic_delta_between_qmans =
3512                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3513         u32 nic_delta_between_nics =
3514                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3515         int nic_id;
3516
3517         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3518                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3519
3520                 if (gaudi->hw_cap_initialized & nic_mask)
3521                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3522
3523                 nic_offset += nic_delta_between_qmans;
3524                 if (nic_id & 1) {
3525                         nic_offset -= (nic_delta_between_qmans * 2);
3526                         nic_offset += nic_delta_between_nics;
3527                 }
3528         }
3529 }
3530
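/*
 * The stop routines below halt the CPs of the various QMANs via GLBL_CFG1,
 * whereas the disable routines above clear GLBL_CFG0 to turn the QMANs off
 * entirely.
 */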
3531 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3532 {
3533         struct gaudi_device *gaudi = hdev->asic_specific;
3534
3535         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3536                 return;
3537
3538         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3539         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3540         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3541         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3542 }
3543
3544 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3545 {
3546         struct gaudi_device *gaudi = hdev->asic_specific;
3547
3548         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3549                 return;
3550
3551         /* Stop CPs of HBM DMA QMANs */
3552
3553         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3554         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3555         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3556         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3557         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3558 }
3559
3560 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3561 {
3562         struct gaudi_device *gaudi = hdev->asic_specific;
3563
3564         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3565                 return;
3566
3567         /* Stop CPs of MME QMANs */
3568         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3569         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3570 }
3571
3572 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3573 {
3574         struct gaudi_device *gaudi = hdev->asic_specific;
3575
3576         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3577                 return;
3578
3579         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3580         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3581         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3582         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3583         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3584         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3585         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3586         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3587 }
3588
3589 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3590 {
3591         struct gaudi_device *gaudi = hdev->asic_specific;
3592
3593         /* Stop upper CPs of QMANs */
3594
3595         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3596                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3597                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3598                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3599                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3600
3601         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3602                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3603                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3604                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3605                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3606
3607         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3608                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3609                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3610                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3611                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3612
3613         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3614                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3615                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3616                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3617                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3618
3619         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3620                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3621                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3622                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3623                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3624
3625         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3626                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3627                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3628                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3629                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3630
3631         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3632                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3633                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3634                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3635                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3636
3637         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3638                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3639                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3640                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3641                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3642
3643         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3644                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3645                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3646                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3647                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3648
3649         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3650                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3651                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3652                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3653                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3654 }
3655
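/*
 * The stall routines below halt the engines themselves (DMA cores, MME
 * ACC/SBAB units, TPCs) rather than their QMANs.
 */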
3656 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3657 {
3658         struct gaudi_device *gaudi = hdev->asic_specific;
3659
3660         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3661                 return;
3662
3663         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3664         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3665         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3666 }
3667
3668 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3669 {
3670         struct gaudi_device *gaudi = hdev->asic_specific;
3671
3672         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3673                 return;
3674
3675         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3676         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3677         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3678         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3679         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3680 }
3681
3682 static void gaudi_mme_stall(struct hl_device *hdev)
3683 {
3684         struct gaudi_device *gaudi = hdev->asic_specific;
3685
3686         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3687                 return;
3688
3689         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3690         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3691         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3692         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3693         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3694         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3695         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3696         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3697         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3698         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3699         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3700         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3701         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3702         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3703         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3704         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3705         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3706 }
3707
3708 static void gaudi_tpc_stall(struct hl_device *hdev)
3709 {
3710         struct gaudi_device *gaudi = hdev->asic_specific;
3711
3712         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3713                 return;
3714
3715         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3716         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3717         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3718         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3719         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3720         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3721         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3722         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3723 }
3724
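/* Enable clock gating per engine according to hdev->clock_gating_mask.
 * Skipped during a debug session or when FW security is enabled; DMA5 is
 * always left ungated because GC submits work to it through its upper CP.
 */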
3725 static void gaudi_set_clock_gating(struct hl_device *hdev)
3726 {
3727         struct gaudi_device *gaudi = hdev->asic_specific;
3728         u32 qman_offset;
3729         bool enable;
3730         int i;
3731
3732         /* If we are in a debug session, don't enable clock gating as it
3733          * may interfere
3734          */
3735         if (hdev->in_debug)
3736                 return;
3737
3738         if (hdev->asic_prop.fw_security_enabled)
3739                 return;
3740
3741         for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3742                 enable = !!(hdev->clock_gating_mask &
3743                                 (BIT_ULL(gaudi_dma_assignment[i])));
3744
3745                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3746                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3747                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3748                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3749                                 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3750         }
3751
3752         for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3753                 enable = !!(hdev->clock_gating_mask &
3754                                 (BIT_ULL(gaudi_dma_assignment[i])));
3755
3756                 /* GC sends work to the DMA engine through the upper CP in
3757                  * DMA5, so we must not enable clock gating for that DMA
3758                  */
3759                 if (i == GAUDI_HBM_DMA_4)
3760                         enable = 0;
3761
3762                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3763                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3764                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3765                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3766                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3767         }
3768
3769         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3770         WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3771         WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3772
3773         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3774         WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3775         WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3776
3777         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3778                 enable = !!(hdev->clock_gating_mask &
3779                                 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3780
3781                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3782                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3783                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3784                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3785
3786                 qman_offset += TPC_QMAN_OFFSET;
3787         }
3788
3789         gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3790 }
3791
3792 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3793 {
3794         struct gaudi_device *gaudi = hdev->asic_specific;
3795         u32 qman_offset;
3796         int i;
3797
3798         if (hdev->asic_prop.fw_security_enabled)
3799                 return;
3800
3801         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3802                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3803                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3804
3805                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3806         }
3807
3808         WREG32(mmMME0_QM_CGM_CFG, 0);
3809         WREG32(mmMME0_QM_CGM_CFG1, 0);
3810         WREG32(mmMME2_QM_CGM_CFG, 0);
3811         WREG32(mmMME2_QM_CGM_CFG1, 0);
3812
3813         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3814                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3815                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3816
3817                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3818         }
3819
3820         gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3821 }
3822
3823 static void gaudi_enable_timestamp(struct hl_device *hdev)
3824 {
3825         /* Disable the timestamp counter */
3826         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3827
3828         /* Zero the lower/upper parts of the 64-bit counter */
3829         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3830         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3831
3832         /* Enable the counter */
3833         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3834 }
3835
3836 static void gaudi_disable_timestamp(struct hl_device *hdev)
3837 {
3838         /* Disable the timestamp counter */
3839         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3840 }
3841
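/* Halt all compute engines in order: stop the QMANs, disable clock gating,
 * stall the engines themselves, then disable the QMANs, the timestamp counter
 * and finally MSI. The engine steps are skipped entirely when the reset is
 * performed by firmware (fw_reset).
 */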
3842 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3843 {
3844         u32 wait_timeout_ms;
3845
3846         dev_info(hdev->dev,
3847                 "Halting compute engines and disabling interrupts\n");
3848
3849         if (hdev->pldm)
3850                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3851         else
3852                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3853
3854         if (fw_reset)
3855                 goto skip_engines;
3856
3857         gaudi_stop_nic_qmans(hdev);
3858         gaudi_stop_mme_qmans(hdev);
3859         gaudi_stop_tpc_qmans(hdev);
3860         gaudi_stop_hbm_dma_qmans(hdev);
3861         gaudi_stop_pci_dma_qmans(hdev);
3862
3863         hdev->asic_funcs->disable_clock_gating(hdev);
3864
3865         msleep(wait_timeout_ms);
3866
3867         gaudi_pci_dma_stall(hdev);
3868         gaudi_hbm_dma_stall(hdev);
3869         gaudi_tpc_stall(hdev);
3870         gaudi_mme_stall(hdev);
3871
3872         msleep(wait_timeout_ms);
3873
3874         gaudi_disable_nic_qmans(hdev);
3875         gaudi_disable_mme_qmans(hdev);
3876         gaudi_disable_tpc_qmans(hdev);
3877         gaudi_disable_hbm_dma_qmans(hdev);
3878         gaudi_disable_pci_dma_qmans(hdev);
3879
3880         gaudi_disable_timestamp(hdev);
3881
3882 skip_engines:
3883         gaudi_disable_msi(hdev);
3884 }
3885
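/* Program the hop0 page table address for every ASID, set up the MMU cache
 * management page, invalidate the MMU cache and enable the MMU. Does nothing
 * if the MMU is disabled or was already initialized.
 */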
3886 static int gaudi_mmu_init(struct hl_device *hdev)
3887 {
3888         struct asic_fixed_properties *prop = &hdev->asic_prop;
3889         struct gaudi_device *gaudi = hdev->asic_specific;
3890         u64 hop0_addr;
3891         int rc, i;
3892
3893         if (!hdev->mmu_enable)
3894                 return 0;
3895
3896         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3897                 return 0;
3898
3899         for (i = 0 ; i < prop->max_asid ; i++) {
3900                 hop0_addr = prop->mmu_pgt_addr +
3901                                 (i * prop->mmu_hop_table_size);
3902
3903                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3904                 if (rc) {
3905                         dev_err(hdev->dev,
3906                                 "failed to set hop0 addr for asid %d\n", i);
3907                         goto err;
3908                 }
3909         }
3910
3911         /* init MMU cache management page */
3912         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3913         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3914
3915         /* mem cache invalidation */
3916         WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3917
3918         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3919
3920         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3921         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3922
3923         WREG32(mmSTLB_HOP_CONFIGURATION,
3924                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3925
3926         /*
3927          * The H/W expects the first PI after init to be 1. After wraparound
3928          * we'll write 0.
3929          */
3930         gaudi->mmu_cache_inv_pi = 1;
3931
3932         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3933
3934         return 0;
3935
3936 err:
3937         return rc;
3938 }
3939
3940 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3941 {
3942         void __iomem *dst;
3943
3944         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3945
3946         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3947 }
3948
3949 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3950 {
3951         void __iomem *dst;
3952
3953         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3954
3955         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3956 }
3957
3958 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3959 {
3960         struct dynamic_fw_load_mgr *dynamic_loader;
3961         struct cpu_dyn_regs *dyn_regs;
3962
3963         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3964
3965         /*
3966          * Here we set initial values for a few specific dynamic registers
3967          * (before reading the first descriptor from FW, these values have
3968          * to be hard-coded). In later stages of the protocol these values
3969          * will be updated automatically by reading the FW descriptor, so
3970          * the data there will always be up-to-date.
3971          */
3972         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3973         dyn_regs->kmd_msg_to_cpu =
3974                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3975         dyn_regs->cpu_cmd_status_to_host =
3976                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3977
3978         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3979 }
3980
3981 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3982 {
3983         struct static_fw_load_mgr *static_loader;
3984
3985         static_loader = &hdev->fw_loader.static_loader;
3986
3987         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3988         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3989         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3990         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3991         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3992         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3993         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3994         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3995         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3996         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3997         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3998         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3999         static_loader->cpu_reset_wait_msec = hdev->pldm ?
4000                         GAUDI_PLDM_RESET_WAIT_MSEC :
4001                         GAUDI_CPU_RESET_WAIT_MSEC;
4002 }
4003
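/* Fill the firmware loader fields that are common to both flows, then
 * initialize either the dynamic or the static loader descriptor according
 * to the dynamic_fw_load property.
 */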
4004 static void gaudi_init_firmware_loader(struct hl_device *hdev)
4005 {
4006         struct asic_fixed_properties *prop = &hdev->asic_prop;
4007         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4008
4009         /* fill common fields */
4010         fw_loader->fw_comp_loaded = FW_TYPE_NONE;
4011         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
4012         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
4013         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
4014         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
4015         fw_loader->skip_bmc = !hdev->bmc_enable;
4016         fw_loader->sram_bar_id = SRAM_BAR_ID;
4017         fw_loader->dram_bar_id = HBM_BAR_ID;
4018
4019         if (prop->dynamic_fw_load)
4020                 gaudi_init_dynamic_firmware_loader(hdev);
4021         else
4022                 gaudi_init_static_firmware_loader(hdev);
4023 }
4024
4025 static int gaudi_init_cpu(struct hl_device *hdev)
4026 {
4027         struct gaudi_device *gaudi = hdev->asic_specific;
4028         int rc;
4029
4030         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4031                 return 0;
4032
4033         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
4034                 return 0;
4035
4036         /*
4037          * The device CPU works with 40 bits addresses.
4038          * This register sets the extension to 50 bits.
4039          */
4040         if (!hdev->asic_prop.fw_security_enabled)
4041                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
4042
4043         rc = hl_fw_init_cpu(hdev);
4044
4045         if (rc)
4046                 return rc;
4047
4048         gaudi->hw_cap_initialized |= HW_CAP_CPU;
4049
4050         return 0;
4051 }
4052
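/* Configure the CPU PQ, EQ and CQ base addresses and sizes, signal the device
 * CPU through the PI-update interrupt and poll CPU_IF_QUEUE_INIT until the CPU
 * reports PQ_INIT_STATUS_READY_FOR_HOST or cpu_timeout expires.
 */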
4053 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4054 {
4055         struct cpu_dyn_regs *dyn_regs =
4056                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4057         struct asic_fixed_properties *prop = &hdev->asic_prop;
4058         struct gaudi_device *gaudi = hdev->asic_specific;
4059         u32 status, irq_handler_offset;
4060         struct hl_eq *eq;
4061         struct hl_hw_queue *cpu_pq =
4062                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4063         int err;
4064
4065         if (!hdev->cpu_queues_enable)
4066                 return 0;
4067
4068         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4069                 return 0;
4070
4071         eq = &hdev->event_queue;
4072
4073         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4074         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4075
4076         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4077         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4078
4079         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4080                         lower_32_bits(hdev->cpu_accessible_dma_address));
4081         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4082                         upper_32_bits(hdev->cpu_accessible_dma_address));
4083
4084         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4085         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4086         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4087
4088         /* Used for EQ CI */
4089         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4090
4091         WREG32(mmCPU_IF_PF_PQ_PI, 0);
4092
4093         if (gaudi->multi_msi_mode)
4094                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4095         else
4096                 WREG32(mmCPU_IF_QUEUE_INIT,
4097                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4098
4099         irq_handler_offset = prop->gic_interrupts_enable ?
4100                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4101                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4102
4103         WREG32(irq_handler_offset,
4104                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4105
4106         err = hl_poll_timeout(
4107                 hdev,
4108                 mmCPU_IF_QUEUE_INIT,
4109                 status,
4110                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4111                 1000,
4112                 cpu_timeout);
4113
4114         if (err) {
4115                 dev_err(hdev->dev,
4116                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4117                 return -EIO;
4118         }
4119
4120         /* update FW application security bits */
4121         if (prop->fw_cpu_boot_dev_sts0_valid)
4122                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4123         if (prop->fw_cpu_boot_dev_sts1_valid)
4124                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4125
4126         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4127         return 0;
4128 }
4129
4130 static void gaudi_pre_hw_init(struct hl_device *hdev)
4131 {
4132         /* Perform read from the device to make sure device is up */
4133         RREG32(mmHW_STATE);
4134
4135         if (!hdev->asic_prop.fw_security_enabled) {
4136                 /* Set the access through PCI bars (Linux driver only) as
4137                  * secured
4138                  */
4139                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4140                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4141                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4142
4143                 /* Perform read to flush the waiting writes to ensure
4144                  * configuration was set in the device
4145                  */
4146                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4147         }
4148
4149         /*
4150          * Let's mark in the H/W that we have reached this point. We check
4151          * this value in the reset_before_init function to understand whether
4152          * we need to reset the chip before doing H/W init. This register is
4153          * cleared by the H/W upon H/W reset
4154          */
4155         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4156 }
4157
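/* Main H/W initialization flow: map the HBM BAR to DRAM, bring up the device
 * CPU (firmware), initialize the scramblers, golden registers, MMU and
 * security, bring up all QMANs, re-enable clock gating and the timestamp
 * counter, enable MSI and finally the CPU queues.
 */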
4158 static int gaudi_hw_init(struct hl_device *hdev)
4159 {
4160         struct gaudi_device *gaudi = hdev->asic_specific;
4161         int rc;
4162
4163         gaudi_pre_hw_init(hdev);
4164
4165         /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
4166          * So we set it here and if anyone tries to move it later to
4167          * a different address, there will be an error
4168          */
4169         if (hdev->asic_prop.iatu_done_by_fw)
4170                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4171
4172         /*
4173          * Before pushing u-boot/Linux to the device, we need to set the
4174          * HBM BAR to the base address of DRAM
4175          */
4176         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4177                 dev_err(hdev->dev,
4178                         "failed to map HBM bar to DRAM base address\n");
4179                 return -EIO;
4180         }
4181
4182         rc = gaudi_init_cpu(hdev);
4183         if (rc) {
4184                 dev_err(hdev->dev, "failed to initialize CPU\n");
4185                 return rc;
4186         }
4187
4188         /* In case the clock gating was enabled in preboot we need to disable
4189          * it here before touching the MME/TPC registers.
4190          * There is no need to take clk gating mutex because when this function
4191          * runs, no other relevant code can run
4192          */
4193         hdev->asic_funcs->disable_clock_gating(hdev);
4194
4195         /* SRAM scrambler must be initialized after CPU is running from HBM */
4196         gaudi_init_scrambler_sram(hdev);
4197
4198         /* This is here just in case we are working without CPU */
4199         gaudi_init_scrambler_hbm(hdev);
4200
4201         gaudi_init_golden_registers(hdev);
4202
4203         rc = gaudi_mmu_init(hdev);
4204         if (rc)
4205                 return rc;
4206
4207         gaudi_init_security(hdev);
4208
4209         gaudi_init_pci_dma_qmans(hdev);
4210
4211         gaudi_init_hbm_dma_qmans(hdev);
4212
4213         gaudi_init_mme_qmans(hdev);
4214
4215         gaudi_init_tpc_qmans(hdev);
4216
4217         gaudi_init_nic_qmans(hdev);
4218
4219         hdev->asic_funcs->set_clock_gating(hdev);
4220
4221         gaudi_enable_timestamp(hdev);
4222
4223         /* MSI must be enabled before CPU queues and NIC are initialized */
4224         rc = gaudi_enable_msi(hdev);
4225         if (rc)
4226                 goto disable_queues;
4227
4228         /* must be called after MSI was enabled */
4229         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4230         if (rc) {
4231                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4232                         rc);
4233                 goto disable_msi;
4234         }
4235
4236         /* Perform read from the device to flush all configuration */
4237         RREG32(mmHW_STATE);
4238
4239         return 0;
4240
4241 disable_msi:
4242         gaudi_disable_msi(hdev);
4243 disable_queues:
4244         gaudi_disable_mme_qmans(hdev);
4245         gaudi_disable_pci_dma_qmans(hdev);
4246
4247         return rc;
4248 }
4249
4250 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4251 {
4252         struct cpu_dyn_regs *dyn_regs =
4253                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4254         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4255         struct gaudi_device *gaudi = hdev->asic_specific;
4256         bool driver_performs_reset;
4257
4258         if (!hard_reset) {
4259                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4260                 return;
4261         }
4262
4263         if (hdev->pldm) {
4264                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4265                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4266         } else {
4267                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4268                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4269         }
4270
4271         if (fw_reset) {
4272                 dev_info(hdev->dev,
4273                         "Firmware performs HARD reset, going to wait %dms\n",
4274                         reset_timeout_ms);
4275
4276                 goto skip_reset;
4277         }
4278
4279         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4280                                         !hdev->asic_prop.hard_reset_done_by_fw);
4281
4282         /* Set device to handle FLR by H/W as we will put the device CPU to
4283          * halt mode
4284          */
4285         if (driver_performs_reset)
4286                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4287                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4288
4289         /* If Linux is loaded on the device CPU we need to communicate with
4290          * it via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4291          * registers in case of old F/Ws
4292          */
4293         if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4294                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4295                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4296                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4297
4298                 WREG32(irq_handler_offset,
4299                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4300
4301                 /* This is a hail-mary attempt to revive the card in the small chance that the
4302                  * f/w has experienced a watchdog event, which caused it to return back to preboot.
4303                  * In that case, triggering reset through GIC won't help. We need to trigger the
4304                  * reset as if Linux wasn't loaded.
4305                  *
4306                  * We do it only if the reset cause was HB, because that would be the indication
4307                  * of such an event.
4308                  *
4309                  * In case watchdog hasn't expired but we still got HB, then this won't do any
4310                  * damage.
4311                  */
4312                 if (hdev->curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4313                         if (hdev->asic_prop.hard_reset_done_by_fw)
4314                                 hl_fw_ask_hard_reset_without_linux(hdev);
4315                         else
4316                                 hl_fw_ask_halt_machine_without_linux(hdev);
4317                 }
4318         } else {
4319                 if (hdev->asic_prop.hard_reset_done_by_fw)
4320                         hl_fw_ask_hard_reset_without_linux(hdev);
4321                 else
4322                         hl_fw_ask_halt_machine_without_linux(hdev);
4323         }
4324
4325         if (driver_performs_reset) {
4326
4327                 /* Configure the reset registers. Must be done as early as
4328                  * possible in case we fail during H/W initialization
4329                  */
4330                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4331                                                 (CFG_RST_H_DMA_MASK |
4332                                                 CFG_RST_H_MME_MASK |
4333                                                 CFG_RST_H_SM_MASK |
4334                                                 CFG_RST_H_TPC_7_MASK));
4335
4336                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4337
4338                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4339                                                 (CFG_RST_H_HBM_MASK |
4340                                                 CFG_RST_H_TPC_7_MASK |
4341                                                 CFG_RST_H_NIC_MASK |
4342                                                 CFG_RST_H_SM_MASK |
4343                                                 CFG_RST_H_DMA_MASK |
4344                                                 CFG_RST_H_MME_MASK |
4345                                                 CFG_RST_H_CPU_MASK |
4346                                                 CFG_RST_H_MMU_MASK));
4347
4348                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4349                                                 (CFG_RST_L_IF_MASK |
4350                                                 CFG_RST_L_PSOC_MASK |
4351                                                 CFG_RST_L_TPC_MASK));
4352
4353                 msleep(cpu_timeout_ms);
4354
4355                 /* Tell ASIC not to re-initialize PCIe */
4356                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4357
4358                 /* Restart BTL/BLR upon hard-reset */
4359                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4360
4361                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4362                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4363
4364                 dev_info(hdev->dev,
4365                         "Issued HARD reset command, going to wait %dms\n",
4366                         reset_timeout_ms);
4367         } else {
4368                 dev_info(hdev->dev,
4369                         "Firmware performs HARD reset, going to wait %dms\n",
4370                         reset_timeout_ms);
4371         }
4372
4373 skip_reset:
4374         /*
4375          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4376          * itself is in reset. Need to wait until the reset is deasserted
4377          */
4378         msleep(reset_timeout_ms);
4379
4380         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4381         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4382                 dev_err(hdev->dev,
4383                         "Timeout while waiting for device to reset 0x%x\n",
4384                         status);
4385
4386         if (gaudi) {
4387                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4388                                 HW_CAP_HBM | HW_CAP_PCI_DMA |
4389                                 HW_CAP_MME | HW_CAP_TPC_MASK |
4390                                 HW_CAP_HBM_DMA | HW_CAP_PLL |
4391                                 HW_CAP_NIC_MASK | HW_CAP_MMU |
4392                                 HW_CAP_SRAM_SCRAMBLER |
4393                                 HW_CAP_HBM_SCRAMBLER |
4394                                 HW_CAP_CLK_GATE);
4395
4396                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4397
4398                 hdev->device_cpu_is_halted = false;
4399         }
4400 }
4401
4402 static int gaudi_suspend(struct hl_device *hdev)
4403 {
4404         int rc;
4405
4406         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4407         if (rc)
4408                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4409
4410         return rc;
4411 }
4412
4413 static int gaudi_resume(struct hl_device *hdev)
4414 {
4415         return gaudi_init_iatu(hdev);
4416 }
4417
4418 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4419                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4420 {
4421         int rc;
4422
4423         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4424                         VM_DONTCOPY | VM_NORESERVE;
4425
4426         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4427                                 (dma_addr - HOST_PHYS_BASE), size);
4428         if (rc)
4429                 dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
4430
4431         return rc;
4432 }
4433
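/* Translate hw_queue_id to the matching QMAN PQ_PI doorbell register and write
 * the new PI value. For the CPU PQ, also notify the device CPU through the
 * PI-update interrupt (GIC or dynamic register, depending on the F/W).
 */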
4434 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4435 {
4436         struct cpu_dyn_regs *dyn_regs =
4437                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4438         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4439         struct gaudi_device *gaudi = hdev->asic_specific;
4440         bool invalid_queue = false;
4441         int dma_id;
4442
4443         switch (hw_queue_id) {
4444         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4445                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4446                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4447                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4448                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4449                 break;
4450
4451         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4452                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4453                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4454                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4455                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4456                 break;
4457
4458         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4459                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4460                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4461                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4462                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4463                 break;
4464
4465         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4466                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4467                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4468                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4469                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4470                 break;
4471
4472         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4473                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4474                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4475                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4476                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4477                 break;
4478
4479         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4480                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4481                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4482                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4483                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4484                 break;
4485
4486         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4487                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4488                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4489                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4490                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4491                 break;
4492
4493         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4494                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4495                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4496                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4497                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4498                 break;
4499
4500         case GAUDI_QUEUE_ID_CPU_PQ:
4501                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4502                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4503                 else
4504                         invalid_queue = true;
4505                 break;
4506
4507         case GAUDI_QUEUE_ID_MME_0_0:
4508                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4509                 break;
4510
4511         case GAUDI_QUEUE_ID_MME_0_1:
4512                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4513                 break;
4514
4515         case GAUDI_QUEUE_ID_MME_0_2:
4516                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4517                 break;
4518
4519         case GAUDI_QUEUE_ID_MME_0_3:
4520                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4521                 break;
4522
4523         case GAUDI_QUEUE_ID_MME_1_0:
4524                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4525                 break;
4526
4527         case GAUDI_QUEUE_ID_MME_1_1:
4528                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4529                 break;
4530
4531         case GAUDI_QUEUE_ID_MME_1_2:
4532                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4533                 break;
4534
4535         case GAUDI_QUEUE_ID_MME_1_3:
4536                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4537                 break;
4538
4539         case GAUDI_QUEUE_ID_TPC_0_0:
4540                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4541                 break;
4542
4543         case GAUDI_QUEUE_ID_TPC_0_1:
4544                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4545                 break;
4546
4547         case GAUDI_QUEUE_ID_TPC_0_2:
4548                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4549                 break;
4550
4551         case GAUDI_QUEUE_ID_TPC_0_3:
4552                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4553                 break;
4554
4555         case GAUDI_QUEUE_ID_TPC_1_0:
4556                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4557                 break;
4558
4559         case GAUDI_QUEUE_ID_TPC_1_1:
4560                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4561                 break;
4562
4563         case GAUDI_QUEUE_ID_TPC_1_2:
4564                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4565                 break;
4566
4567         case GAUDI_QUEUE_ID_TPC_1_3:
4568                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4569                 break;
4570
4571         case GAUDI_QUEUE_ID_TPC_2_0:
4572                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4573                 break;
4574
4575         case GAUDI_QUEUE_ID_TPC_2_1:
4576                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4577                 break;
4578
4579         case GAUDI_QUEUE_ID_TPC_2_2:
4580                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4581                 break;
4582
4583         case GAUDI_QUEUE_ID_TPC_2_3:
4584                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4585                 break;
4586
4587         case GAUDI_QUEUE_ID_TPC_3_0:
4588                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4589                 break;
4590
4591         case GAUDI_QUEUE_ID_TPC_3_1:
4592                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4593                 break;
4594
4595         case GAUDI_QUEUE_ID_TPC_3_2:
4596                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4597                 break;
4598
4599         case GAUDI_QUEUE_ID_TPC_3_3:
4600                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4601                 break;
4602
4603         case GAUDI_QUEUE_ID_TPC_4_0:
4604                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4605                 break;
4606
4607         case GAUDI_QUEUE_ID_TPC_4_1:
4608                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4609                 break;
4610
4611         case GAUDI_QUEUE_ID_TPC_4_2:
4612                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4613                 break;
4614
4615         case GAUDI_QUEUE_ID_TPC_4_3:
4616                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4617                 break;
4618
4619         case GAUDI_QUEUE_ID_TPC_5_0:
4620                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4621                 break;
4622
4623         case GAUDI_QUEUE_ID_TPC_5_1:
4624                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4625                 break;
4626
4627         case GAUDI_QUEUE_ID_TPC_5_2:
4628                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4629                 break;
4630
4631         case GAUDI_QUEUE_ID_TPC_5_3:
4632                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4633                 break;
4634
4635         case GAUDI_QUEUE_ID_TPC_6_0:
4636                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4637                 break;
4638
4639         case GAUDI_QUEUE_ID_TPC_6_1:
4640                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4641                 break;
4642
4643         case GAUDI_QUEUE_ID_TPC_6_2:
4644                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4645                 break;
4646
4647         case GAUDI_QUEUE_ID_TPC_6_3:
4648                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4649                 break;
4650
4651         case GAUDI_QUEUE_ID_TPC_7_0:
4652                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4653                 break;
4654
4655         case GAUDI_QUEUE_ID_TPC_7_1:
4656                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4657                 break;
4658
4659         case GAUDI_QUEUE_ID_TPC_7_2:
4660                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4661                 break;
4662
4663         case GAUDI_QUEUE_ID_TPC_7_3:
4664                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4665                 break;
4666
4667         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4668                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4669                         invalid_queue = true;
4670
4671                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4672                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4673                 break;
4674
4675         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4676                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4677                         invalid_queue = true;
4678
4679                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4680                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4681                 break;
4682
4683         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4684                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4685                         invalid_queue = true;
4686
4687                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4688                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4689                 break;
4690
4691         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4692                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4693                         invalid_queue = true;
4694
4695                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4696                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4697                 break;
4698
4699         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4700                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4701                         invalid_queue = true;
4702
4703                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4704                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4705                 break;
4706
4707         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4708                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4709                         invalid_queue = true;
4710
4711                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4712                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4713                 break;
4714
4715         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4716                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4717                         invalid_queue = true;
4718
4719                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4720                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4721                 break;
4722
4723         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4724                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4725                         invalid_queue = true;
4726
4727                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4728                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4729                 break;
4730
4731         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4732                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4733                         invalid_queue = true;
4734
4735                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4736                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4737                 break;
4738
4739         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4740                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4741                         invalid_queue = true;
4742
4743                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4744                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4745                 break;
4746
4747         default:
4748                 invalid_queue = true;
4749         }
4750
4751         if (invalid_queue) {
4752                 /* Should never get here */
4753                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4754                         hw_queue_id);
4755                 return;
4756         }
4757
4758         db_value = pi;
4759
4760         /* ring the doorbell */
4761         WREG32(db_reg_offset, db_value);
4762
4763         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4764                 /* make sure device CPU will read latest data from host */
4765                 mb();
4766
4767                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4768                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4769                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4770
4771                 WREG32(irq_handler_offset,
4772                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4773         }
4774 }
4775
4776 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4777                                 struct hl_bd *bd)
4778 {
4779         __le64 *pbd = (__le64 *) bd;
4780
4781         /* The QMANs are on the host memory so a simple copy suffices */
4782         pqe[0] = pbd[0];
4783         pqe[1] = pbd[1];
4784 }
4785
4786 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4787                                         dma_addr_t *dma_handle, gfp_t flags)
4788 {
4789         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4790                                                 dma_handle, flags);
4791
4792         /* Shift to the device's base physical address of host memory */
4793         if (kernel_addr)
4794                 *dma_handle += HOST_PHYS_BASE;
4795
4796         return kernel_addr;
4797 }
4798
4799 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4800                 void *cpu_addr, dma_addr_t dma_handle)
4801 {
4802         /* Cancel the device's base physical address of host memory */
4803         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4804
4805         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4806 }
4807
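/* Scrub the user DRAM range in chunks of up to 2GB, using all DMA channels in
 * parallel with the DMA core memset mode, and poll each channel for completion
 * before moving to the next batch of chunks.
 */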
4808 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4809 {
4810         struct asic_fixed_properties *prop = &hdev->asic_prop;
4811         u64  cur_addr = DRAM_BASE_ADDR_USER;
4812         u32 val;
4813         u32 chunk_size;
4814         int rc, dma_id;
4815
4816         while (cur_addr < prop->dram_end_address) {
4817                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4818                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4819
4820                         chunk_size =
4821                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4822
4823                         dev_dbg(hdev->dev,
4824                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4825                                 cur_addr, cur_addr + chunk_size);
4826
4827                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
4828                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
4829                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4830                                                 lower_32_bits(cur_addr));
4831                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4832                                                 upper_32_bits(cur_addr));
4833                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4834                                         chunk_size);
4835                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4836                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4837                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4838
4839                         cur_addr += chunk_size;
4840
4841                         if (cur_addr == prop->dram_end_address)
4842                                 break;
4843                 }
4844
4845                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4846                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4847
4848                         rc = hl_poll_timeout(
4849                                 hdev,
4850                                 mmDMA0_CORE_STS0 + dma_offset,
4851                                 val,
4852                                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4853                                 1000,
4854                                 HBM_SCRUBBING_TIMEOUT_US);
4855
4856                         if (rc) {
4857                                 dev_err(hdev->dev,
4858                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4859                                         dma_id);
4860                                 return -EIO;
4861                         }
4862                 }
4863         }
4864
4865         return 0;
4866 }
4867
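/* When called with addr == 0 and size == 0, wait for the device to become idle
 * and then scrub all of SRAM (with a 0x77..77 pattern) and all of HBM. Does
 * nothing if memory scrubbing is disabled.
 */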
4868 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4869 {
4870         struct asic_fixed_properties *prop = &hdev->asic_prop;
4871         struct gaudi_device *gaudi = hdev->asic_specific;
4872         int rc = 0;
4873         u64 val = 0;
4874
4875         if (!hdev->memory_scrub)
4876                 return 0;
4877
4878         if (!addr && !size) {
4879                 /* Wait till device is idle */
4880                 rc = hl_poll_timeout(
4881                                 hdev,
4882                                 mmDMA0_CORE_STS0/* dummy */,
4883                                 val/* dummy */,
4884                                 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4885                                                 0, NULL)),
4886                                                 1000,
4887                                                 HBM_SCRUBBING_TIMEOUT_US);
4888                 if (rc) {
4889                         dev_err(hdev->dev, "Timed out while waiting for device to be idle\n");
4890                         return -EIO;
4891                 }
4892
4893                 /* Scrub SRAM */
4894                 addr = prop->sram_user_base_address;
4895                 size = hdev->pldm ? 0x10000 :
4896                                 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4897                 val = 0x7777777777777777ull;
4898
4899                 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4900                 if (rc) {
4901                         dev_err(hdev->dev,
4902                                 "Failed to clear SRAM in mem scrub all\n");
4903                         return rc;
4904                 }
4905
4906                 mutex_lock(&gaudi->clk_gate_mutex);
4907                 hdev->asic_funcs->disable_clock_gating(hdev);
4908
4909                 /* Scrub HBM using all DMA channels in parallel */
4910                 rc = gaudi_hbm_scrubbing(hdev);
4911                 if (rc)
4912                         dev_err(hdev->dev,
4913                                 "Failed to clear HBM in mem scrub all\n");
4914
4915                 hdev->asic_funcs->set_clock_gating(hdev);
4916                 mutex_unlock(&gaudi->clk_gate_mutex);
4917         }
4918
4919         return rc;
4920 }
4921
4922 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4923                                 u32 queue_id, dma_addr_t *dma_handle,
4924                                 u16 *queue_len)
4925 {
4926         struct gaudi_device *gaudi = hdev->asic_specific;
4927         struct gaudi_internal_qman_info *q;
4928
4929         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4930                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4931                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4932                 return NULL;
4933         }
4934
4935         q = &gaudi->internal_qmans[queue_id];
4936         *dma_handle = q->pq_dma_addr;
4937         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4938
4939         return q->pq_kernel_addr;
4940 }
4941
4942 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4943                                 u16 len, u32 timeout, u64 *result)
4944 {
4945         struct gaudi_device *gaudi = hdev->asic_specific;
4946
4947         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4948                 if (result)
4949                         *result = 0;
4950                 return 0;
4951         }
4952
4953         if (!timeout)
4954                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4955
4956         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4957                                                 timeout, result);
4958 }
4959
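/* Sanity-test an external H/W queue: send a MSG_PROT fence packet that writes
 * a known value to host memory and poll that memory until the value arrives or
 * the test times out.
 */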
4960 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4961 {
4962         struct packet_msg_prot *fence_pkt;
4963         dma_addr_t pkt_dma_addr;
4964         u32 fence_val, tmp, timeout_usec;
4965         dma_addr_t fence_dma_addr;
4966         u32 *fence_ptr;
4967         int rc;
4968
4969         if (hdev->pldm)
4970                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4971         else
4972                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4973
4974         fence_val = GAUDI_QMAN0_FENCE_VAL;
4975
4976         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4977                                                         &fence_dma_addr);
4978         if (!fence_ptr) {
4979                 dev_err(hdev->dev,
4980                         "Failed to allocate memory for H/W queue %d testing\n",
4981                         hw_queue_id);
4982                 return -ENOMEM;
4983         }
4984
4985         *fence_ptr = 0;
4986
4987         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4988                                         sizeof(struct packet_msg_prot),
4989                                         GFP_KERNEL, &pkt_dma_addr);
4990         if (!fence_pkt) {
4991                 dev_err(hdev->dev,
4992                         "Failed to allocate packet for H/W queue %d testing\n",
4993                         hw_queue_id);
4994                 rc = -ENOMEM;
4995                 goto free_fence_ptr;
4996         }
4997
4998         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4999         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5000         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5001
5002         fence_pkt->ctl = cpu_to_le32(tmp);
5003         fence_pkt->value = cpu_to_le32(fence_val);
5004         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
5005
5006         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
5007                                         sizeof(struct packet_msg_prot),
5008                                         pkt_dma_addr);
5009         if (rc) {
5010                 dev_err(hdev->dev,
5011                         "Failed to send fence packet to H/W queue %d\n",
5012                         hw_queue_id);
5013                 goto free_pkt;
5014         }
5015
5016         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
5017                                         1000, timeout_usec, true);
5018
5019         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
5020
5021         if (rc == -ETIMEDOUT) {
5022                 dev_err(hdev->dev,
5023                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
5024                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
5025                 rc = -EIO;
5026         }
5027
5028 free_pkt:
5029         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
5030                                         pkt_dma_addr);
5031 free_fence_ptr:
5032         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
5033                                         fence_dma_addr);
5034         return rc;
5035 }
5036
5037 static int gaudi_test_cpu_queue(struct hl_device *hdev)
5038 {
5039         struct gaudi_device *gaudi = hdev->asic_specific;
5040
5041         /*
5042          * Check the capability here because send_cpu_message() won't update
5043          * the result value if the capability isn't set
5044          */
5045         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
5046                 return 0;
5047
5048         return hl_fw_test_cpu_queue(hdev);
5049 }
5050
5051 static int gaudi_test_queues(struct hl_device *hdev)
5052 {
5053         int i, rc, ret_val = 0;
5054
5055         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
5056                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
5057                         rc = gaudi_test_queue(hdev, i);
5058                         if (rc)
5059                                 ret_val = -EINVAL;
5060                 }
5061         }
5062
5063         rc = gaudi_test_cpu_queue(hdev);
5064         if (rc)
5065                 ret_val = -EINVAL;
5066
5067         return ret_val;
5068 }
5069
5070 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5071                 gfp_t mem_flags, dma_addr_t *dma_handle)
5072 {
5073         void *kernel_addr;
5074
5075         if (size > GAUDI_DMA_POOL_BLK_SIZE)
5076                 return NULL;
5077
5078         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5079
5080         /* Shift to the device's base physical address of host memory */
5081         if (kernel_addr)
5082                 *dma_handle += HOST_PHYS_BASE;
5083
5084         return kernel_addr;
5085 }
5086
5087 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
5088                         dma_addr_t dma_addr)
5089 {
5090         /* Cancel the device's base physical address of host memory */
5091         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5092
5093         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5094 }
5095
5096 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
5097                                         size_t size, dma_addr_t *dma_handle)
5098 {
5099         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5100 }
5101
5102 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
5103                                                 size_t size, void *vaddr)
5104 {
5105         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5106 }
5107
5108 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5109                         int nents, enum dma_data_direction dir)
5110 {
5111         struct scatterlist *sg;
5112         int i;
5113
5114         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5115                 return -ENOMEM;
5116
5117         /* Shift to the device's base physical address of host memory */
5118         for_each_sg(sgl, sg, nents, i)
5119                 sg->dma_address += HOST_PHYS_BASE;
5120
5121         return 0;
5122 }
5123
5124 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5125                         int nents, enum dma_data_direction dir)
5126 {
5127         struct scatterlist *sg;
5128         int i;
5129
5130         /* Cancel the device's base physical address of host memory */
5131         for_each_sg(sgl, sg, nents, i)
5132                 sg->dma_address -= HOST_PHYS_BASE;
5133
5134         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5135 }
5136
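/*
 * Return the number of bytes of LIN_DMA descriptors needed to cover the given
 * SG table. Adjacent SG entries whose DMA addresses are contiguous are folded
 * into a single descriptor as long as the combined length stays within
 * DMA_MAX_TRANSFER_SIZE, so the result is the post-coalescing descriptor
 * count multiplied by sizeof(struct packet_lin_dma). For example (purely
 * illustrative), two adjacent 1MB entries at contiguous DMA addresses cost a
 * single descriptor, assuming DMA_MAX_TRANSFER_SIZE is at least 2MB.
 */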
5137 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5138                                         struct sg_table *sgt)
5139 {
5140         struct scatterlist *sg, *sg_next_iter;
5141         u32 count, dma_desc_cnt;
5142         u64 len, len_next;
5143         dma_addr_t addr, addr_next;
5144
5145         dma_desc_cnt = 0;
5146
5147         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5148
5149                 len = sg_dma_len(sg);
5150                 addr = sg_dma_address(sg);
5151
5152                 if (len == 0)
5153                         break;
5154
5155                 while ((count + 1) < sgt->nents) {
5156                         sg_next_iter = sg_next(sg);
5157                         len_next = sg_dma_len(sg_next_iter);
5158                         addr_next = sg_dma_address(sg_next_iter);
5159
5160                         if (len_next == 0)
5161                                 break;
5162
5163                         if ((addr + len == addr_next) &&
5164                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5165                                 len += len_next;
5166                                 count++;
5167                                 sg = sg_next_iter;
5168                         } else {
5169                                 break;
5170                         }
5171                 }
5172
5173                 dma_desc_cnt++;
5174         }
5175
5176         return dma_desc_cnt * sizeof(struct packet_lin_dma);
5177 }
5178
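/*
 * Pin the host buffer referenced by a user LIN_DMA packet, unless it was
 * already pinned for this job, DMA-map its SG table and grow the patched CB
 * size by the descriptor-list size that will be needed when the packet is
 * later expanded per SG entry.
 */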
5179 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5180                                 struct hl_cs_parser *parser,
5181                                 struct packet_lin_dma *user_dma_pkt,
5182                                 u64 addr, enum dma_data_direction dir)
5183 {
5184         struct hl_userptr *userptr;
5185         int rc;
5186
5187         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5188                         parser->job_userptr_list, &userptr))
5189                 goto already_pinned;
5190
5191         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5192         if (!userptr)
5193                 return -ENOMEM;
5194
5195         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5196                                 userptr);
5197         if (rc)
5198                 goto free_userptr;
5199
5200         list_add_tail(&userptr->job_node, parser->job_userptr_list);
5201
5202         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5203                                         userptr->sgt->nents, dir);
5204         if (rc) {
5205                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5206                 goto unpin_memory;
5207         }
5208
5209         userptr->dma_mapped = true;
5210         userptr->dir = dir;
5211
5212 already_pinned:
5213         parser->patched_cb_size +=
5214                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5215
5216         return 0;
5217
5218 unpin_memory:
5219         list_del(&userptr->job_node);
5220         hl_unpin_host_memory(hdev, userptr);
5221 free_userptr:
5222         kfree(userptr);
5223         return rc;
5224 }
5225
5226 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5227                                 struct hl_cs_parser *parser,
5228                                 struct packet_lin_dma *user_dma_pkt,
5229                                 bool src_in_host)
5230 {
5231         enum dma_data_direction dir;
5232         bool skip_host_mem_pin = false, user_memset;
5233         u64 addr;
5234         int rc = 0;
5235
5236         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5237                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5238                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5239
5240         if (src_in_host) {
5241                 if (user_memset)
5242                         skip_host_mem_pin = true;
5243
5244                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5245                 dir = DMA_TO_DEVICE;
5246                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5247         } else {
5248                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5249                 dir = DMA_FROM_DEVICE;
5250                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5251                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5252                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5253         }
5254
5255         if (skip_host_mem_pin)
5256                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5257         else
5258                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5259                                                 addr, dir);
5260
5261         return rc;
5262 }
5263
5264 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5265                                 struct hl_cs_parser *parser,
5266                                 struct packet_lin_dma *user_dma_pkt)
5267 {
5268         bool src_in_host = false;
5269         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5270                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5271                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5272
5273         dev_dbg(hdev->dev, "DMA packet details:\n");
5274         dev_dbg(hdev->dev, "source == 0x%llx\n",
5275                                 le64_to_cpu(user_dma_pkt->src_addr));
5276         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5277         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5278
5279         /*
5280          * Special handling for DMA with size 0. Bypass all validations
5281          * because no transactions will be done except for WR_COMP, which
5282          * is not a security issue
5283          */
5284         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5285                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5286                 return 0;
5287         }
5288
5289         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5290                 src_in_host = true;
5291
5292         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5293                                                 src_in_host);
5294 }
5295
5296 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5297                                         struct hl_cs_parser *parser,
5298                                         struct packet_load_and_exe *user_pkt)
5299 {
5300         u32 cfg;
5301
5302         cfg = le32_to_cpu(user_pkt->cfg);
5303
5304         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5305                 dev_err(hdev->dev,
5306                         "User not allowed to use Load and Execute\n");
5307                 return -EPERM;
5308         }
5309
5310         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5311
5312         return 0;
5313 }
5314
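/*
 * First pass over the user CB: verify each packet id and that no packet
 * crosses the CB boundary, reject packets the user may not submit (MSG_PROT,
 * CP_DMA, STOP, WREG_BULK and LOAD_AND_EXE with the DST configuration set)
 * and accumulate the size the patched CB will need. With the MMU enabled,
 * LIN_DMA packets keep their original size; without it, host transfers are
 * sized according to their pinned SG lists.
 */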
5315 static int gaudi_validate_cb(struct hl_device *hdev,
5316                         struct hl_cs_parser *parser, bool is_mmu)
5317 {
5318         u32 cb_parsed_length = 0;
5319         int rc = 0;
5320
5321         parser->patched_cb_size = 0;
5322
        /* user_cb_size is more than 0 so the loop will always be executed */
5324         while (cb_parsed_length < parser->user_cb_size) {
5325                 enum packet_id pkt_id;
5326                 u16 pkt_size;
5327                 struct gaudi_packet *user_pkt;
5328
5329                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5330
5331                 pkt_id = (enum packet_id) (
5332                                 (le64_to_cpu(user_pkt->header) &
5333                                 PACKET_HEADER_PACKET_ID_MASK) >>
5334                                         PACKET_HEADER_PACKET_ID_SHIFT);
5335
5336                 if (!validate_packet_id(pkt_id)) {
5337                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5338                         rc = -EINVAL;
5339                         break;
5340                 }
5341
5342                 pkt_size = gaudi_packet_sizes[pkt_id];
5343                 cb_parsed_length += pkt_size;
5344                 if (cb_parsed_length > parser->user_cb_size) {
5345                         dev_err(hdev->dev,
5346                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5347                         rc = -EINVAL;
5348                         break;
5349                 }
5350
5351                 switch (pkt_id) {
5352                 case PACKET_MSG_PROT:
5353                         dev_err(hdev->dev,
5354                                 "User not allowed to use MSG_PROT\n");
5355                         rc = -EPERM;
5356                         break;
5357
5358                 case PACKET_CP_DMA:
5359                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5360                         rc = -EPERM;
5361                         break;
5362
5363                 case PACKET_STOP:
5364                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5365                         rc = -EPERM;
5366                         break;
5367
5368                 case PACKET_WREG_BULK:
5369                         dev_err(hdev->dev,
5370                                 "User not allowed to use WREG_BULK\n");
5371                         rc = -EPERM;
5372                         break;
5373
5374                 case PACKET_LOAD_AND_EXE:
5375                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5376                                 (struct packet_load_and_exe *) user_pkt);
5377                         break;
5378
5379                 case PACKET_LIN_DMA:
5380                         parser->contains_dma_pkt = true;
5381                         if (is_mmu)
5382                                 parser->patched_cb_size += pkt_size;
5383                         else
5384                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5385                                         (struct packet_lin_dma *) user_pkt);
5386                         break;
5387
5388                 case PACKET_WREG_32:
5389                 case PACKET_MSG_LONG:
5390                 case PACKET_MSG_SHORT:
5391                 case PACKET_REPEAT:
5392                 case PACKET_FENCE:
5393                 case PACKET_NOP:
5394                 case PACKET_ARB_POINT:
5395                         parser->patched_cb_size += pkt_size;
5396                         break;
5397
5398                 default:
5399                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5400                                 pkt_id);
5401                         rc = -EINVAL;
5402                         break;
5403                 }
5404
5405                 if (rc)
5406                         break;
5407         }
5408
5409         /*
5410          * The new CB should have space at the end for two MSG_PROT packets:
5411          * 1. A packet that will act as a completion packet
         * 2. A packet that will generate an MSI-X interrupt
5413          */
5414         if (parser->completion)
5415                 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5416
5417         return rc;
5418 }
5419
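/*
 * Expand a single user LIN_DMA packet into one packet per coalesced SG entry
 * of the pinned host buffer. The engine-barrier bit is kept only on the first
 * generated packet, WR_COMP is cleared on all of them and then restored on
 * the last packet exactly as the user set it, and a host memset going to the
 * device is copied as-is since there is no host buffer to translate.
 */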
5420 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5421                                 struct hl_cs_parser *parser,
5422                                 struct packet_lin_dma *user_dma_pkt,
5423                                 struct packet_lin_dma *new_dma_pkt,
5424                                 u32 *new_dma_pkt_size)
5425 {
5426         struct hl_userptr *userptr;
5427         struct scatterlist *sg, *sg_next_iter;
5428         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5429         u64 len, len_next;
5430         dma_addr_t dma_addr, dma_addr_next;
5431         u64 device_memory_addr, addr;
5432         enum dma_data_direction dir;
5433         struct sg_table *sgt;
5434         bool src_in_host = false;
5435         bool skip_host_mem_pin = false;
5436         bool user_memset;
5437
5438         ctl = le32_to_cpu(user_dma_pkt->ctl);
5439
5440         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5441                 src_in_host = true;
5442
5443         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5444                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5445
5446         if (src_in_host) {
5447                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5448                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5449                 dir = DMA_TO_DEVICE;
5450                 if (user_memset)
5451                         skip_host_mem_pin = true;
5452         } else {
5453                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5454                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5455                 dir = DMA_FROM_DEVICE;
5456         }
5457
5458         if ((!skip_host_mem_pin) &&
5459                 (!hl_userptr_is_pinned(hdev, addr,
5460                                         le32_to_cpu(user_dma_pkt->tsize),
5461                                         parser->job_userptr_list, &userptr))) {
                dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
                                addr, le32_to_cpu(user_dma_pkt->tsize));
5464                 return -EFAULT;
5465         }
5466
5467         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5468                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5469                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5470                 return 0;
5471         }
5472
5473         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5474
5475         sgt = userptr->sgt;
5476         dma_desc_cnt = 0;
5477
5478         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5479                 len = sg_dma_len(sg);
5480                 dma_addr = sg_dma_address(sg);
5481
5482                 if (len == 0)
5483                         break;
5484
5485                 while ((count + 1) < sgt->nents) {
5486                         sg_next_iter = sg_next(sg);
5487                         len_next = sg_dma_len(sg_next_iter);
5488                         dma_addr_next = sg_dma_address(sg_next_iter);
5489
5490                         if (len_next == 0)
5491                                 break;
5492
5493                         if ((dma_addr + len == dma_addr_next) &&
5494                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5495                                 len += len_next;
5496                                 count++;
5497                                 sg = sg_next_iter;
5498                         } else {
5499                                 break;
5500                         }
5501                 }
5502
5503                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5504                 if (likely(dma_desc_cnt))
5505                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5506                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5507                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5508                 new_dma_pkt->tsize = cpu_to_le32(len);
5509
5510                 if (dir == DMA_TO_DEVICE) {
5511                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5512                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5513                 } else {
5514                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5515                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5516                 }
5517
5518                 if (!user_memset)
5519                         device_memory_addr += len;
5520                 dma_desc_cnt++;
5521                 new_dma_pkt++;
5522         }
5523
5524         if (!dma_desc_cnt) {
5525                 dev_err(hdev->dev,
5526                         "Error of 0 SG entries when patching DMA packet\n");
5527                 return -EFAULT;
5528         }
5529
        /* Fix the last DMA packet - wrcomp must be as the user set it */
5531         new_dma_pkt--;
5532         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5533
5534         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5535
5536         return 0;
5537 }
5538
5539 static int gaudi_patch_cb(struct hl_device *hdev,
5540                                 struct hl_cs_parser *parser)
5541 {
5542         u32 cb_parsed_length = 0;
5543         u32 cb_patched_cur_length = 0;
5544         int rc = 0;
5545
        /* user_cb_size is more than 0 so the loop will always be executed */
5547         while (cb_parsed_length < parser->user_cb_size) {
5548                 enum packet_id pkt_id;
5549                 u16 pkt_size;
5550                 u32 new_pkt_size = 0;
5551                 struct gaudi_packet *user_pkt, *kernel_pkt;
5552
5553                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5554                 kernel_pkt = parser->patched_cb->kernel_address +
5555                                         cb_patched_cur_length;
5556
5557                 pkt_id = (enum packet_id) (
5558                                 (le64_to_cpu(user_pkt->header) &
5559                                 PACKET_HEADER_PACKET_ID_MASK) >>
5560                                         PACKET_HEADER_PACKET_ID_SHIFT);
5561
5562                 if (!validate_packet_id(pkt_id)) {
5563                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5564                         rc = -EINVAL;
5565                         break;
5566                 }
5567
5568                 pkt_size = gaudi_packet_sizes[pkt_id];
5569                 cb_parsed_length += pkt_size;
5570                 if (cb_parsed_length > parser->user_cb_size) {
5571                         dev_err(hdev->dev,
5572                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5573                         rc = -EINVAL;
5574                         break;
5575                 }
5576
5577                 switch (pkt_id) {
5578                 case PACKET_LIN_DMA:
5579                         rc = gaudi_patch_dma_packet(hdev, parser,
5580                                         (struct packet_lin_dma *) user_pkt,
5581                                         (struct packet_lin_dma *) kernel_pkt,
5582                                         &new_pkt_size);
5583                         cb_patched_cur_length += new_pkt_size;
5584                         break;
5585
5586                 case PACKET_MSG_PROT:
5587                         dev_err(hdev->dev,
5588                                 "User not allowed to use MSG_PROT\n");
5589                         rc = -EPERM;
5590                         break;
5591
5592                 case PACKET_CP_DMA:
5593                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5594                         rc = -EPERM;
5595                         break;
5596
5597                 case PACKET_STOP:
5598                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5599                         rc = -EPERM;
5600                         break;
5601
5602                 case PACKET_WREG_32:
5603                 case PACKET_WREG_BULK:
5604                 case PACKET_MSG_LONG:
5605                 case PACKET_MSG_SHORT:
5606                 case PACKET_REPEAT:
5607                 case PACKET_FENCE:
5608                 case PACKET_NOP:
5609                 case PACKET_ARB_POINT:
5610                 case PACKET_LOAD_AND_EXE:
5611                         memcpy(kernel_pkt, user_pkt, pkt_size);
5612                         cb_patched_cur_length += pkt_size;
5613                         break;
5614
5615                 default:
5616                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5617                                 pkt_id);
5618                         rc = -EINVAL;
5619                         break;
5620                 }
5621
5622                 if (rc)
5623                         break;
5624         }
5625
5626         return rc;
5627 }
5628
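/*
 * MMU parsing path: the patched CB is a kernel-owned copy of the user CB,
 * plus room for the two trailing MSG_PROT packets on a completion CS. Because
 * the device MMU translates host addresses, no per-SG patching is needed; the
 * copy is validated in place and its size is cross-checked against the
 * pre-allocated size.
 */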
5629 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5630                 struct hl_cs_parser *parser)
5631 {
5632         u64 patched_cb_handle;
5633         u32 patched_cb_size;
5634         struct hl_cb *user_cb;
5635         int rc;
5636
5637         /*
         * The new CB should have space at the end for two MSG_PROT packets:
         * 1. A packet that will act as a completion packet
         * 2. A packet that will generate an MSI interrupt
5641          */
5642         if (parser->completion)
5643                 parser->patched_cb_size = parser->user_cb_size +
5644                                 sizeof(struct packet_msg_prot) * 2;
5645         else
5646                 parser->patched_cb_size = parser->user_cb_size;
5647
5648         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5649                                 parser->patched_cb_size, false, false,
5650                                 &patched_cb_handle);
5651
5652         if (rc) {
5653                 dev_err(hdev->dev,
5654                         "Failed to allocate patched CB for DMA CS %d\n",
5655                         rc);
5656                 return rc;
5657         }
5658
5659         patched_cb_handle >>= PAGE_SHIFT;
5660         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5661                                 (u32) patched_cb_handle);
5662         /* hl_cb_get should never fail */
5663         if (!parser->patched_cb) {
5664                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5665                         (u32) patched_cb_handle);
5666                 rc = -EFAULT;
5667                 goto out;
5668         }
5669
5670         /*
5671          * The check that parser->user_cb_size <= parser->user_cb->size was done
5672          * in validate_queue_index().
5673          */
5674         memcpy(parser->patched_cb->kernel_address,
5675                 parser->user_cb->kernel_address,
5676                 parser->user_cb_size);
5677
5678         patched_cb_size = parser->patched_cb_size;
5679
5680         /* Validate patched CB instead of user CB */
5681         user_cb = parser->user_cb;
5682         parser->user_cb = parser->patched_cb;
5683         rc = gaudi_validate_cb(hdev, parser, true);
5684         parser->user_cb = user_cb;
5685
5686         if (rc) {
5687                 hl_cb_put(parser->patched_cb);
5688                 goto out;
5689         }
5690
5691         if (patched_cb_size != parser->patched_cb_size) {
5692                 dev_err(hdev->dev, "user CB size mismatch\n");
5693                 hl_cb_put(parser->patched_cb);
5694                 rc = -EINVAL;
5695                 goto out;
5696         }
5697
5698 out:
5699         /*
         * Always call cb destroy here because we still have one reference
         * to it from the earlier cb_get call. After the job is completed,
         * cb_put will release it, but here we want to remove it from the
         * idr
5704          */
5705         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5706                                         patched_cb_handle << PAGE_SHIFT);
5707
5708         return rc;
5709 }
5710
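/*
 * Non-MMU parsing path: validate the user CB first to learn how large the
 * patched CB must be, allocate it and then patch it - copying most packets
 * verbatim while expanding every LIN_DMA packet to the physical addresses of
 * its pinned host pages.
 */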
5711 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5712                 struct hl_cs_parser *parser)
5713 {
5714         u64 patched_cb_handle;
5715         int rc;
5716
5717         rc = gaudi_validate_cb(hdev, parser, false);
5718
5719         if (rc)
5720                 goto free_userptr;
5721
5722         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5723                                 parser->patched_cb_size, false, false,
5724                                 &patched_cb_handle);
5725         if (rc) {
5726                 dev_err(hdev->dev,
5727                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5728                 goto free_userptr;
5729         }
5730
5731         patched_cb_handle >>= PAGE_SHIFT;
5732         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5733                                 (u32) patched_cb_handle);
5734         /* hl_cb_get should never fail here */
5735         if (!parser->patched_cb) {
5736                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5737                                 (u32) patched_cb_handle);
5738                 rc = -EFAULT;
5739                 goto out;
5740         }
5741
5742         rc = gaudi_patch_cb(hdev, parser);
5743
5744         if (rc)
5745                 hl_cb_put(parser->patched_cb);
5746
5747 out:
5748         /*
         * Always call cb destroy here because we still have one reference
         * to it from the earlier cb_get call. After the job is completed,
         * cb_put will release it, but here we want to remove it from the
         * idr
5753          */
5754         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5755                                 patched_cb_handle << PAGE_SHIFT);
5756
5757 free_userptr:
5758         if (rc)
5759                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5760         return rc;
5761 }
5762
5763 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5764                                         struct hl_cs_parser *parser)
5765 {
5766         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5767         struct gaudi_device *gaudi = hdev->asic_specific;
        u32 nic_mask_q_id;

        if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
                        (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
                /* Compute the NIC capability mask only for NIC queues so the
                 * shift amount is always within range
                 */
                nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
                        ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));

                if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
                        dev_err(hdev->dev, "h/w queue %d is disabled\n",
                                        parser->hw_queue_id);
                        return -EINVAL;
                }
        }
5778
5779         /* For internal queue jobs just check if CB address is valid */
5780         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5781                                         parser->user_cb_size,
5782                                         asic_prop->sram_user_base_address,
5783                                         asic_prop->sram_end_address))
5784                 return 0;
5785
5786         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5787                                         parser->user_cb_size,
5788                                         asic_prop->dram_user_base_address,
5789                                         asic_prop->dram_end_address))
5790                 return 0;
5791
5792         /* PMMU and HPMMU addresses are equal, check only one of them */
5793         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5794                                         parser->user_cb_size,
5795                                         asic_prop->pmmu.start_addr,
5796                                         asic_prop->pmmu.end_addr))
5797                 return 0;
5798
5799         dev_err(hdev->dev,
5800                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5801                 parser->user_cb, parser->user_cb_size);
5802
5803         return -EFAULT;
5804 }
5805
5806 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5807 {
5808         struct gaudi_device *gaudi = hdev->asic_specific;
5809
5810         if (parser->queue_type == QUEUE_TYPE_INT)
5811                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5812
5813         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5814                 return gaudi_parse_cb_mmu(hdev, parser);
5815         else
5816                 return gaudi_parse_cb_no_mmu(hdev, parser);
5817 }
5818
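/*
 * Append the two trailing MSG_PROT packets to a kernel CB: the first writes
 * cq_val to cq_addr (optionally with an engine barrier) to signal completion,
 * and the second writes 1 to the MSI register - a per-vector address in
 * multi-MSI mode, the single MSI request register otherwise - to raise the
 * interrupt.
 */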
5819 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5820                                         void *kernel_address, u32 len,
5821                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
5822                                         bool eb)
5823 {
5824         struct gaudi_device *gaudi = hdev->asic_specific;
5825         struct packet_msg_prot *cq_pkt;
5826         u64 msi_addr;
5827         u32 tmp;
5828
5829         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5830
5831         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5832         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5833
5834         if (eb)
5835                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5836
5837         cq_pkt->ctl = cpu_to_le32(tmp);
5838         cq_pkt->value = cpu_to_le32(cq_val);
5839         cq_pkt->addr = cpu_to_le64(cq_addr);
5840
5841         cq_pkt++;
5842
5843         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5844         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5845         cq_pkt->ctl = cpu_to_le32(tmp);
5846         cq_pkt->value = cpu_to_le32(1);
5847
5848         if (gaudi->multi_msi_mode)
5849                 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5850         else
5851                 msi_addr = mmPCIE_CORE_MSI_REQ;
5852
5853         cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5854 }
5855
5856 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5857 {
5858         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5859 }
5860
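/*
 * Fill a device memory range with a 64-bit value by building a single memset
 * LIN_DMA packet (the value travels in src_addr) and submitting it as a
 * kernel job on QMAN0 of DMA channel 0. The DMA engine error cause is checked
 * before and after the transfer and cleared during init.
 */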
5861 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5862                                         u32 size, u64 val)
5863 {
5864         struct packet_lin_dma *lin_dma_pkt;
5865         struct hl_cs_job *job;
5866         u32 cb_size, ctl, err_cause;
5867         struct hl_cb *cb;
5868         u64 id;
5869         int rc;
5870
5871         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5872         if (!cb)
5873                 return -EFAULT;
5874
5875         lin_dma_pkt = cb->kernel_address;
5876         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5877         cb_size = sizeof(*lin_dma_pkt);
5878
5879         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5880         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5881         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5882         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5883         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5884
5885         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5886         lin_dma_pkt->src_addr = cpu_to_le64(val);
5887         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5888         lin_dma_pkt->tsize = cpu_to_le32(size);
5889
5890         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5891         if (!job) {
5892                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5893                 rc = -ENOMEM;
5894                 goto release_cb;
5895         }
5896
5897         /* Verify DMA is OK */
5898         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5899         if (err_cause && !hdev->init_done) {
5900                 dev_dbg(hdev->dev,
5901                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5902                         err_cause);
5903                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5904         }
5905
5906         job->id = 0;
5907         job->user_cb = cb;
5908         atomic_inc(&job->user_cb->cs_cnt);
5909         job->user_cb_size = cb_size;
5910         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5911         job->patched_cb = job->user_cb;
5912         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5913
5914         hl_debugfs_add_job(hdev, job);
5915
5916         rc = gaudi_send_job_on_qman0(hdev, job);
5917         hl_debugfs_remove_job(hdev, job);
5918         kfree(job);
5919         atomic_dec(&cb->cs_cnt);
5920
5921         /* Verify DMA is OK */
5922         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5923         if (err_cause) {
5924                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5925                 rc = -EIO;
5926                 if (!hdev->init_done) {
5927                         dev_dbg(hdev->dev,
5928                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5929                                 err_cause);
5930                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5931                 }
5932         }
5933
5934 release_cb:
5935         id = cb->id;
5936         hl_cb_put(cb);
5937         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5938
5939         return rc;
5940 }
5941
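/*
 * Write the same value to a contiguous block of registers by generating one
 * MSG_LONG packet per register, plus room for the trailing MSG_PROT, and
 * submitting the CB as a kernel job on QMAN0. The resulting CB must fit
 * within 2MB.
 */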
5942 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5943                                         u32 num_regs, u32 val)
5944 {
5945         struct packet_msg_long *pkt;
5946         struct hl_cs_job *job;
5947         u32 cb_size, ctl;
5948         struct hl_cb *cb;
5949         int i, rc;
5950
5951         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5952
5953         if (cb_size > SZ_2M) {
                dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M / SZ_1M);
5955                 return -ENOMEM;
5956         }
5957
5958         cb = hl_cb_kernel_create(hdev, cb_size, false);
5959         if (!cb)
5960                 return -EFAULT;
5961
5962         pkt = cb->kernel_address;
5963
5964         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5965         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5966         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5967         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5968         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5969
5970         for (i = 0; i < num_regs ; i++, pkt++) {
5971                 pkt->ctl = cpu_to_le32(ctl);
5972                 pkt->value = cpu_to_le32(val);
5973                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5974         }
5975
5976         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5977         if (!job) {
5978                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5979                 rc = -ENOMEM;
5980                 goto release_cb;
5981         }
5982
5983         job->id = 0;
5984         job->user_cb = cb;
5985         atomic_inc(&job->user_cb->cs_cnt);
5986         job->user_cb_size = cb_size;
5987         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5988         job->patched_cb = job->user_cb;
5989         job->job_cb_size = cb_size;
5990
5991         hl_debugfs_add_job(hdev, job);
5992
5993         rc = gaudi_send_job_on_qman0(hdev, job);
5994         hl_debugfs_remove_job(hdev, job);
5995         kfree(job);
5996         atomic_dec(&cb->cs_cnt);
5997
5998 release_cb:
5999         hl_cb_put(cb);
6000         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
6001
6002         return rc;
6003 }
6004
6005 static int gaudi_restore_sm_registers(struct hl_device *hdev)
6006 {
6007         u64 base_addr;
6008         u32 num_regs;
6009         int rc;
6010
6011         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6012         num_regs = NUM_OF_SOB_IN_BLOCK;
6013         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6014         if (rc) {
6015                 dev_err(hdev->dev, "failed resetting SM registers");
6016                 return -ENOMEM;
6017         }
6018
6019         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
6020         num_regs = NUM_OF_SOB_IN_BLOCK;
6021         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6022         if (rc) {
6023                 dev_err(hdev->dev, "failed resetting SM registers");
6024                 return -ENOMEM;
6025         }
6026
6027         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6028         num_regs = NUM_OF_SOB_IN_BLOCK;
6029         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6030         if (rc) {
6031                 dev_err(hdev->dev, "failed resetting SM registers");
6032                 return -ENOMEM;
6033         }
6034
6035         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6036         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6037         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6038         if (rc) {
6039                 dev_err(hdev->dev, "failed resetting SM registers");
6040                 return -ENOMEM;
6041         }
6042
6043         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
6044         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6045         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6046         if (rc) {
6047                 dev_err(hdev->dev, "failed resetting SM registers");
6048                 return -ENOMEM;
6049         }
6050
6051         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6052         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6053         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6054         if (rc) {
6055                 dev_err(hdev->dev, "failed resetting SM registers");
6056                 return -ENOMEM;
6057         }
6058
6059         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6060                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6061         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6062         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6063         if (rc) {
6064                 dev_err(hdev->dev, "failed resetting SM registers");
6065                 return -ENOMEM;
6066         }
6067
6068         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6069                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6070         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6071         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6072         if (rc) {
6073                 dev_err(hdev->dev, "failed resetting SM registers");
6074                 return -ENOMEM;
6075         }
6076
6077         return 0;
6078 }
6079
6080 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6081 {
6082         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6083                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6084         int i;
6085
6086         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6087                 u64 sob_addr = CFG_BASE +
6088                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6089                                 (i * sob_delta);
6090                 u32 dma_offset = i * DMA_CORE_OFFSET;
6091
6092                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6093                                 lower_32_bits(sob_addr));
6094                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6095                                 upper_32_bits(sob_addr));
6096                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6097
6098                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6099                  * modified by the user for SRAM reduction
6100                  */
6101                 if (i > 1)
6102                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6103                                                                 0x00000001);
6104         }
6105 }
6106
6107 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6108 {
6109         u32 qman_offset;
6110         int i;
6111
6112         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6113                 qman_offset = i * DMA_QMAN_OFFSET;
6114                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6115         }
6116
6117         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6118                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6119                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6120         }
6121
6122         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6123                 qman_offset = i * TPC_QMAN_OFFSET;
6124                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6125         }
6126
6127         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6128                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6129                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6130                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6131         }
6132 }
6133
6134 static int gaudi_restore_user_registers(struct hl_device *hdev)
6135 {
6136         int rc;
6137
6138         rc = gaudi_restore_sm_registers(hdev);
6139         if (rc)
6140                 return rc;
6141
6142         gaudi_restore_dma_registers(hdev);
6143         gaudi_restore_qm_registers(hdev);
6144
6145         return 0;
6146 }
6147
6148 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6149 {
6150         return 0;
6151 }
6152
6153 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6154 {
6155         struct asic_fixed_properties *prop = &hdev->asic_prop;
6156         struct gaudi_device *gaudi = hdev->asic_specific;
6157         u64 addr = prop->mmu_pgt_addr;
6158         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6159
6160         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6161                 return 0;
6162
6163         return gaudi_memset_device_memory(hdev, addr, size, 0);
6164 }
6165
6166 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6167 {
6168
6169 }
6170
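/*
 * The debugfs accessors below dispatch by address range: configuration space
 * goes through RREG32/WREG32 (refused while clock gating is enabled), SRAM
 * through its PCI BAR, DRAM/HBM by sliding the HBM BAR window over the target
 * address, and host physical memory is touched directly - but only for
 * user-supplied addresses and only when no IOMMU is present.
 */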
6171 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6172                         bool user_address, u32 *val)
6173 {
6174         struct asic_fixed_properties *prop = &hdev->asic_prop;
6175         struct gaudi_device *gaudi = hdev->asic_specific;
6176         u64 hbm_bar_addr, host_phys_end;
6177         int rc = 0;
6178
6179         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6180
6181         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6182
6183                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6184                                 (hdev->clock_gating_mask &
6185                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6186
6187                         dev_err_ratelimited(hdev->dev,
6188                                 "Can't read register - clock gating is enabled!\n");
6189                         rc = -EFAULT;
6190                 } else {
6191                         *val = RREG32(addr - CFG_BASE);
6192                 }
6193
6194         } else if ((addr >= SRAM_BASE_ADDR) &&
6195                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6196                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6197                                 (addr - SRAM_BASE_ADDR));
6198         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6199                 u64 bar_base_addr = DRAM_PHYS_BASE +
6200                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6201
6202                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6203                 if (hbm_bar_addr != U64_MAX) {
6204                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6205                                                 (addr - bar_base_addr));
6206
6207                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6208                                                 hbm_bar_addr);
6209                 }
6210                 if (hbm_bar_addr == U64_MAX)
6211                         rc = -EIO;
6212         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6213                         user_address && !iommu_present(&pci_bus_type)) {
6214                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6215         } else {
6216                 rc = -EFAULT;
6217         }
6218
6219         return rc;
6220 }
6221
6222 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6223                         bool user_address, u32 val)
6224 {
6225         struct asic_fixed_properties *prop = &hdev->asic_prop;
6226         struct gaudi_device *gaudi = hdev->asic_specific;
6227         u64 hbm_bar_addr, host_phys_end;
6228         int rc = 0;
6229
6230         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6231
6232         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6233
6234                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6235                                 (hdev->clock_gating_mask &
6236                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6237
6238                         dev_err_ratelimited(hdev->dev,
6239                                 "Can't write register - clock gating is enabled!\n");
6240                         rc = -EFAULT;
6241                 } else {
6242                         WREG32(addr - CFG_BASE, val);
6243                 }
6244
6245         } else if ((addr >= SRAM_BASE_ADDR) &&
6246                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6247                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6248                                         (addr - SRAM_BASE_ADDR));
6249         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6250                 u64 bar_base_addr = DRAM_PHYS_BASE +
6251                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6252
6253                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6254                 if (hbm_bar_addr != U64_MAX) {
6255                         writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6256                                                 (addr - bar_base_addr));
6257
6258                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6259                                                 hbm_bar_addr);
6260                 }
6261                 if (hbm_bar_addr == U64_MAX)
6262                         rc = -EIO;
6263         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6264                         user_address && !iommu_present(&pci_bus_type)) {
6265                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6266         } else {
6267                 rc = -EFAULT;
6268         }
6269
6270         return rc;
6271 }
6272
6273 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6274                                 bool user_address, u64 *val)
6275 {
6276         struct asic_fixed_properties *prop = &hdev->asic_prop;
6277         struct gaudi_device *gaudi = hdev->asic_specific;
6278         u64 hbm_bar_addr, host_phys_end;
6279         int rc = 0;
6280
6281         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6282
6283         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6284
6285                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6286                                 (hdev->clock_gating_mask &
6287                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6288
6289                         dev_err_ratelimited(hdev->dev,
6290                                 "Can't read register - clock gating is enabled!\n");
6291                         rc = -EFAULT;
6292                 } else {
6293                         u32 val_l = RREG32(addr - CFG_BASE);
6294                         u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6295
6296                         *val = (((u64) val_h) << 32) | val_l;
6297                 }
6298
6299         } else if ((addr >= SRAM_BASE_ADDR) &&
6300                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6301                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6302                                 (addr - SRAM_BASE_ADDR));
6303         } else if (addr <=
6304                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6305                 u64 bar_base_addr = DRAM_PHYS_BASE +
6306                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6307
6308                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6309                 if (hbm_bar_addr != U64_MAX) {
6310                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6311                                                 (addr - bar_base_addr));
6312
6313                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6314                                                 hbm_bar_addr);
6315                 }
6316                 if (hbm_bar_addr == U64_MAX)
6317                         rc = -EIO;
6318         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6319                         user_address && !iommu_present(&pci_bus_type)) {
6320                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6321         } else {
6322                 rc = -EFAULT;
6323         }
6324
6325         return rc;
6326 }
6327
6328 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6329                                 bool user_address, u64 val)
6330 {
6331         struct asic_fixed_properties *prop = &hdev->asic_prop;
6332         struct gaudi_device *gaudi = hdev->asic_specific;
6333         u64 hbm_bar_addr, host_phys_end;
6334         int rc = 0;
6335
6336         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6337
6338         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6339
6340                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6341                                 (hdev->clock_gating_mask &
6342                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6343
6344                         dev_err_ratelimited(hdev->dev,
6345                                 "Can't write register - clock gating is enabled!\n");
6346                         rc = -EFAULT;
6347                 } else {
6348                         WREG32(addr - CFG_BASE, lower_32_bits(val));
6349                         WREG32(addr + sizeof(u32) - CFG_BASE,
6350                                 upper_32_bits(val));
6351                 }
6352
6353         } else if ((addr >= SRAM_BASE_ADDR) &&
6354                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6355                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6356                                         (addr - SRAM_BASE_ADDR));
6357         } else if (addr <=
6358                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6359                 u64 bar_base_addr = DRAM_PHYS_BASE +
6360                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6361
6362                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6363                 if (hbm_bar_addr != U64_MAX) {
6364                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6365                                                 (addr - bar_base_addr));
6366
6367                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6368                                                 hbm_bar_addr);
6369                 }
6370                 if (hbm_bar_addr == U64_MAX)
6371                         rc = -EIO;
6372         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6373                         user_address && !iommu_present(&pci_bus_type)) {
6374                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6375         } else {
6376                 rc = -EFAULT;
6377         }
6378
6379         return rc;
6380 }
6381
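/*
 * Issue a single transfer directly on a DMA core, bypassing its QMAN: program
 * the source, destination and size registers, commit in linear mode, poll the
 * busy bit for up to one second and then check the engine's error cause.
 */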
6382 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6383                                         u32 size_to_dma, dma_addr_t dma_addr)
6384 {
6385         u32 err_cause, val;
6386         u64 dma_offset;
6387         int rc;
6388
6389         dma_offset = dma_id * DMA_CORE_OFFSET;
6390
6391         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6392         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6393         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6394         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6395         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6396         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6397                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6398
6399         rc = hl_poll_timeout(
6400                 hdev,
6401                 mmDMA0_CORE_STS0 + dma_offset,
6402                 val,
6403                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6404                 0,
6405                 1000000);
6406
6407         if (rc) {
6408                 dev_err(hdev->dev,
6409                         "DMA %d timed-out during reading of 0x%llx\n",
6410                         dma_id, addr);
6411                 return -EIO;
6412         }
6413
6414         /* Verify DMA is OK */
6415         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6416         if (err_cause) {
6417                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6418                 dev_dbg(hdev->dev,
6419                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6420                         err_cause);
6421                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6422
6423                 return -EIO;
6424         }
6425
6426         return 0;
6427 }
6428
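/*
 * Read an arbitrary device address range for debugfs by bouncing it through
 * host memory: allocate a 2MB coherent buffer, pick an idle PCI DMA engine
 * (PCI_DMA_1, falling back to PCI_DMA_2), stop its QMAN CPs and raise the DMA
 * core protection bit for the duration (the TODO below notes this should
 * eventually be replaced by a proper MMU mapping), then copy the range in up
 * to 2MB chunks into blob_addr.
 */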
6429 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6430                                 void *blob_addr)
6431 {
6432         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6433         struct gaudi_device *gaudi = hdev->asic_specific;
6434         u32 qm_glbl_sts0, qm_cgm_sts;
6435         u64 dma_offset, qm_offset;
6436         dma_addr_t dma_addr;
6437         void *kernel_addr;
6438         bool is_eng_idle;
6439         int rc = 0, dma_id;
6440
6441         kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6442                                                 hdev, SZ_2M,
6443                                                 &dma_addr,
6444                                                 GFP_KERNEL | __GFP_ZERO);
6445
6446         if (!kernel_addr)
6447                 return -ENOMEM;
6448
6449         mutex_lock(&gaudi->clk_gate_mutex);
6450
6451         hdev->asic_funcs->disable_clock_gating(hdev);
6452
6453         hdev->asic_funcs->hw_queues_lock(hdev);
6454
6455         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6456         dma_offset = dma_id * DMA_CORE_OFFSET;
6457         qm_offset = dma_id * DMA_QMAN_OFFSET;
6458         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6459         qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6460         qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6461         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6462                       IS_DMA_IDLE(dma_core_sts0);
6463
6464         if (!is_eng_idle) {
6465                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6466                 dma_offset = dma_id * DMA_CORE_OFFSET;
6467                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6468                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6469                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6470                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6471                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6472                               IS_DMA_IDLE(dma_core_sts0);
6473
6474                 if (!is_eng_idle) {
6475                         dev_err_ratelimited(hdev->dev,
6476                                 "Can't read via DMA because it is BUSY\n");
6477                         rc = -EAGAIN;
6478                         goto out;
6479                 }
6480         }
6481
6482         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6483         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6484                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6485
6486         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
         * using the compute ctx ASID, if one exists. If not, use the kernel ctx
6488          * ASID
6489          */
6490         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6491
6492         /* Verify DMA is OK */
6493         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6494         if (err_cause) {
6495                 dev_dbg(hdev->dev,
6496                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6497                         err_cause);
6498                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6499         }
6500
6501         pos = 0;
6502         size_left = size;
6503         size_to_dma = SZ_2M;
6504
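        /*
         * Copy the requested range in chunks of up to 2MB: DMA each chunk
         * into the temporary coherent buffer, then memcpy it out to the
         * caller's blob buffer.
         */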
6505         while (size_left > 0) {
6506
6507                 if (size_left < SZ_2M)
6508                         size_to_dma = size_left;
6509
6510                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6511                                                 dma_addr);
6512                 if (rc)
6513                         break;
6514
6515                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6516
6517                 if (size_left <= SZ_2M)
6518                         break;
6519
6520                 pos += SZ_2M;
6521                 addr += SZ_2M;
6522                 size_left -= SZ_2M;
6523         }
6524
6525         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6526          * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6527          * ASID
6528          */
6529         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6530                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6531
6532         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6533
6534 out:
6535         hdev->asic_funcs->hw_queues_unlock(hdev);
6536
6537         hdev->asic_funcs->set_clock_gating(hdev);
6538
6539         mutex_unlock(&gaudi->clk_gate_mutex);
6540
6541         hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6542                                                 dma_addr);
6543
6544         return rc;
6545 }
6546
6547 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6548 {
6549         struct gaudi_device *gaudi = hdev->asic_specific;
6550
6551         if (hdev->hard_reset_pending)
6552                 return U64_MAX;
6553
6554         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6555                         (addr - gaudi->hbm_bar_cur_addr));
6556 }
6557
6558 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6559 {
6560         struct gaudi_device *gaudi = hdev->asic_specific;
6561
6562         if (hdev->hard_reset_pending)
6563                 return;
6564
6565         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6566                         (addr - gaudi->hbm_bar_cur_addr));
6567 }
6568
6569 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6570 {
6571         /* mask to zero the MMBP and ASID bits */
6572         WREG32_AND(reg, ~0x7FF);
6573         WREG32_OR(reg, asid);
6574 }
6575
6576 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6577 {
6578         struct gaudi_device *gaudi = hdev->asic_specific;
6579
6580         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6581                 return;
6582
6583         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6584                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6585                 return;
6586         }
6587
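        /*
         * Program the ASID (with the MMBP bit cleared, see
         * gaudi_mmu_prepare_reg()) into the QMAN and core user registers of
         * every engine, so their transactions are issued on behalf of this
         * context.
         */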
6588         mutex_lock(&gaudi->clk_gate_mutex);
6589
6590         hdev->asic_funcs->disable_clock_gating(hdev);
6591
6592         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6593         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6594         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6595         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6596         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6597
6598         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6599         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6600         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6601         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6602         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6603
6604         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6605         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6606         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6607         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6608         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6609
6610         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6611         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6612         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6613         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6614         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6615
6616         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6617         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6618         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6619         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6620         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6621
6622         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6623         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6624         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6625         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6626         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6627
6628         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6629         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6630         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6631         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6632         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6633
6634         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6635         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6636         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6637         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6638         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6639
6640         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6641         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6642         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6643         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6644         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6645         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6646         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6647         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6648
6649         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6650         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6651         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6652         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6653         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6654         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6655         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6656
6657         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6658         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6659         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6660         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6661         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6662         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6663         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6664
6665         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6666         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6667         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6668         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6669         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6670         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6671         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6672
6673         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6674         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6675         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6676         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6677         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6678         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6679         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6680
6681         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6682         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6683         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6684         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6685         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6686         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6687         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6688
6689         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6690         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6691         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6692         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6693         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6694         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6695         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6696
6697         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6698         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6699         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6700         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6701         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6702         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6703         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6704
6705         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6706         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6707         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6708         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6709         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6710         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6711         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6712
6713         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6714         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6715         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6716         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6717         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6718         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6719         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6720         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6721         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6722         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6723
6724         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6725         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6726         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6727         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6728         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6729         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6730         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6731         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6732         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6733         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6734         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6735         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6736
6737         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6738                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6739                                 asid);
6740                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6741                                 asid);
6742                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6743                                 asid);
6744                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6745                                 asid);
6746                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6747                                 asid);
6748         }
6749
6750         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6751                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6752                                 asid);
6753                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6754                                 asid);
6755                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6756                                 asid);
6757                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6758                                 asid);
6759                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6760                                 asid);
6761         }
6762
6763         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6764                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6765                                 asid);
6766                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6767                                 asid);
6768                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6769                                 asid);
6770                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6771                                 asid);
6772                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6773                                 asid);
6774         }
6775
6776         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6777                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6778                                 asid);
6779                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6780                                 asid);
6781                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6782                                 asid);
6783                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6784                                 asid);
6785                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6786                                 asid);
6787         }
6788
6789         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6790                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6791                                 asid);
6792                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6793                                 asid);
6794                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6795                                 asid);
6796                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6797                                 asid);
6798                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6799                                 asid);
6800         }
6801
6802         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6803                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6804                                 asid);
6805                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6806                                 asid);
6807                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6808                                 asid);
6809                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6810                                 asid);
6811                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6812                                 asid);
6813         }
6814
6815         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6816                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6817                                 asid);
6818                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6819                                 asid);
6820                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6821                                 asid);
6822                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6823                                 asid);
6824                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6825                                 asid);
6826         }
6827
6828         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6829                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6830                                 asid);
6831                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6832                                 asid);
6833                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6834                                 asid);
6835                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6836                                 asid);
6837                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6838                                 asid);
6839         }
6840
6841         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6842                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6843                                 asid);
6844                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6845                                 asid);
6846                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6847                                 asid);
6848                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6849                                 asid);
6850                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6851                                 asid);
6852         }
6853
6854         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6855                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6856                                 asid);
6857                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6858                                 asid);
6859                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6860                                 asid);
6861                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6862                                 asid);
6863                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6864                                 asid);
6865         }
6866
6867         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6868         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6869
6870         hdev->asic_funcs->set_clock_gating(hdev);
6871
6872         mutex_unlock(&gaudi->clk_gate_mutex);
6873 }
6874
6875 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6876                 struct hl_cs_job *job)
6877 {
6878         struct packet_msg_prot *fence_pkt;
6879         u32 *fence_ptr;
6880         dma_addr_t fence_dma_addr;
6881         struct hl_cb *cb;
6882         u32 tmp, timeout, dma_offset;
6883         int rc;
6884
6885         if (hdev->pldm)
6886                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6887         else
6888                 timeout = HL_DEVICE_TIMEOUT_USEC;
6889
6890         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6891                 dev_err_ratelimited(hdev->dev,
6892                         "Can't send driver job on QMAN0 because the device is not idle\n");
6893                 return -EBUSY;
6894         }
6895
6896         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6897                                                         &fence_dma_addr);
6898         if (!fence_ptr) {
6899                 dev_err(hdev->dev,
6900                         "Failed to allocate fence memory for QMAN0\n");
6901                 return -ENOMEM;
6902         }
6903
6904         cb = job->patched_cb;
6905
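        /*
         * The last packet in the patched CB is a MSG_PROT that writes
         * GAUDI_QMAN0_FENCE_VAL to the fence buffer allocated above; polling
         * that buffer below tells us when the job has completed.
         */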
6906         fence_pkt = cb->kernel_address +
6907                         job->job_cb_size - sizeof(struct packet_msg_prot);
6908
6909         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6910         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6911         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6912
6913         fence_pkt->ctl = cpu_to_le32(tmp);
6914         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6915         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6916
6917         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6918
6919         WREG32(mmDMA0_CORE_PROT + dma_offset,
6920                         BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6921
6922         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6923                                         job->job_cb_size, cb->bus_address);
6924         if (rc) {
6925                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6926                 goto free_fence_ptr;
6927         }
6928
6929         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6930                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6931                                 timeout, true);
6932
6933         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6934
6935         if (rc == -ETIMEDOUT) {
6936                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6937                 goto free_fence_ptr;
6938         }
6939
6940 free_fence_ptr:
6941         WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6942
6943         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6944                                         fence_dma_addr);
6945         return rc;
6946 }
6947
6948 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6949 {
6950         if (event_type >= GAUDI_EVENT_SIZE)
6951                 goto event_not_supported;
6952
6953         if (!gaudi_irq_map_table[event_type].valid)
6954                 goto event_not_supported;
6955
6956         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6957
6958         return;
6959
6960 event_not_supported:
6961         snprintf(desc, size, "N/A");
6962 }
6963
6964 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6965                                                         u32 x_y, bool is_write)
6966 {
6967         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6968
6969         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6970                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6971
6972         switch (x_y) {
6973         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6974         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6975                 dma_id[0] = 0;
6976                 dma_id[1] = 2;
6977                 break;
6978         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6979         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6980                 dma_id[0] = 1;
6981                 dma_id[1] = 3;
6982                 break;
6983         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6984         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6985                 dma_id[0] = 4;
6986                 dma_id[1] = 6;
6987                 break;
6988         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6989         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6990                 dma_id[0] = 5;
6991                 dma_id[1] = 7;
6992                 break;
6993         default:
6994                 goto unknown_initiator;
6995         }
6996
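        /*
         * Each of the above DMA_IF initiators is shared by two DMA engines.
         * Read both engines' ERR_CAUSE registers and use the HBW read/write
         * error bit to figure out which engine issued the faulting access.
         */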
6997         for (i = 0 ; i < 2 ; i++) {
6998                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6999                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
7000         }
7001
7002         switch (x_y) {
7003         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7004         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7005                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
7006                         return "DMA0";
7007                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
7008                         return "DMA2";
7009                 else
7010                         return "DMA0 or DMA2";
7011         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7012         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7013                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
7014                         return "DMA1";
7015                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
7016                         return "DMA3";
7017                 else
7018                         return "DMA1 or DMA3";
7019         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7020         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7021                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
7022                         return "DMA4";
7023                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
7024                         return "DMA6";
7025                 else
7026                         return "DMA4 or DMA6";
7027         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7028         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7029                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
7030                         return "DMA5";
7031                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
7032                         return "DMA7";
7033                 else
7034                         return "DMA5 or DMA7";
7035         }
7036
7037 unknown_initiator:
7038         return "unknown initiator";
7039 }
7040
7041 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
7042                                                         bool is_write)
7043 {
7044         u32 val, x_y, axi_id;
7045
7046         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
7047                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
7048         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
7049                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
7050         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
7051                         RAZWI_INITIATOR_AXI_ID_SHIFT);
7052
7053         switch (x_y) {
7054         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
7055                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7056                         return "TPC0";
7057                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7058                         return "NIC0";
7059                 break;
7060         case RAZWI_INITIATOR_ID_X_Y_TPC1:
7061                 return "TPC1";
7062         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
7063         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
7064                 return "MME0";
7065         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
7066         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
7067                 return "MME1";
7068         case RAZWI_INITIATOR_ID_X_Y_TPC2:
7069                 return "TPC2";
7070         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
7071                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7072                         return "TPC3";
7073                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
7074                         return "PCI";
7075                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
7076                         return "CPU";
7077                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
7078                         return "PSOC";
7079                 break;
7080         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7081         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7082         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7083         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7084         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7085         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7086         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7087         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7088                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
7089         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7090                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7091                         return "TPC4";
7092                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7093                         return "NIC1";
7094                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7095                         return "NIC2";
7096                 break;
7097         case RAZWI_INITIATOR_ID_X_Y_TPC5:
7098                 return "TPC5";
7099         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7100         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7101                 return "MME2";
7102         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7103         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7104                 return "MME3";
7105         case RAZWI_INITIATOR_ID_X_Y_TPC6:
7106                 return "TPC6";
7107         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7108                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7109                         return "TPC7";
7110                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7111                         return "NIC4";
7112                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7113                         return "NIC5";
7114                 break;
7115         default:
7116                 break;
7117         }
7118
7119         dev_err(hdev->dev,
7120                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7121                 val,
7122                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7123                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7124                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7125                         RAZWI_INITIATOR_AXI_ID_MASK);
7126
7127         return "unknown initiator";
7128 }
7129
7130 static void gaudi_print_razwi_info(struct hl_device *hdev)
7131 {
7132         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7133                 dev_err_ratelimited(hdev->dev,
7134                         "RAZWI event caused by illegal write of %s\n",
7135                         gaudi_get_razwi_initiator_name(hdev, true));
7136                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7137         }
7138
7139         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7140                 dev_err_ratelimited(hdev->dev,
7141                         "RAZWI event caused by illegal read of %s\n",
7142                         gaudi_get_razwi_initiator_name(hdev, false));
7143                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7144         }
7145 }
7146
7147 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7148 {
7149         struct gaudi_device *gaudi = hdev->asic_specific;
7150         u64 addr;
7151         u32 val;
7152
7153         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7154                 return;
7155
7156         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7157         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7158                 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7159                 addr <<= 32;
7160                 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7161
7162                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7163                                         addr);
7164
7165                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7166         }
7167
7168         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7169         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7170                 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7171                 addr <<= 32;
7172                 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7173
7174                 dev_err_ratelimited(hdev->dev,
7175                                 "MMU access error on va 0x%llx\n", addr);
7176
7177                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7178         }
7179 }
7180
7181 /*
7182  *  +-------------------+------------------------------------------------------+
7183  *  | Configuration Reg |                     Description                      |
7184  *  |      Address      |                                                      |
7185  *  +-------------------+------------------------------------------------------+
7186  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7187  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7188  *  |                   |0xF34 memory wrappers 63:32                           |
7189  *  |                   |0xF38 memory wrappers 95:64                           |
7190  *  |                   |0xF3C memory wrappers 127:96                          |
7191  *  +-------------------+------------------------------------------------------+
7192  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7193  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7194  *  |                   |0xF44 memory wrappers 63:32                           |
7195  *  |                   |0xF48 memory wrappers 95:64                           |
7196  *  |                   |0xF4C memory wrappers 127:96                          |
7197  *  +-------------------+------------------------------------------------------+
7198  */
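/*
 * For example, a single-bit ECC error in memory wrapper 40 is reported in
 * bit 8 of the register at offset 0xF34 (40 = 32 + 8), matching the
 * err_bit + (32 * i) calculation below.
 */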
7199 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7200                 struct ecc_info_extract_params *params, u64 *ecc_address,
7201                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7202 {
7203         struct gaudi_device *gaudi = hdev->asic_specific;
7204         u32 i, num_mem_regs, reg, err_bit;
7205         u64 err_addr, err_word = 0;
7206         int rc = 0;
7207
7208         num_mem_regs = params->num_memories / 32 +
7209                         ((params->num_memories % 32) ? 1 : 0);
7210
7211         if (params->block_address >= CFG_BASE)
7212                 params->block_address -= CFG_BASE;
7213
7214         if (params->derr)
7215                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7216         else
7217                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7218
7219         if (params->disable_clock_gating) {
7220                 mutex_lock(&gaudi->clk_gate_mutex);
7221                 hdev->asic_funcs->disable_clock_gating(hdev);
7222         }
7223
7224         /* Set invalid wrapper index */
7225         *memory_wrapper_idx = 0xFF;
7226
7227         /* Iterate through memory wrappers; a single bit must be set */
7228         for (i = 0 ; i < num_mem_regs ; i++) {
7229                 err_word = RREG32(err_addr + i * 4);
7231                 if (err_word) {
7232                         err_bit = __ffs(err_word);
7233                         *memory_wrapper_idx = err_bit + (32 * i);
7234                         break;
7235                 }
7236         }
7237
7238         if (*memory_wrapper_idx == 0xFF) {
7239                 dev_err(hdev->dev, "ECC error information cannot be found\n");
7240                 rc = -EINVAL;
7241                 goto enable_clk_gate;
7242         }
7243
7244         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7245                         *memory_wrapper_idx);
7246
7247         *ecc_address =
7248                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7249         *ecc_syndrom =
7250                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7251
7252         /* Clear error indication */
7253         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7254         if (params->derr)
7255                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7256         else
7257                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7258
7259         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7260
7261 enable_clk_gate:
7262         if (params->disable_clock_gating) {
7263                 hdev->asic_funcs->set_clock_gating(hdev);
7264
7265                 mutex_unlock(&gaudi->clk_gate_mutex);
7266         }
7267
7268         return rc;
7269 }
7270
7271 /*
7272  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7273  *
7274  * @idx: the current pi/ci value
7275  * @q_len: the queue length (power of 2)
7276  *
7277  * @return the cyclically decremented index
7278  */
7279 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7280 {
7281         u32 mask = q_len - 1;
7282
7283         /*
7284          * modular decrement is equivalent to adding (q_len - 1);
7285          * taking the LSBs afterwards keeps the value in the
7286          * range [0, q_len - 1]
7287          */
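        /* e.g. with q_len = 8: idx 5 decrements to 4 and idx 0 wraps to 7 */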
7288         return (idx + q_len - 1) & mask;
7289 }
7290
7291 /**
7292  * gaudi_print_sw_config_stream_data - print SW config stream data
7293  *
7294  * @hdev: pointer to the habanalabs device structure
7295  * @stream: the QMAN's stream
7296  * @qman_base: base address of QMAN registers block
7297  */
7298 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7299                                                 u64 qman_base)
7300 {
7301         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7302         u32 cq_ptr_lo_off, size;
7303
7304         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7305
7306         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7307                                                 stream * cq_ptr_lo_off;
7308         cq_ptr_hi = cq_ptr_lo +
7309                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7310         cq_tsize = cq_ptr_lo +
7311                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7312
7313         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7314         size = RREG32(cq_tsize);
7315         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
7316                                                         stream, cq_ptr, size);
7317 }
7318
7319 /**
7320  * gaudi_print_last_pqes_on_err - print last PQEs on error
7321  *
7322  * @hdev: pointer to the habanalabs device structure
7323  * @qid_base: first QID of the QMAN (out of 4 streams)
7324  * @stream: the QMAN's stream
7325  * @qman_base: base address of QMAN registers block
7326  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7327  */
7328 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7329                                                 u32 stream, u64 qman_base,
7330                                                 bool pr_sw_conf)
7331 {
7332         u32 ci, qm_ci_stream_off, queue_len;
7333         struct hl_hw_queue *q;
7334         u64 pq_ci;
7335         int i;
7336
7337         q = &hdev->kernel_queues[qid_base + stream];
7338
7339         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7340         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7341                                                 stream * qm_ci_stream_off;
7342
7343         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7344                                         q->int_queue_len : HL_QUEUE_LENGTH;
7345
7346         hdev->asic_funcs->hw_queues_lock(hdev);
7347
7348         if (pr_sw_conf)
7349                 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7350
7351         ci = RREG32(pq_ci);
7352
7353         /* we should start printing from ci - 1 */
7354         ci = gaudi_queue_idx_dec(ci, queue_len);
7355
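        /*
         * Dump up to PQ_FETCHER_CACHE_SIZE buffer descriptors, walking
         * backwards from the entry preceding the current ci.
         */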
7356         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7357                 struct hl_bd *bd;
7358                 u64 addr;
7359                 u32 len;
7360
7361                 bd = q->kernel_address;
7362                 bd += ci;
7363
7364                 len = le32_to_cpu(bd->len);
7365                 /* len 0 means an uninitialized entry - break */
7366                 if (!len)
7367                         break;
7368
7369                 addr = le64_to_cpu(bd->ptr);
7370
7371                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
7372                                                         stream, ci, addr, len);
7373
7374                 /* get previous ci, wrap if needed */
7375                 ci = gaudi_queue_idx_dec(ci, queue_len);
7376         }
7377
7378         hdev->asic_funcs->hw_queues_unlock(hdev);
7379 }
7380
7381 /**
7382  * print_qman_data_on_err - extract QMAN data on error
7383  *
7384  * @hdev: pointer to the habanalabs device structure
7385  * @qid_base: first QID of the QMAN (out of 4 streams)
7386  * @stream: the QMAN's stream
7387  * @qman_base: base address of QMAN registers block
7388  *
7389  * This function attempts to extract as much data as possible on a QMAN error.
7390  * On an upper CP, print the SW config stream data and the last 8 PQEs.
7391  * On the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7392  */
7393 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7394                                                 u32 stream, u64 qman_base)
7395 {
7396         u32 i;
7397
7398         if (stream != QMAN_STREAMS) {
7399                 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7400                                                                         true);
7401                 return;
7402         }
7403
7404         gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7405
7406         for (i = 0; i < QMAN_STREAMS; i++)
7407                 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7408                                                                         false);
7409 }
7410
7411 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7412                                           const char *qm_name,
7413                                           u64 qman_base,
7414                                           u32 qid_base)
7415 {
7416         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7417         u64 glbl_sts_addr, arb_err_addr;
7418         char reg_desc[32];
7419
7420         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7421         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7422
7423         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7424         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7425                 glbl_sts_clr_val = 0;
7426                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7427
7428                 if (!glbl_sts_val)
7429                         continue;
7430
7431                 if (i == QMAN_STREAMS)
7432                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7433                 else
7434                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7435
7436                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7437                         if (glbl_sts_val & BIT(j)) {
7438                                 dev_err_ratelimited(hdev->dev,
7439                                                 "%s %s. err cause: %s\n",
7440                                                 qm_name, reg_desc,
7441                                                 gaudi_qman_error_cause[j]);
7442                                 glbl_sts_clr_val |= BIT(j);
7443                         }
7444                 }
7445
7446                 /* Write 1 to clear errors */
7447                 if (!hdev->stop_on_err)
7448                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7449                 else
7450                         print_qman_data_on_err(hdev, qid_base, i, qman_base);
7451         }
7452
7453         arb_err_val = RREG32(arb_err_addr);
7454
7455         if (!arb_err_val)
7456                 return;
7457
7458         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7459                 if (arb_err_val & BIT(j)) {
7460                         dev_err_ratelimited(hdev->dev,
7461                                         "%s ARB_ERR. err cause: %s\n",
7462                                         qm_name,
7463                                         gaudi_qman_arb_error_cause[j]);
7464                 }
7465         }
7466 }
7467
7468 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7469                 struct hl_eq_sm_sei_data *sei_data)
7470 {
7471         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7472
7473         /* Flip the bits as the enum is ordered in the opposite way */
7474         index = (index ^ 0x3) & 0x3;
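        /* e.g. a raw index of 0 becomes 3 and a raw index of 1 becomes 2 */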
7475
7476         switch (sei_data->sei_cause) {
7477         case SM_SEI_SO_OVERFLOW:
7478                 dev_err_ratelimited(hdev->dev,
7479                         "%s SEI Error: SOB Group %u overflow/underflow",
7480                         gaudi_sync_manager_names[index],
7481                         le32_to_cpu(sei_data->sei_log));
7482                 break;
7483         case SM_SEI_LBW_4B_UNALIGNED:
7484                 dev_err_ratelimited(hdev->dev,
7485                         "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7486                         gaudi_sync_manager_names[index],
7487                         le32_to_cpu(sei_data->sei_log));
7488                 break;
7489         case SM_SEI_AXI_RESPONSE_ERR:
7490                 dev_err_ratelimited(hdev->dev,
7491                         "%s SEI Error: AXI ID %u response error",
7492                         gaudi_sync_manager_names[index],
7493                         le32_to_cpu(sei_data->sei_log));
7494                 break;
7495         default:
7496                 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7497                                 le32_to_cpu(sei_data->sei_log));
7498                 break;
7499         }
7500 }
7501
7502 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7503                 struct hl_eq_ecc_data *ecc_data)
7504 {
7505         struct ecc_info_extract_params params;
7506         u64 ecc_address = 0, ecc_syndrom = 0;
7507         u8 index, memory_wrapper_idx = 0;
7508         bool extract_info_from_fw;
7509         int rc;
7510
7511         if (hdev->asic_prop.fw_security_enabled) {
7512                 extract_info_from_fw = true;
7513                 goto extract_ecc_info;
7514         }
7515
7516         switch (event_type) {
7517         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7518         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7519                 extract_info_from_fw = true;
7520                 break;
7521         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7522                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7523                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7524                 params.num_memories = 90;
7525                 params.derr = false;
7526                 params.disable_clock_gating = true;
7527                 extract_info_from_fw = false;
7528                 break;
7529         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7530                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7531                 params.block_address =
7532                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7533                 params.num_memories = 90;
7534                 params.derr = true;
7535                 params.disable_clock_gating = true;
7536                 extract_info_from_fw = false;
7537                 break;
7538         case GAUDI_EVENT_MME0_ACC_SERR:
7539         case GAUDI_EVENT_MME1_ACC_SERR:
7540         case GAUDI_EVENT_MME2_ACC_SERR:
7541         case GAUDI_EVENT_MME3_ACC_SERR:
7542                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7543                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7544                 params.num_memories = 128;
7545                 params.derr = false;
7546                 params.disable_clock_gating = true;
7547                 extract_info_from_fw = false;
7548                 break;
7549         case GAUDI_EVENT_MME0_ACC_DERR:
7550         case GAUDI_EVENT_MME1_ACC_DERR:
7551         case GAUDI_EVENT_MME2_ACC_DERR:
7552         case GAUDI_EVENT_MME3_ACC_DERR:
7553                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7554                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7555                 params.num_memories = 128;
7556                 params.derr = true;
7557                 params.disable_clock_gating = true;
7558                 extract_info_from_fw = false;
7559                 break;
7560         case GAUDI_EVENT_MME0_SBAB_SERR:
7561         case GAUDI_EVENT_MME1_SBAB_SERR:
7562         case GAUDI_EVENT_MME2_SBAB_SERR:
7563         case GAUDI_EVENT_MME3_SBAB_SERR:
7564                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7565                 params.block_address =
7566                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7567                 params.num_memories = 33;
7568                 params.derr = false;
7569                 params.disable_clock_gating = true;
7570                 extract_info_from_fw = false;
7571                 break;
7572         case GAUDI_EVENT_MME0_SBAB_DERR:
7573         case GAUDI_EVENT_MME1_SBAB_DERR:
7574         case GAUDI_EVENT_MME2_SBAB_DERR:
7575         case GAUDI_EVENT_MME3_SBAB_DERR:
7576                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7577                 params.block_address =
7578                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7579                 params.num_memories = 33;
7580                 params.derr = true;
7581                 params.disable_clock_gating = true;
7582                 extract_info_from_fw = false;
7583                 break;
7584         default:
7585                 return;
7586         }
7587
7588 extract_ecc_info:
7589         if (extract_info_from_fw) {
7590                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7591                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7592                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7593         } else {
7594                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7595                                 &ecc_syndrom, &memory_wrapper_idx);
7596                 if (rc)
7597                         return;
7598         }
7599
7600         dev_err(hdev->dev,
7601                 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7602                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7603 }
7604
7605 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7606 {
7607         u64 qman_base;
7608         char desc[32];
7609         u32 qid_base;
7610         u8 index;
7611
7612         switch (event_type) {
7613         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7614                 index = event_type - GAUDI_EVENT_TPC0_QM;
7615                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7616                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7617                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7618                 break;
7619         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7620                 index = event_type - GAUDI_EVENT_MME0_QM;
7621                 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7622                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7623                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7624                 break;
7625         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7626                 index = event_type - GAUDI_EVENT_DMA0_QM;
7627                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7628                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7629                 if (index > 1)
7630                         qid_base++;
7631                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7632                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7633                 break;
7634         case GAUDI_EVENT_NIC0_QM0:
7635                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7636                 qman_base = mmNIC0_QM0_BASE;
7637                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7638                 break;
7639         case GAUDI_EVENT_NIC0_QM1:
7640                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7641                 qman_base = mmNIC0_QM1_BASE;
7642                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7643                 break;
7644         case GAUDI_EVENT_NIC1_QM0:
7645                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7646                 qman_base = mmNIC1_QM0_BASE;
7647                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7648                 break;
7649         case GAUDI_EVENT_NIC1_QM1:
7650                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7651                 qman_base = mmNIC1_QM1_BASE;
7652                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7653                 break;
7654         case GAUDI_EVENT_NIC2_QM0:
7655                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7656                 qman_base = mmNIC2_QM0_BASE;
7657                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7658                 break;
7659         case GAUDI_EVENT_NIC2_QM1:
7660                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7661                 qman_base = mmNIC2_QM1_BASE;
7662                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7663                 break;
7664         case GAUDI_EVENT_NIC3_QM0:
7665                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7666                 qman_base = mmNIC3_QM0_BASE;
7667                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7668                 break;
7669         case GAUDI_EVENT_NIC3_QM1:
7670                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7671                 qman_base = mmNIC3_QM1_BASE;
7672                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7673                 break;
7674         case GAUDI_EVENT_NIC4_QM0:
7675                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7676                 qman_base = mmNIC4_QM0_BASE;
7677                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7678                 break;
7679         case GAUDI_EVENT_NIC4_QM1:
7680                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7681                 qman_base = mmNIC4_QM1_BASE;
7682                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7683                 break;
7684         default:
7685                 return;
7686         }
7687
7688         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7689 }
7690
7691 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7692                                         bool razwi)
7693 {
7694         char desc[64] = "";
7695
7696         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7697         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7698                 event_type, desc);
7699
7700         if (razwi) {
7701                 gaudi_print_razwi_info(hdev);
7702                 gaudi_print_mmu_error_info(hdev);
7703         }
7704 }
7705
7706 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7707                                         struct cpucp_pkt_sync_err *sync_err)
7708 {
7709         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7710
7711         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7712                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7713 }
7714
7715 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7716                                         struct hl_eq_fw_alive *fw_alive)
7717 {
7718         dev_err(hdev->dev,
7719                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7720                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7721                 "Minor" : "Critical", fw_alive->process_id,
7722                 fw_alive->thread_id, fw_alive->uptime_seconds);
7723 }
7724
7725 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7726 {
7727         struct gaudi_device *gaudi = hdev->asic_specific;
7728
7729         /* Unmask all IRQs since some could have been received
7730          * during the soft reset
7731          */
7732         return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7733 }
7734
7735 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7736                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7737 {
7738         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7739         int rc = 0;
7740
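        /*
         * When the firmware reports HBM ECC data through the event queue,
         * log the FW-provided counters directly instead of reading the HBM
         * memory-controller registers further below.
         */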
7741         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7742                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7743                 if (!hbm_ecc_data) {
7744                         dev_err(hdev->dev, "No FW ECC data\n");
7745                         return 0;
7746                 }
7747
7748                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7749                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7750                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7751                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7752                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7753                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7754                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7755                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7756                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7757                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7758                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7759                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7760                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7761                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7762
7763                 dev_err(hdev->dev,
7764                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7765                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7766                 dev_err(hdev->dev,
7767                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7768                         device, ch, hbm_ecc_data->first_addr, type,
7769                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7770                         hbm_ecc_data->dec_cnt);
7771                 return 0;
7772         }
7773
7774         if (hdev->asic_prop.fw_security_enabled) {
7775                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7776                 return 0;
7777         }
7778
7779         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
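        /*
         * No FW-provided data - scan the HBM MC registers directly. Each
         * loop iteration covers one channel pair (0x1000 register stride);
         * the registers at offsets 0x06C and 0x07C hold the interrupt bits
         * for the two halves of the pair, reported below as pc = ch * 2 and
         * ch * 2 + 1 respectively.
         */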
7780         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7781                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7782                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7783                 if (val) {
7784                         rc = -EIO;
7785                         dev_err(hdev->dev,
7786                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7787                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7788                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7789                                 (val >> 4) & 0x1);
7790
7791                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7792                         dev_err(hdev->dev,
7793                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7794                                 device, ch * 2,
7795                                 RREG32(base + ch * 0x1000 + 0x064),
7796                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7797                                 (val2 & 0xFF0000) >> 16,
7798                                 (val2 & 0xFF000000) >> 24);
7799                 }
7800
7801                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7802                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7803                 if (val) {
7804                         rc = -EIO;
7805                         dev_err(hdev->dev,
7806                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7807                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7808                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7809                                 (val >> 4) & 0x1);
7810
7811                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7812                         dev_err(hdev->dev,
7813                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7814                                 device, ch * 2 + 1,
7815                                 RREG32(base + ch * 0x1000 + 0x074),
7816                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7817                                 (val2 & 0xFF0000) >> 16,
7818                                 (val2 & 0xFF000000) >> 24);
7819                 }
7820
7821                 /* Clear interrupts */
7822                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7823                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7824                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7825                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7826                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7827                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7828         }
7829
7830         val  = RREG32(base + 0x8F30);
7831         val2 = RREG32(base + 0x8F34);
7832         if (val | val2) {
7833                 rc = -EIO;
7834                 dev_err(hdev->dev,
7835                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7836                         device, val, val2);
7837         }
7838         val  = RREG32(base + 0x8F40);
7839         val2 = RREG32(base + 0x8F44);
7840         if (val | val2) {
7841                 rc = -EIO;
7842                 dev_err(hdev->dev,
7843                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7844                         device, val, val2);
7845         }
7846
7847         return rc;
7848 }
7849
7850 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7851 {
7852         switch (hbm_event_type) {
7853         case GAUDI_EVENT_HBM0_SPI_0:
7854         case GAUDI_EVENT_HBM0_SPI_1:
7855                 return 0;
7856         case GAUDI_EVENT_HBM1_SPI_0:
7857         case GAUDI_EVENT_HBM1_SPI_1:
7858                 return 1;
7859         case GAUDI_EVENT_HBM2_SPI_0:
7860         case GAUDI_EVENT_HBM2_SPI_1:
7861                 return 2;
7862         case GAUDI_EVENT_HBM3_SPI_0:
7863         case GAUDI_EVENT_HBM3_SPI_1:
7864                 return 3;
7865         default:
7866                 break;
7867         }
7868
7869         /* Should never happen */
7870         return 0;
7871 }
7872
7873 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7874                                         char *interrupt_name)
7875 {
7876         struct gaudi_device *gaudi = hdev->asic_specific;
7877         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7878         bool soft_reset_required = false;
7879
7880         /* Accessing the TPC_INTR_CAUSE registers requires disabling clock
7881          * gating, which cannot be done by the CPU-CP firmware, so the driver
7882          * handles it here instead.
7883          */
7884
7885         mutex_lock(&gaudi->clk_gate_mutex);
7886
7887         hdev->asic_funcs->disable_clock_gating(hdev);
7888
7889         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7890                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7891
7892         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7893                 if (tpc_interrupts_cause & BIT(i)) {
7894                         dev_err_ratelimited(hdev->dev,
7895                                         "TPC%d_%s interrupt cause: %s\n",
7896                                         tpc_id, interrupt_name,
7897                                         gaudi_tpc_interrupts_cause[i]);
7898                         /* If this is a QM error, we need to soft-reset */
7899                         if (i == 15)
7900                                 soft_reset_required = true;
7901                 }
7902
7903         /* Clear interrupts */
7904         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7905
7906         hdev->asic_funcs->set_clock_gating(hdev);
7907
7908         mutex_unlock(&gaudi->clk_gate_mutex);
7909
7910         return soft_reset_required;
7911 }
7912
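/*
 * TPCx_DEC event IDs are spaced two apart per TPC in the event map, so the
 * TPC index is the offset from GAUDI_EVENT_TPC0_DEC divided by two.
 */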
7913 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7914 {
7915         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7916 }
7917
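/*
 * TPCx_KRN_ERR event IDs are spaced six apart per TPC in the event map, so
 * the TPC index is the offset from GAUDI_EVENT_TPC0_KRN_ERR divided by six.
 */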
7918 static int tpc_krn_event_to_tpc_id(u16 tpc_krn_event_type)
7919 {
7920         return (tpc_krn_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7921 }
7922
7923 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7924                                         u16 event_type)
7925 {
7926         switch (event_type) {
7927         case GAUDI_EVENT_FIX_POWER_ENV_S:
7928                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7929                 dev_info_ratelimited(hdev->dev,
7930                         "Clock throttling due to power consumption\n");
7931                 break;
7932
7933         case GAUDI_EVENT_FIX_POWER_ENV_E:
7934                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7935                 dev_info_ratelimited(hdev->dev,
7936                         "Power envelope is safe, back to optimal clock\n");
7937                 break;
7938
7939         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7940                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7941                 dev_info_ratelimited(hdev->dev,
7942                         "Clock throttling due to overheating\n");
7943                 break;
7944
7945         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7946                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7947                 dev_info_ratelimited(hdev->dev,
7948                         "Thermal envelope is safe, back to optimal clock\n");
7949                 break;
7950
7951         default:
7952                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7953                         event_type);
7954                 break;
7955         }
7956 }
7957
7958 static void gaudi_handle_eqe(struct hl_device *hdev,
7959                                 struct hl_eq_entry *eq_entry)
7960 {
7961         struct gaudi_device *gaudi = hdev->asic_specific;
7962         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7963         u32 fw_fatal_err_flag = 0;
7964         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7965                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7966         bool reset_required;
7967         u8 cause;
7968         int rc;
7969
7970         if (event_type >= GAUDI_EVENT_SIZE) {
7971                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
7972                                 event_type, GAUDI_EVENT_SIZE - 1);
7973                 return;
7974         }
7975
7976         gaudi->events_stat[event_type]++;
7977         gaudi->events_stat_aggregate[event_type]++;
7978
7979         switch (event_type) {
7980         case GAUDI_EVENT_PCIE_CORE_DERR:
7981         case GAUDI_EVENT_PCIE_IF_DERR:
7982         case GAUDI_EVENT_PCIE_PHY_DERR:
7983         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7984         case GAUDI_EVENT_MME0_ACC_DERR:
7985         case GAUDI_EVENT_MME0_SBAB_DERR:
7986         case GAUDI_EVENT_MME1_ACC_DERR:
7987         case GAUDI_EVENT_MME1_SBAB_DERR:
7988         case GAUDI_EVENT_MME2_ACC_DERR:
7989         case GAUDI_EVENT_MME2_SBAB_DERR:
7990         case GAUDI_EVENT_MME3_ACC_DERR:
7991         case GAUDI_EVENT_MME3_SBAB_DERR:
7992         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7993                 fallthrough;
7994         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7995         case GAUDI_EVENT_PSOC_MEM_DERR:
7996         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7997         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7998         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7999         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
8000         case GAUDI_EVENT_MMU_DERR:
8001         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
8002                 gaudi_print_irq_info(hdev, event_type, true);
8003                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8004                 fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
8005                 goto reset_device;
8006
8007         case GAUDI_EVENT_GIC500:
8008         case GAUDI_EVENT_AXI_ECC:
8009         case GAUDI_EVENT_L2_RAM_ECC:
8010         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
8011                 gaudi_print_irq_info(hdev, event_type, false);
8012                 fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
8013                 goto reset_device;
8014
8015         case GAUDI_EVENT_HBM0_SPI_0:
8016         case GAUDI_EVENT_HBM1_SPI_0:
8017         case GAUDI_EVENT_HBM2_SPI_0:
8018         case GAUDI_EVENT_HBM3_SPI_0:
8019                 gaudi_print_irq_info(hdev, event_type, false);
8020                 gaudi_hbm_read_interrupts(hdev,
8021                                 gaudi_hbm_event_to_dev(event_type),
8022                                 &eq_entry->hbm_ecc_data);
8023                 fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
8024                 goto reset_device;
8025
8026         case GAUDI_EVENT_HBM0_SPI_1:
8027         case GAUDI_EVENT_HBM1_SPI_1:
8028         case GAUDI_EVENT_HBM2_SPI_1:
8029         case GAUDI_EVENT_HBM3_SPI_1:
8030                 gaudi_print_irq_info(hdev, event_type, false);
8031                 gaudi_hbm_read_interrupts(hdev,
8032                                 gaudi_hbm_event_to_dev(event_type),
8033                                 &eq_entry->hbm_ecc_data);
8034                 hl_fw_unmask_irq(hdev, event_type);
8035                 break;
8036
8037         case GAUDI_EVENT_TPC0_DEC:
8038         case GAUDI_EVENT_TPC1_DEC:
8039         case GAUDI_EVENT_TPC2_DEC:
8040         case GAUDI_EVENT_TPC3_DEC:
8041         case GAUDI_EVENT_TPC4_DEC:
8042         case GAUDI_EVENT_TPC5_DEC:
8043         case GAUDI_EVENT_TPC6_DEC:
8044         case GAUDI_EVENT_TPC7_DEC:
8045                 gaudi_print_irq_info(hdev, event_type, true);
8046                 reset_required = gaudi_tpc_read_interrupts(hdev,
8047                                         tpc_dec_event_to_tpc_id(event_type),
8048                                         "AXI_SLV_DEC_Error");
8049                 if (reset_required) {
8050                         dev_err(hdev->dev, "reset required due to %s\n",
8051                                 gaudi_irq_map_table[event_type].name);
8052
8053                         hl_device_reset(hdev, 0);
8054                 } else {
8055                         hl_fw_unmask_irq(hdev, event_type);
8056                 }
8057                 break;
8058
8059         case GAUDI_EVENT_TPC0_KRN_ERR:
8060         case GAUDI_EVENT_TPC1_KRN_ERR:
8061         case GAUDI_EVENT_TPC2_KRN_ERR:
8062         case GAUDI_EVENT_TPC3_KRN_ERR:
8063         case GAUDI_EVENT_TPC4_KRN_ERR:
8064         case GAUDI_EVENT_TPC5_KRN_ERR:
8065         case GAUDI_EVENT_TPC6_KRN_ERR:
8066         case GAUDI_EVENT_TPC7_KRN_ERR:
8067                 gaudi_print_irq_info(hdev, event_type, true);
8068                 reset_required = gaudi_tpc_read_interrupts(hdev,
8069                                         tpc_krn_event_to_tpc_id(event_type),
8070                                         "KRN_ERR");
8071                 if (reset_required) {
8072                         dev_err(hdev->dev, "reset required due to %s\n",
8073                                 gaudi_irq_map_table[event_type].name);
8074
8075                         hl_device_reset(hdev, 0);
8076                 } else {
8077                         hl_fw_unmask_irq(hdev, event_type);
8078                 }
8079                 break;
8080
8081         case GAUDI_EVENT_PCIE_CORE_SERR:
8082         case GAUDI_EVENT_PCIE_IF_SERR:
8083         case GAUDI_EVENT_PCIE_PHY_SERR:
8084         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8085         case GAUDI_EVENT_MME0_ACC_SERR:
8086         case GAUDI_EVENT_MME0_SBAB_SERR:
8087         case GAUDI_EVENT_MME1_ACC_SERR:
8088         case GAUDI_EVENT_MME1_SBAB_SERR:
8089         case GAUDI_EVENT_MME2_ACC_SERR:
8090         case GAUDI_EVENT_MME2_SBAB_SERR:
8091         case GAUDI_EVENT_MME3_ACC_SERR:
8092         case GAUDI_EVENT_MME3_SBAB_SERR:
8093         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8094         case GAUDI_EVENT_CPU_IF_ECC_SERR:
8095         case GAUDI_EVENT_PSOC_MEM_SERR:
8096         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8097         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8098         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8099         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8100                 fallthrough;
8101         case GAUDI_EVENT_MMU_SERR:
8102                 gaudi_print_irq_info(hdev, event_type, true);
8103                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8104                 hl_fw_unmask_irq(hdev, event_type);
8105                 break;
8106
8107         case GAUDI_EVENT_PCIE_DEC:
8108         case GAUDI_EVENT_MME0_WBC_RSP:
8109         case GAUDI_EVENT_MME0_SBAB0_RSP:
8110         case GAUDI_EVENT_MME1_WBC_RSP:
8111         case GAUDI_EVENT_MME1_SBAB0_RSP:
8112         case GAUDI_EVENT_MME2_WBC_RSP:
8113         case GAUDI_EVENT_MME2_SBAB0_RSP:
8114         case GAUDI_EVENT_MME3_WBC_RSP:
8115         case GAUDI_EVENT_MME3_SBAB0_RSP:
8116         case GAUDI_EVENT_CPU_AXI_SPLITTER:
8117         case GAUDI_EVENT_PSOC_AXI_DEC:
8118         case GAUDI_EVENT_PSOC_PRSTN_FALL:
8119         case GAUDI_EVENT_MMU_PAGE_FAULT:
8120         case GAUDI_EVENT_MMU_WR_PERM:
8121         case GAUDI_EVENT_RAZWI_OR_ADC:
8122         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8123         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8124         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8125                 fallthrough;
8126         case GAUDI_EVENT_NIC0_QM0:
8127         case GAUDI_EVENT_NIC0_QM1:
8128         case GAUDI_EVENT_NIC1_QM0:
8129         case GAUDI_EVENT_NIC1_QM1:
8130         case GAUDI_EVENT_NIC2_QM0:
8131         case GAUDI_EVENT_NIC2_QM1:
8132         case GAUDI_EVENT_NIC3_QM0:
8133         case GAUDI_EVENT_NIC3_QM1:
8134         case GAUDI_EVENT_NIC4_QM0:
8135         case GAUDI_EVENT_NIC4_QM1:
8136         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8137                 gaudi_print_irq_info(hdev, event_type, true);
8138                 gaudi_handle_qman_err(hdev, event_type);
8139                 hl_fw_unmask_irq(hdev, event_type);
8140                 break;
8141
8142         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8143                 gaudi_print_irq_info(hdev, event_type, true);
8144                 goto reset_device;
8145
8146         case GAUDI_EVENT_TPC0_BMON_SPMU:
8147         case GAUDI_EVENT_TPC1_BMON_SPMU:
8148         case GAUDI_EVENT_TPC2_BMON_SPMU:
8149         case GAUDI_EVENT_TPC3_BMON_SPMU:
8150         case GAUDI_EVENT_TPC4_BMON_SPMU:
8151         case GAUDI_EVENT_TPC5_BMON_SPMU:
8152         case GAUDI_EVENT_TPC6_BMON_SPMU:
8153         case GAUDI_EVENT_TPC7_BMON_SPMU:
8154         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8155                 gaudi_print_irq_info(hdev, event_type, false);
8156                 hl_fw_unmask_irq(hdev, event_type);
8157                 break;
8158
8159         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8160                 gaudi_print_irq_info(hdev, event_type, false);
8161                 gaudi_print_sm_sei_info(hdev, event_type,
8162                                         &eq_entry->sm_sei_data);
8163                 rc = hl_state_dump(hdev);
8164                 if (rc)
8165                         dev_err(hdev->dev,
8166                                 "Error during system state dump %d\n", rc);
8167                 hl_fw_unmask_irq(hdev, event_type);
8168                 break;
8169
8170         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8171                 gaudi_print_clk_change_info(hdev, event_type);
8172                 hl_fw_unmask_irq(hdev, event_type);
8173                 break;
8174
8175         case GAUDI_EVENT_PSOC_GPIO_U16_0:
8176                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8177                 dev_err(hdev->dev,
8178                         "Received high temp H/W interrupt %d (cause %d)\n",
8179                         event_type, cause);
8180                 break;
8181
8182         case GAUDI_EVENT_DEV_RESET_REQ:
8183                 gaudi_print_irq_info(hdev, event_type, false);
8184                 goto reset_device;
8185
8186         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8187                 gaudi_print_irq_info(hdev, event_type, false);
8188                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8189                 goto reset_device;
8190
8191         case GAUDI_EVENT_FW_ALIVE_S:
8192                 gaudi_print_irq_info(hdev, event_type, false);
8193                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8194                 goto reset_device;
8195
8196         default:
8197                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8198                                 event_type);
8199                 break;
8200         }
8201
8202         return;
8203
8204 reset_device:
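        /*
         * On a secured device the firmware owns the reset flow, so request a
         * hard reset through the FW. Otherwise reset from the driver if hard
         * reset on FW events is enabled, or just unmask the interrupt and
         * keep the device running.
         */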
8205         if (hdev->asic_prop.fw_security_enabled)
8206                 hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW | fw_fatal_err_flag);
8207         else if (hdev->hard_reset_on_fw_events)
8208                 hl_device_reset(hdev, HL_RESET_HARD | fw_fatal_err_flag);
8209         else
8210                 hl_fw_unmask_irq(hdev, event_type);
8211 }
8212
8213 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8214                                         u32 *size)
8215 {
8216         struct gaudi_device *gaudi = hdev->asic_specific;
8217
8218         if (aggregate) {
8219                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8220                 return gaudi->events_stat_aggregate;
8221         }
8222
8223         *size = (u32) sizeof(gaudi->events_stat);
8224         return gaudi->events_stat;
8225 }
8226
8227 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8228                                         u32 flags)
8229 {
8230         struct gaudi_device *gaudi = hdev->asic_specific;
8231         u32 status, timeout_usec;
8232         int rc;
8233
8234         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8235                 hdev->hard_reset_pending)
8236                 return 0;
8237
8238         if (hdev->pldm)
8239                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8240         else
8241                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8242
8243         /* L0 & L1 invalidation */
8244         WREG32(mmSTLB_INV_PS, 3);
8245         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8246         WREG32(mmSTLB_INV_PS, 2);
8247
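        /* Wait for the HW to clear the invalidation status register */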
8248         rc = hl_poll_timeout(
8249                 hdev,
8250                 mmSTLB_INV_PS,
8251                 status,
8252                 !status,
8253                 1000,
8254                 timeout_usec);
8255
8256         WREG32(mmSTLB_INV_SET, 0);
8257
8258         if (rc) {
8259                 dev_err_ratelimited(hdev->dev,
8260                                         "MMU cache invalidation timeout\n");
8261                 hl_device_reset(hdev, HL_RESET_HARD);
8262         }
8263
8264         return rc;
8265 }
8266
8267 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8268                                                 bool is_hard, u32 flags,
8269                                                 u32 asid, u64 va, u64 size)
8270 {
8271         /* Treat as invalidate all because there is no range invalidation
8272          * in Gaudi
8273          */
8274         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8275 }
8276
8277 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8278                                         u32 asid, u64 phys_addr)
8279 {
8280         u32 status, timeout_usec;
8281         int rc;
8282
8283         if (hdev->pldm)
8284                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8285         else
8286                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8287
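        /*
         * Program the ASID and its hop0 page-table physical address, then
         * set the busy bit and wait below for the HW to clear it.
         */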
8288         WREG32(MMU_ASID, asid);
8289         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8290         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8291         WREG32(MMU_BUSY, 0x80000000);
8292
8293         rc = hl_poll_timeout(
8294                 hdev,
8295                 MMU_BUSY,
8296                 status,
8297                 !(status & 0x80000000),
8298                 1000,
8299                 timeout_usec);
8300
8301         if (rc) {
8302                 dev_err(hdev->dev,
8303                         "Timeout during MMU hop0 config of asid %d\n", asid);
8304                 return rc;
8305         }
8306
8307         return 0;
8308 }
8309
8310 static int gaudi_send_heartbeat(struct hl_device *hdev)
8311 {
8312         struct gaudi_device *gaudi = hdev->asic_specific;
8313
8314         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8315                 return 0;
8316
8317         return hl_fw_send_heartbeat(hdev);
8318 }
8319
8320 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8321 {
8322         struct gaudi_device *gaudi = hdev->asic_specific;
8323         struct asic_fixed_properties *prop = &hdev->asic_prop;
8324         int rc;
8325
8326         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8327                 return 0;
8328
8329         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8330                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8331                                         mmCPU_BOOT_ERR1);
8332         if (rc)
8333                 return rc;
8334
8335         if (!strlen(prop->cpucp_info.card_name))
8336                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8337                                 CARD_NAME_MAX_LEN);
8338
8339         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8340
8341         set_default_power_values(hdev);
8342
8343         hdev->max_power = prop->max_power_default;
8344
8345         return 0;
8346 }
8347
8348 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8349                                         u8 mask_len, struct seq_file *s)
8350 {
8351         struct gaudi_device *gaudi = hdev->asic_specific;
8352         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8353         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8354         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8355         unsigned long *mask = (unsigned long *)mask_arr;
8356         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8357         bool is_idle = true, is_eng_idle, is_slave;
8358         u64 offset;
8359         int i, dma_id, port;
8360
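        /*
         * Engine status registers are sampled with clock gating disabled,
         * mirroring the constraint noted above for the TPC interrupt-cause
         * registers.
         */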
8361         mutex_lock(&gaudi->clk_gate_mutex);
8362
8363         hdev->asic_funcs->disable_clock_gating(hdev);
8364
8365         if (s)
8366                 seq_puts(s,
8367                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8368                         "---  -------  ------------  ----------  -------------\n");
8369
8370         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8371                 dma_id = gaudi_dma_assignment[i];
8372                 offset = dma_id * DMA_QMAN_OFFSET;
8373
8374                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8375                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8376                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8377                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8378                                 IS_DMA_IDLE(dma_core_sts0);
8379                 is_idle &= is_eng_idle;
8380
8381                 if (mask && !is_eng_idle)
8382                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8383                 if (s)
8384                         seq_printf(s, fmt, dma_id,
8385                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8386                                 qm_cgm_sts, dma_core_sts0);
8387         }
8388
8389         if (s)
8390                 seq_puts(s,
8391                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8392                         "---  -------  ------------  ----------  ----------\n");
8393
8394         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8395                 offset = i * TPC_QMAN_OFFSET;
8396                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8397                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8398                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8399                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8400                                 IS_TPC_IDLE(tpc_cfg_sts);
8401                 is_idle &= is_eng_idle;
8402
8403                 if (mask && !is_eng_idle)
8404                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8405                 if (s)
8406                         seq_printf(s, fmt, i,
8407                                 is_eng_idle ? "Y" : "N",
8408                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8409         }
8410
8411         if (s)
8412                 seq_puts(s,
8413                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8414                         "---  -------  ------------  ----------  -----------\n");
8415
8416         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8417                 offset = i * MME_QMAN_OFFSET;
8418                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8419                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8420
8421                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8422                 is_slave = i % 2;
8423                 if (!is_slave) {
8424                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8425                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8426                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8427                 }
8428
8429                 is_idle &= is_eng_idle;
8430
8431                 if (mask && !is_eng_idle)
8432                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8433                 if (s) {
8434                         if (!is_slave)
8435                                 seq_printf(s, fmt, i,
8436                                         is_eng_idle ? "Y" : "N",
8437                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8438                         else
8439                                 seq_printf(s, mme_slave_fmt, i,
8440                                         is_eng_idle ? "Y" : "N", "-",
8441                                         "-", mme_arch_sts);
8442                 }
8443         }
8444
8445         if (s)
8446                 seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8447                                 "---  -------  ------------  ----------\n");
8448
8449         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8450                 offset = i * NIC_MACRO_QMAN_OFFSET;
8451                 port = 2 * i;
8452                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8453                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8454                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8455                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8456                         is_idle &= is_eng_idle;
8457
8458                         if (mask && !is_eng_idle)
8459                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8460                         if (s)
8461                                 seq_printf(s, nic_fmt, port,
8462                                                 is_eng_idle ? "Y" : "N",
8463                                                 qm_glbl_sts0, qm_cgm_sts);
8464                 }
8465
8466                 port = 2 * i + 1;
8467                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8468                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8469                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8470                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8471                         is_idle &= is_eng_idle;
8472
8473                         if (mask && !is_eng_idle)
8474                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8475                         if (s)
8476                                 seq_printf(s, nic_fmt, port,
8477                                                 is_eng_idle ? "Y" : "N",
8478                                                 qm_glbl_sts0, qm_cgm_sts);
8479                 }
8480         }
8481
8482         if (s)
8483                 seq_puts(s, "\n");
8484
8485         hdev->asic_funcs->set_clock_gating(hdev);
8486
8487         mutex_unlock(&gaudi->clk_gate_mutex);
8488
8489         return is_idle;
8490 }
8491
8492 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8493         __acquires(&gaudi->hw_queues_lock)
8494 {
8495         struct gaudi_device *gaudi = hdev->asic_specific;
8496
8497         spin_lock(&gaudi->hw_queues_lock);
8498 }
8499
8500 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8501         __releases(&gaudi->hw_queues_lock)
8502 {
8503         struct gaudi_device *gaudi = hdev->asic_specific;
8504
8505         spin_unlock(&gaudi->hw_queues_lock);
8506 }
8507
8508 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8509 {
8510         return hdev->pdev->device;
8511 }
8512
8513 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8514                                 size_t max_size)
8515 {
8516         struct gaudi_device *gaudi = hdev->asic_specific;
8517
8518         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8519                 return 0;
8520
8521         return hl_fw_get_eeprom_data(hdev, data, max_size);
8522 }
8523
8524 /*
8525  * this function should be used only during initialization and/or after reset,
8526  * when there are no active users.
8527  */
8528 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8529                                 u32 tpc_id)
8530 {
8531         struct gaudi_device *gaudi = hdev->asic_specific;
8532         u64 kernel_timeout;
8533         u32 status, offset;
8534         int rc;
8535
8536         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8537
8538         if (hdev->pldm)
8539                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8540         else
8541                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8542
8543         mutex_lock(&gaudi->clk_gate_mutex);
8544
8545         hdev->asic_funcs->disable_clock_gating(hdev);
8546
8547         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8548                         lower_32_bits(tpc_kernel));
8549         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8550                         upper_32_bits(tpc_kernel));
8551
8552         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8553                         lower_32_bits(tpc_kernel));
8554         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8555                         upper_32_bits(tpc_kernel));
8556         /* set a valid LUT pointer, content is of no significance */
8557         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8558                         lower_32_bits(tpc_kernel));
8559         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8560                         upper_32_bits(tpc_kernel));
8561
8562         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8563                         lower_32_bits(CFG_BASE +
8564                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8565
8566         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8567                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8568                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8569         /* wait a bit for the engine to start executing */
8570         usleep_range(1000, 1500);
8571
8572         /* wait until engine has finished executing */
8573         rc = hl_poll_timeout(
8574                 hdev,
8575                 mmTPC0_CFG_STATUS + offset,
8576                 status,
8577                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8578                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8579                 1000,
8580                 kernel_timeout);
8581
8582         if (rc) {
8583                 dev_err(hdev->dev,
8584                         "Timeout while waiting for TPC%d icache prefetch\n",
8585                         tpc_id);
8586                 hdev->asic_funcs->set_clock_gating(hdev);
8587                 mutex_unlock(&gaudi->clk_gate_mutex);
8588                 return -EIO;
8589         }
8590
8591         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8592                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8593
8594         /* wait a bit for the engine to start executing */
8595         usleep_range(1000, 1500);
8596
8597         /* wait until engine has finished executing */
8598         rc = hl_poll_timeout(
8599                 hdev,
8600                 mmTPC0_CFG_STATUS + offset,
8601                 status,
8602                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8603                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8604                 1000,
8605                 kernel_timeout);
8606
8607         if (rc) {
8608                 dev_err(hdev->dev,
8609                         "Timeout while waiting for TPC%d vector pipe\n",
8610                         tpc_id);
8611                 hdev->asic_funcs->set_clock_gating(hdev);
8612                 mutex_unlock(&gaudi->clk_gate_mutex);
8613                 return -EIO;
8614         }
8615
8616         rc = hl_poll_timeout(
8617                 hdev,
8618                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8619                 status,
8620                 (status == 0),
8621                 1000,
8622                 kernel_timeout);
8623
8624         hdev->asic_funcs->set_clock_gating(hdev);
8625         mutex_unlock(&gaudi->clk_gate_mutex);
8626
8627         if (rc) {
8628                 dev_err(hdev->dev,
8629                         "Timeout while waiting for TPC%d kernel to execute\n",
8630                         tpc_id);
8631                 return -EIO;
8632         }
8633
8634         return 0;
8635 }
8636
8637 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8638                 struct hl_ctx *ctx)
8639 {
8640         struct gaudi_device *gaudi = hdev->asic_specific;
8641         int min_alloc_order, rc, collective_cb_size;
8642
8643         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8644                 return 0;
8645
8646         hdev->internal_cb_pool_virt_addr =
8647                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8648                                         HOST_SPACE_INTERNAL_CB_SZ,
8649                                         &hdev->internal_cb_pool_dma_addr,
8650                                         GFP_KERNEL | __GFP_ZERO);
8651
8652         if (!hdev->internal_cb_pool_virt_addr)
8653                 return -ENOMEM;
8654
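        /*
         * The pool's minimum allocation order is sized to fit one collective
         * wait CB: five MSG_SHORT packets plus a single FENCE packet.
         */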
8655         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8656                         sizeof(struct packet_fence);
8657         min_alloc_order = ilog2(collective_cb_size);
8658
8659         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8660         if (!hdev->internal_cb_pool) {
8661                 dev_err(hdev->dev,
8662                         "Failed to create internal CB pool\n");
8663                 rc = -ENOMEM;
8664                 goto free_internal_cb_pool;
8665         }
8666
8667         rc = gen_pool_add(hdev->internal_cb_pool,
8668                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8669                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8670         if (rc) {
8671                 dev_err(hdev->dev,
8672                         "Failed to add memory to internal CB pool\n");
8673                 rc = -EFAULT;
8674                 goto destroy_internal_cb_pool;
8675         }
8676
8677         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8678                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8679                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8680
8681         if (!hdev->internal_cb_va_base) {
8682                 rc = -ENOMEM;
8683                 goto destroy_internal_cb_pool;
8684         }
8685
8686         mutex_lock(&ctx->mmu_lock);
8687         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8688                         hdev->internal_cb_pool_dma_addr,
8689                         HOST_SPACE_INTERNAL_CB_SZ);
8690
8691         hdev->asic_funcs->mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8692         mutex_unlock(&ctx->mmu_lock);
8693
8694         if (rc)
8695                 goto unreserve_internal_cb_pool;
8696
8697         return 0;
8698
8699 unreserve_internal_cb_pool:
8700         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8701                         HOST_SPACE_INTERNAL_CB_SZ);
8702 destroy_internal_cb_pool:
8703         gen_pool_destroy(hdev->internal_cb_pool);
8704 free_internal_cb_pool:
8705         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8706                         HOST_SPACE_INTERNAL_CB_SZ,
8707                         hdev->internal_cb_pool_virt_addr,
8708                         hdev->internal_cb_pool_dma_addr);
8709
8710         return rc;
8711 }
8712
8713 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8714                 struct hl_ctx *ctx)
8715 {
8716         struct gaudi_device *gaudi = hdev->asic_specific;
8717
8718         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8719                 return;
8720
8721         mutex_lock(&ctx->mmu_lock);
8722         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8723                         HOST_SPACE_INTERNAL_CB_SZ);
8724         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8725                         HOST_SPACE_INTERNAL_CB_SZ);
8726         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8727         mutex_unlock(&ctx->mmu_lock);
8728
8729         gen_pool_destroy(hdev->internal_cb_pool);
8730
8731         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8732                         HOST_SPACE_INTERNAL_CB_SZ,
8733                         hdev->internal_cb_pool_virt_addr,
8734                         hdev->internal_cb_pool_dma_addr);
8735 }
8736
8737 static int gaudi_ctx_init(struct hl_ctx *ctx)
8738 {
8739         int rc;
8740
8741         if (ctx->asid == HL_KERNEL_ASID_ID)
8742                 return 0;
8743
8744         rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8745         if (rc)
8746                 return rc;
8747
8748         rc = gaudi_restore_user_registers(ctx->hdev);
8749         if (rc)
8750                 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8751
8752         return rc;
8753 }
8754
8755 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8756 {
8757         if (ctx->asid == HL_KERNEL_ASID_ID)
8758                 return;
8759
8760         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8761 }
8762
8763 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8764 {
8765         return gaudi_cq_assignment[cq_idx];
8766 }
8767
8768 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8769 {
8770         return sizeof(struct packet_msg_short) +
8771                         sizeof(struct packet_msg_prot) * 2;
8772 }
8773
8774 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8775 {
8776         return sizeof(struct packet_msg_short) * 4 +
8777                         sizeof(struct packet_fence) +
8778                         sizeof(struct packet_msg_prot) * 2;
8779 }
8780
8781 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8782 {
8783         return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8784 }
8785
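/*
 * A signal CB is a single MSG_SHORT packet that increments the target SOB by
 * one (ADD mode), addressed relative to the W_S sync manager SOB base.
 */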
8786 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8787                                 u32 size, bool eb)
8788 {
8789         struct hl_cb *cb = (struct hl_cb *) data;
8790         struct packet_msg_short *pkt;
8791         u32 value, ctl, pkt_size = sizeof(*pkt);
8792
8793         pkt = cb->kernel_address + size;
8794         memset(pkt, 0, pkt_size);
8795
8796         /* Inc by 1, Mode ADD */
8797         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8798         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8799
8800         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8801         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8802         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8803         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8804         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8805         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8806         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8807
8808         pkt->value = cpu_to_le32(value);
8809         pkt->ctl = cpu_to_le32(ctl);
8810
8811         return size + pkt_size;
8812 }
8813
8814 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8815                                         u16 addr)
8816 {
8817         u32 ctl, pkt_size = sizeof(*pkt);
8818
8819         memset(pkt, 0, pkt_size);
8820
8821         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8822         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8823         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8824         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8825         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8826         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* MB only on last pkt */
8827
8828         pkt->value = cpu_to_le32(value);
8829         pkt->ctl = cpu_to_le32(ctl);
8830
8831         return pkt_size;
8832 }
8833
8834 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8835                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8836                 u16 sob_val, u16 mon_id)
8837 {
8838         u64 monitor_base;
8839         u32 ctl, value, pkt_size = sizeof(*pkt);
8840         u16 msg_addr_offset;
8841         u8 mask;
8842
8843         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8844                 dev_err(hdev->dev,
8845                         "sob_base %u (mask %#x) is not valid\n",
8846                         sob_base, sob_mask);
8847                 return 0;
8848         }
8849
8850         /*
8851          * monitor_base should be the content of the base0 address registers,
8852          * so it will be added to the msg short offsets
8853          */
8854         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8855
8856         msg_addr_offset =
8857                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8858                                 monitor_base;
8859
8860         memset(pkt, 0, pkt_size);
8861
8862         /* Monitor config packet: bind the monitor to a sync object */
8863         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8864         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8865         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8866                         0); /* GREATER OR EQUAL */
8867         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8868
8869         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8870         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8871         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8872         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8873         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8874         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8875         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8876
8877         pkt->value = cpu_to_le32(value);
8878         pkt->ctl = cpu_to_le32(ctl);
8879
8880         return pkt_size;
8881 }
8882
8883 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8884 {
8885         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8886
8887         memset(pkt, 0, pkt_size);
8888
8889         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8890         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8891         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8892
8893         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8894         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8895         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8896         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8897
8898         pkt->cfg = cpu_to_le32(cfg);
8899         pkt->ctl = cpu_to_le32(ctl);
8900
8901         return pkt_size;
8902 }
8903
8904 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8905 {
8906         u32 offset, nic_index;
8907
8908         switch (queue_id) {
8909         case GAUDI_QUEUE_ID_DMA_0_0:
8910                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8911                 break;
8912         case GAUDI_QUEUE_ID_DMA_0_1:
8913                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8914                 break;
8915         case GAUDI_QUEUE_ID_DMA_0_2:
8916                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8917                 break;
8918         case GAUDI_QUEUE_ID_DMA_0_3:
8919                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8920                 break;
8921         case GAUDI_QUEUE_ID_DMA_1_0:
8922                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8923                 break;
8924         case GAUDI_QUEUE_ID_DMA_1_1:
8925                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8926                 break;
8927         case GAUDI_QUEUE_ID_DMA_1_2:
8928                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8929                 break;
8930         case GAUDI_QUEUE_ID_DMA_1_3:
8931                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8932                 break;
8933         case GAUDI_QUEUE_ID_DMA_5_0:
8934                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8935                 break;
8936         case GAUDI_QUEUE_ID_DMA_5_1:
8937                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8938                 break;
8939         case GAUDI_QUEUE_ID_DMA_5_2:
8940                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8941                 break;
8942         case GAUDI_QUEUE_ID_DMA_5_3:
8943                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8944                 break;
8945         case GAUDI_QUEUE_ID_TPC_7_0:
8946                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8947                 break;
8948         case GAUDI_QUEUE_ID_TPC_7_1:
8949                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8950                 break;
8951         case GAUDI_QUEUE_ID_TPC_7_2:
8952                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8953                 break;
8954         case GAUDI_QUEUE_ID_TPC_7_3:
8955                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8956                 break;
8957         case GAUDI_QUEUE_ID_NIC_0_0:
8958         case GAUDI_QUEUE_ID_NIC_1_0:
8959         case GAUDI_QUEUE_ID_NIC_2_0:
8960         case GAUDI_QUEUE_ID_NIC_3_0:
8961         case GAUDI_QUEUE_ID_NIC_4_0:
8962         case GAUDI_QUEUE_ID_NIC_5_0:
8963         case GAUDI_QUEUE_ID_NIC_6_0:
8964         case GAUDI_QUEUE_ID_NIC_7_0:
8965         case GAUDI_QUEUE_ID_NIC_8_0:
8966         case GAUDI_QUEUE_ID_NIC_9_0:
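                /*
                 * Four streams per NIC engine, so the engine index is the
                 * queue offset divided by four; nic_index >> 1 then selects
                 * the NIC macro and nic_index & 1 selects the QMAN (QM0/QM1)
                 * within that macro.
                 */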
8967                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8968                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8969                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8970                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8971                 break;
8972         case GAUDI_QUEUE_ID_NIC_0_1:
8973         case GAUDI_QUEUE_ID_NIC_1_1:
8974         case GAUDI_QUEUE_ID_NIC_2_1:
8975         case GAUDI_QUEUE_ID_NIC_3_1:
8976         case GAUDI_QUEUE_ID_NIC_4_1:
8977         case GAUDI_QUEUE_ID_NIC_5_1:
8978         case GAUDI_QUEUE_ID_NIC_6_1:
8979         case GAUDI_QUEUE_ID_NIC_7_1:
8980         case GAUDI_QUEUE_ID_NIC_8_1:
8981         case GAUDI_QUEUE_ID_NIC_9_1:
8982                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8983                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8984                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8985                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8986                 break;
8987         case GAUDI_QUEUE_ID_NIC_0_2:
8988         case GAUDI_QUEUE_ID_NIC_1_2:
8989         case GAUDI_QUEUE_ID_NIC_2_2:
8990         case GAUDI_QUEUE_ID_NIC_3_2:
8991         case GAUDI_QUEUE_ID_NIC_4_2:
8992         case GAUDI_QUEUE_ID_NIC_5_2:
8993         case GAUDI_QUEUE_ID_NIC_6_2:
8994         case GAUDI_QUEUE_ID_NIC_7_2:
8995         case GAUDI_QUEUE_ID_NIC_8_2:
8996         case GAUDI_QUEUE_ID_NIC_9_2:
8997                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8998                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8999                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
9000                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
9001                 break;
9002         case GAUDI_QUEUE_ID_NIC_0_3:
9003         case GAUDI_QUEUE_ID_NIC_1_3:
9004         case GAUDI_QUEUE_ID_NIC_2_3:
9005         case GAUDI_QUEUE_ID_NIC_3_3:
9006         case GAUDI_QUEUE_ID_NIC_4_3:
9007         case GAUDI_QUEUE_ID_NIC_5_3:
9008         case GAUDI_QUEUE_ID_NIC_6_3:
9009         case GAUDI_QUEUE_ID_NIC_7_3:
9010         case GAUDI_QUEUE_ID_NIC_8_3:
9011         case GAUDI_QUEUE_ID_NIC_9_3:
9012                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
9013                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
9014                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
9015                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
9016                 break;
9017         default:
9018                 return -EINVAL;
9019         }
9020
9021         *addr = CFG_BASE + offset;
9022
9023         return 0;
9024 }
9025
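/*
 * gaudi_add_mon_pkts() - build the monitor configuration packets for a wait.
 *
 * Emits three MSG_SHORT packets that program the monitor's payload address
 * (low and high halves of the fence register address) and the payload data
 * (the value 1 that is written to the fence once the monitor triggers).
 * Returns the total size, in bytes, of the packets added to the buffer.
 */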
9026 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
9027 {
9028         u64 monitor_base;
9029         u32 size = 0;
9030         u16 msg_addr_offset;
9031
9032         /*
9033          * monitor_base should be the content of the base0 address registers,
9034          * so it will be added to the msg short offsets
9035          */
9036         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
9037
9038         /* First monitor config packet: low address of the sync */
9039         msg_addr_offset =
9040                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
9041                                 monitor_base;
9042
9043         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
9044                                         msg_addr_offset);
9045
9046         /* Second monitor config packet: high address of the sync */
9047         msg_addr_offset =
9048                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
9049                                 monitor_base;
9050
9051         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
9052                                         msg_addr_offset);
9053
9054         /*
9055          * Third monitor config packet: the payload, i.e. what to write when the
9056          * sync triggers
9057          */
9058         msg_addr_offset =
9059                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
9060                                 monitor_base;
9061
9062         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
9063
9064         return size;
9065 }
9066
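/*
 * gaudi_gen_wait_cb() - generate a wait command buffer.
 *
 * Translates the queue index to its CP fence register address, then appends
 * the monitor configuration packets, the monitor arm packet and a fence
 * packet to the CB. Returns the updated CB size, or 0 on an invalid queue id.
 */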
9067 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
9068                                 struct hl_gen_wait_properties *prop)
9069 {
9070         struct hl_cb *cb = (struct hl_cb *) prop->data;
9071         void *buf = cb->kernel_address;
9072         u64 fence_addr = 0;
9073         u32 size = prop->size;
9074
9075         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
9076                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
9077                                 prop->q_idx);
9078                 return 0;
9079         }
9080
9081         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
9082         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
9083                         prop->sob_mask, prop->sob_val, prop->mon_id);
9084         size += gaudi_add_fence_pkt(buf + size);
9085
9086         return size;
9087 }
9088
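/* Clear a sync object back to 0 and reset its reference count. */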
9089 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
9090 {
9091         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
9092
9093         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
9094                 hw_sob->sob_id);
9095
9096         WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9097                         hw_sob->sob_id * 4, 0);
9098
9099         kref_init(&hw_sob->kref);
9100 }
9101
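/*
 * If the firmware wrote the POWER9 host magic into the non-reset scratchpad
 * register, enable 64-bit DMA addressing; otherwise fall back to a 48-bit
 * DMA mask.
 */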
9102 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9103 {
9104         if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
9105                                                         HL_POWER9_HOST_MAGIC) {
9106                 hdev->power9_64bit_dma_enable = 1;
9107                 hdev->dma_mask = 64;
9108         } else {
9109                 hdev->power9_64bit_dma_enable = 0;
9110                 hdev->dma_mask = 48;
9111         }
9112 }
9113
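/*
 * Compose the 64-bit device timestamp from the upper and lower halves of the
 * PSOC timestamp counter.
 */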
9114 static u64 gaudi_get_device_time(struct hl_device *hdev)
9115 {
9116         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9117
9118         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9119 }
9120
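/* HW block mapping is not supported on Gaudi; both handlers below return -EPERM. */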
9121 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9122                                 u32 *block_size, u32 *block_id)
9123 {
9124         return -EPERM;
9125 }
9126
9127 static int gaudi_block_mmap(struct hl_device *hdev,
9128                                 struct vm_area_struct *vma,
9129                                 u32 block_id, u32 block_size)
9130 {
9131         return -EPERM;
9132 }
9133
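/*
 * Let the firmware start reporting events: write the "interrupts register"
 * event id to the host interrupts IRQ register, which is either the static
 * GIC register or the one published by the dynamic firmware loader.
 */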
9134 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9135 {
9136         struct cpu_dyn_regs *dyn_regs =
9137                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9138         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9139                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
9140                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
9141
9142         WREG32(irq_handler_offset,
9143                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
9144 }
9145
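/* Translate an exposed PLL index to the matching firmware PLL index, or -EINVAL. */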
9146 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9147 {
9148         switch (pll_idx) {
9149         case HL_GAUDI_CPU_PLL: return CPU_PLL;
9150         case HL_GAUDI_PCI_PLL: return PCI_PLL;
9151         case HL_GAUDI_NIC_PLL: return NIC_PLL;
9152         case HL_GAUDI_DMA_PLL: return DMA_PLL;
9153         case HL_GAUDI_MESH_PLL: return MESH_PLL;
9154         case HL_GAUDI_MME_PLL: return MME_PLL;
9155         case HL_GAUDI_TPC_PLL: return TPC_PLL;
9156         case HL_GAUDI_IF_PLL: return IF_PLL;
9157         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9158         case HL_GAUDI_HBM_PLL: return HBM_PLL;
9159         default: return -EINVAL;
9160         }
9161 }
9162
9163 static int gaudi_add_sync_to_engine_map_entry(
9164         struct hl_sync_to_engine_map *map, u32 reg_value,
9165         enum hl_sync_engine_type engine_type, u32 engine_id)
9166 {
9167         struct hl_sync_to_engine_map_entry *entry;
9168
9169         /* Reg value represents a partial address of the sync object;
9170          * it is used as a unique identifier. For this we need to
9171          * clear the cfg base bits from the value.
9172          */
9173         if (reg_value == 0 || reg_value == 0xffffffff)
9174                 return 0;
9175         reg_value -= (u32)CFG_BASE;
9176
9177         /* create a new hash entry */
9178         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9179         if (!entry)
9180                 return -ENOMEM;
9181         entry->engine_type = engine_type;
9182         entry->engine_id = engine_id;
9183         entry->sync_id = reg_value;
9184         hash_add(map->tb, &entry->node, reg_value);
9185
9186         return 0;
9187 }
9188
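/*
 * Build the sync-object-to-engine map used by the state dump: read the
 * configured sync object register of every TPC, MME and DMA engine and hash
 * each non-trivial value to the engine that owns it.
 */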
9189 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9190                                 struct hl_sync_to_engine_map *map)
9191 {
9192         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9193         struct gaudi_device *gaudi = hdev->asic_specific;
9194         int i, j, rc;
9195         u32 reg_value;
9196
9197         /* Iterate over TPC engines */
9198         for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9199                 /* TPC registers must be accessed with clock gating disabled */
9200                 mutex_lock(&gaudi->clk_gate_mutex);
9201                 hdev->asic_funcs->disable_clock_gating(hdev);
9202
9203                 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9204                                         sds->props[SP_NEXT_TPC] * i);
9205
9206                 /* We can re-enable clock gating */
9207                 hdev->asic_funcs->set_clock_gating(hdev);
9208                 mutex_unlock(&gaudi->clk_gate_mutex);
9209
9210                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9211                                                         ENGINE_TPC, i);
9212                 if (rc)
9213                         goto free_sync_to_engine_map;
9214         }
9215
9216         /* Iterate over MME engines */
9217         for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9218                 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9219                         /* MME registers must be accessed with clock gating
9220                          * disabled
9221                          */
9222                         mutex_lock(&gaudi->clk_gate_mutex);
9223                         hdev->asic_funcs->disable_clock_gating(hdev);
9224
9225                         reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9226                                                 sds->props[SP_NEXT_MME] * i +
9227                                                 j * sizeof(u32));
9228
9229                         /* We can re-enable clock gating */
9230                         hdev->asic_funcs->set_clock_gating(hdev);
9231                         mutex_unlock(&gaudi->clk_gate_mutex);
9232
9233                         rc = gaudi_add_sync_to_engine_map_entry(
9234                                 map, reg_value, ENGINE_MME,
9235                                 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9236                         if (rc)
9237                                 goto free_sync_to_engine_map;
9238                 }
9239         }
9240
9241         /* Iterate over DMA engines */
9242         for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9243                 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9244                                         sds->props[SP_DMA_QUEUES_OFFSET] * i);
9245                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9246                                                         ENGINE_DMA, i);
9247                 if (rc)
9248                         goto free_sync_to_engine_map;
9249         }
9250
9251         return 0;
9252
9253 free_sync_to_engine_map:
9254         hl_state_dump_free_sync_to_engine_map(map);
9255
9256         return rc;
9257 }
9258
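/* A monitor is considered valid when the VALID bit of its status register is set. */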
9259 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9260 {
9261         return FIELD_GET(
9262                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9263                 mon->status);
9264 }
9265
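/*
 * Format the list of sync object ids watched by the given monitor into a
 * comma-separated string, based on the group id and mask in its arm data.
 */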
9266 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
9267 {
9268         const size_t max_write = 10;
9269         u32 gid, mask, sob;
9270         int i, offset;
9271
9272         /* Sync object ID is calculated as follows:
9273          * (MONITOR_MAX_SOBS * group_id + index of each cleared bit in mask)
9274          */
9275         gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9276                         mon->arm_data);
9277         mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9278                         mon->arm_data);
9279
9280         for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
9281                 max_write; mask >>= 1, i++) {
9282                 if (!(mask & 1)) {
9283                         sob = gid * MONITOR_MAX_SOBS + i;
9284
9285                         if (offset > 0)
9286                                 offset += snprintf(sobs + offset, max_write,
9287                                                         ", ");
9288
9289                         offset += snprintf(sobs + offset, max_write, "%u", sob);
9290                 }
9291         }
9292 }
9293
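/* Pretty-print a single monitor into the resizable state dump buffer. */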
9294 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9295                                 struct hl_device *hdev,
9296                                 struct hl_mon_state_dump *mon)
9297 {
9298         const char *name;
9299         char scratch_buf1[BIN_REG_STRING_SIZE],
9300                 scratch_buf2[BIN_REG_STRING_SIZE];
9301         char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9302
9303         name = hl_state_dump_get_monitor_name(hdev, mon);
9304         if (!name)
9305                 name = "";
9306
9307         gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9308
9309         return hl_snprintf_resize(
9310                 buf, size, offset,
9311                 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9312                 mon->id, name,
9313                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9314                                 mon->arm_data),
9315                 hl_format_as_binary(
9316                         scratch_buf1, sizeof(scratch_buf1),
9317                         FIELD_GET(
9318                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9319                                 mon->arm_data)),
9320                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9321                                 mon->arm_data),
9322                 mon->wr_data,
9323                 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9324                 hl_format_as_binary(
9325                         scratch_buf2, sizeof(scratch_buf2),
9326                         FIELD_GET(
9327                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9328                                 mon->status)),
9329                 monitored_sobs);
9330 }
9331
9332
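/*
 * Dump the fence state of a single engine: for every queue whose CP reports
 * a fence in progress, print the fence counter and RDATA register addresses
 * together with the current fence value and CP status.
 */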
9333 static int gaudi_print_fences_single_engine(
9334         struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9335         enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9336         size_t *size, size_t *offset)
9337 {
9338         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9339         int rc = -ENOMEM, i;
9340         u32 *statuses, *fences;
9341
9342         statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9343                         sizeof(*statuses), GFP_KERNEL);
9344         if (!statuses)
9345                 goto out;
9346
9347         fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9348                                 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9349                          sizeof(*fences), GFP_KERNEL);
9350         if (!fences)
9351                 goto free_status;
9352
9353         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9354                 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9355
9356         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9357                                 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9358                 fences[i] = RREG32(base_offset + i * sizeof(u32));
9359
9360         /* The actual print */
9361         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9362                 u32 fence_id;
9363                 u64 fence_cnt, fence_rdata;
9364                 const char *engine_name;
9365
9366                 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9367                         statuses[i]))
9368                         continue;
9369
9370                 fence_id =
9371                         FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9372                 fence_cnt = base_offset + CFG_BASE +
9373                         sizeof(u32) *
9374                         (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9375                 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9376                                 sds->props[SP_FENCE0_RDATA_OFFSET];
9377                 engine_name = hl_sync_engine_to_string(engine_type);
9378
9379                 rc = hl_snprintf_resize(
9380                         buf, size, offset,
9381                         "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9382                         engine_name, engine_id,
9383                         i, fence_id,
9384                         fence_cnt, engine_name, engine_id, fence_id, i,
9385                         fence_rdata, engine_name, engine_id, fence_id, i,
9386                         fences[fence_id],
9387                         statuses[i]);
9388                 if (rc)
9389                         goto free_fences;
9390         }
9391
9392         rc = 0;
9393
9394 free_fences:
9395         kfree(fences);
9396 free_status:
9397         kfree(statuses);
9398 out:
9399         return rc;
9400 }
9401
9402
9403 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9404         .monitor_valid = gaudi_monitor_valid,
9405         .print_single_monitor = gaudi_print_single_monitor,
9406         .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9407         .print_fences_single_engine = gaudi_print_fences_single_engine,
9408 };
9409
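/*
 * Register the Gaudi-specific state dump tables: sync object and monitor
 * id-to-name hashes, the properties array, sync manager names and callbacks.
 */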
9410 static void gaudi_state_dump_init(struct hl_device *hdev)
9411 {
9412         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9413         int i;
9414
9415         for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9416                 hash_add(sds->so_id_to_str_tb,
9417                         &gaudi_so_id_to_str[i].node,
9418                         gaudi_so_id_to_str[i].id);
9419
9420         for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9421                 hash_add(sds->monitor_id_to_str_tb,
9422                         &gaudi_monitor_id_to_str[i].node,
9423                         gaudi_monitor_id_to_str[i].id);
9424
9425         sds->props = gaudi_state_dump_specs_props;
9426
9427         sds->sync_namager_names = gaudi_sync_manager_names;
9428
9429         sds->funcs = gaudi_state_dump_funcs;
9430 }
9431
9432 static u32 *gaudi_get_stream_master_qid_arr(void)
9433 {
9434         return gaudi_stream_master;
9435 }
9436
9437 static const struct hl_asic_funcs gaudi_funcs = {
9438         .early_init = gaudi_early_init,
9439         .early_fini = gaudi_early_fini,
9440         .late_init = gaudi_late_init,
9441         .late_fini = gaudi_late_fini,
9442         .sw_init = gaudi_sw_init,
9443         .sw_fini = gaudi_sw_fini,
9444         .hw_init = gaudi_hw_init,
9445         .hw_fini = gaudi_hw_fini,
9446         .halt_engines = gaudi_halt_engines,
9447         .suspend = gaudi_suspend,
9448         .resume = gaudi_resume,
9449         .mmap = gaudi_mmap,
9450         .ring_doorbell = gaudi_ring_doorbell,
9451         .pqe_write = gaudi_pqe_write,
9452         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9453         .asic_dma_free_coherent = gaudi_dma_free_coherent,
9454         .scrub_device_mem = gaudi_scrub_device_mem,
9455         .get_int_queue_base = gaudi_get_int_queue_base,
9456         .test_queues = gaudi_test_queues,
9457         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9458         .asic_dma_pool_free = gaudi_dma_pool_free,
9459         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9460         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9461         .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9462         .cs_parser = gaudi_cs_parser,
9463         .asic_dma_map_sg = gaudi_dma_map_sg,
9464         .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9465         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9466         .update_eq_ci = gaudi_update_eq_ci,
9467         .context_switch = gaudi_context_switch,
9468         .restore_phase_topology = gaudi_restore_phase_topology,
9469         .debugfs_read32 = gaudi_debugfs_read32,
9470         .debugfs_write32 = gaudi_debugfs_write32,
9471         .debugfs_read64 = gaudi_debugfs_read64,
9472         .debugfs_write64 = gaudi_debugfs_write64,
9473         .debugfs_read_dma = gaudi_debugfs_read_dma,
9474         .add_device_attr = hl_add_device_attr,
9475         .handle_eqe = gaudi_handle_eqe,
9476         .set_pll_profile = hl_set_pll_profile,
9477         .get_events_stat = gaudi_get_events_stat,
9478         .read_pte = gaudi_read_pte,
9479         .write_pte = gaudi_write_pte,
9480         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9481         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9482         .send_heartbeat = gaudi_send_heartbeat,
9483         .set_clock_gating = gaudi_set_clock_gating,
9484         .disable_clock_gating = gaudi_disable_clock_gating,
9485         .debug_coresight = gaudi_debug_coresight,
9486         .is_device_idle = gaudi_is_device_idle,
9487         .soft_reset_late_init = gaudi_soft_reset_late_init,
9488         .hw_queues_lock = gaudi_hw_queues_lock,
9489         .hw_queues_unlock = gaudi_hw_queues_unlock,
9490         .get_pci_id = gaudi_get_pci_id,
9491         .get_eeprom_data = gaudi_get_eeprom_data,
9492         .send_cpu_message = gaudi_send_cpu_message,
9493         .pci_bars_map = gaudi_pci_bars_map,
9494         .init_iatu = gaudi_init_iatu,
9495         .rreg = hl_rreg,
9496         .wreg = hl_wreg,
9497         .halt_coresight = gaudi_halt_coresight,
9498         .ctx_init = gaudi_ctx_init,
9499         .ctx_fini = gaudi_ctx_fini,
9500         .get_clk_rate = hl_get_clk_rate,
9501         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9502         .load_firmware_to_device = gaudi_load_firmware_to_device,
9503         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9504         .get_signal_cb_size = gaudi_get_signal_cb_size,
9505         .get_wait_cb_size = gaudi_get_wait_cb_size,
9506         .gen_signal_cb = gaudi_gen_signal_cb,
9507         .gen_wait_cb = gaudi_gen_wait_cb,
9508         .reset_sob = gaudi_reset_sob,
9509         .reset_sob_group = gaudi_reset_sob_group,
9510         .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9511         .get_device_time = gaudi_get_device_time,
9512         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9513         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9514         .scramble_addr = hl_mmu_scramble_addr,
9515         .descramble_addr = hl_mmu_descramble_addr,
9516         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9517         .get_hw_block_id = gaudi_get_hw_block_id,
9518         .hw_block_mmap = gaudi_block_mmap,
9519         .enable_events_from_fw = gaudi_enable_events_from_fw,
9520         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9521         .init_firmware_loader = gaudi_init_firmware_loader,
9522         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9523         .state_dump_init = gaudi_state_dump_init,
9524         .get_sob_addr = gaudi_get_sob_addr,
9525         .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9526         .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
9527 };
9528
9529 /**
9530  * gaudi_set_asic_funcs - set GAUDI function pointers
9531  *
9532  * @hdev: pointer to hl_device structure
9533  *
9534  */
9535 void gaudi_set_asic_funcs(struct hl_device *hdev)
9536 {
9537         hdev->asic_funcs = &gaudi_funcs;
9538 }