1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2020 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse each CB, but WREG should be
43  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
44  *                      never secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000         /* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN            20
86
87 #define GAUDI_CB_POOL_CB_CNT            512
88 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
97
98 #define GAUDI_ARB_WDT_TIMEOUT           0x1000000
99
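/* Engines whose clock gating can be toggled through debugfs: the two MME
 * master engines (0 and 2, assuming the odd-numbered MMEs are their slaves)
 * and all eight TPC engines.
 */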
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
101                 BIT(GAUDI_ENGINE_ID_MME_0) |\
102                 BIT(GAUDI_ENGINE_ID_MME_2) |\
103                 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
104
105 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
106
107 #define GAUDI_PLL_MAX 10
108
109 #define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")
110
111 #define MONITOR_SOB_STRING_SIZE         256
112
113 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
114         GAUDI_QUEUE_ID_DMA_0_0,
115         GAUDI_QUEUE_ID_DMA_0_1,
116         GAUDI_QUEUE_ID_DMA_0_2,
117         GAUDI_QUEUE_ID_DMA_0_3,
118         GAUDI_QUEUE_ID_DMA_1_0,
119         GAUDI_QUEUE_ID_DMA_1_1,
120         GAUDI_QUEUE_ID_DMA_1_2,
121         GAUDI_QUEUE_ID_DMA_1_3
122 };
123
124 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
125                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
126                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
127                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
128                 "gaudi cpu eq"
129 };
130
131 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
132         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
133         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
134         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
135         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
136         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
137         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
138         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
139         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
140 };
141
142 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
143         [0] = GAUDI_QUEUE_ID_DMA_0_0,
144         [1] = GAUDI_QUEUE_ID_DMA_0_1,
145         [2] = GAUDI_QUEUE_ID_DMA_0_2,
146         [3] = GAUDI_QUEUE_ID_DMA_0_3,
147         [4] = GAUDI_QUEUE_ID_DMA_1_0,
148         [5] = GAUDI_QUEUE_ID_DMA_1_1,
149         [6] = GAUDI_QUEUE_ID_DMA_1_2,
150         [7] = GAUDI_QUEUE_ID_DMA_1_3,
151 };
152
153 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
154         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
155         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
156         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
157         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
158         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
159         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
160         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
161         [PACKET_FENCE]          = sizeof(struct packet_fence),
162         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
163         [PACKET_NOP]            = sizeof(struct packet_nop),
164         [PACKET_STOP]           = sizeof(struct packet_stop),
165         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
166         [PACKET_WAIT]           = sizeof(struct packet_wait),
167         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
168 };
169
170 static inline bool validate_packet_id(enum packet_id id)
171 {
172         switch (id) {
173         case PACKET_WREG_32:
174         case PACKET_WREG_BULK:
175         case PACKET_MSG_LONG:
176         case PACKET_MSG_SHORT:
177         case PACKET_CP_DMA:
178         case PACKET_REPEAT:
179         case PACKET_MSG_PROT:
180         case PACKET_FENCE:
181         case PACKET_LIN_DMA:
182         case PACKET_NOP:
183         case PACKET_STOP:
184         case PACKET_ARB_POINT:
185         case PACKET_WAIT:
186         case PACKET_LOAD_AND_EXE:
187                 return true;
188         default:
189                 return false;
190         }
191 }
192
193 static const char * const
194 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
195         "tpc_address_exceed_slm",
196         "tpc_div_by_0",
197         "tpc_spu_mac_overflow",
198         "tpc_spu_addsub_overflow",
199         "tpc_spu_abs_overflow",
200         "tpc_spu_fp_dst_nan_inf",
201         "tpc_spu_fp_dst_denorm",
202         "tpc_vpu_mac_overflow",
203         "tpc_vpu_addsub_overflow",
204         "tpc_vpu_abs_overflow",
205         "tpc_vpu_fp_dst_nan_inf",
206         "tpc_vpu_fp_dst_denorm",
207         "tpc_assertions",
208         "tpc_illegal_instruction",
209         "tpc_pc_wrap_around",
210         "tpc_qm_sw_err",
211         "tpc_hbw_rresp_err",
212         "tpc_hbw_bresp_err",
213         "tpc_lbw_rresp_err",
214         "tpc_lbw_bresp_err"
215 };
216
217 static const char * const
218 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
219         "PQ AXI HBW error",
220         "CQ AXI HBW error",
221         "CP AXI HBW error",
222         "CP error due to undefined OPCODE",
223         "CP encountered STOP OPCODE",
224         "CP AXI LBW error",
225         "CP WRREG32 or WRBULK returned error",
226         "N/A",
227         "FENCE 0 inc over max value and clipped",
228         "FENCE 1 inc over max value and clipped",
229         "FENCE 2 inc over max value and clipped",
230         "FENCE 3 inc over max value and clipped",
231         "FENCE 0 dec under min value and clipped",
232         "FENCE 1 dec under min value and clipped",
233         "FENCE 2 dec under min value and clipped",
234         "FENCE 3 dec under min value and clipped"
235 };
236
237 static const char * const
238 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
239         "Choice push while full error",
240         "Choice Q watchdog error",
241         "MSG AXI LBW returned with error"
242 };
243
244 enum gaudi_sm_sei_cause {
245         GAUDI_SM_SEI_SO_OVERFLOW,
246         GAUDI_SM_SEI_LBW_4B_UNALIGNED,
247         GAUDI_SM_SEI_AXI_RESPONSE_ERR
248 };
249
250 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
251         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
252         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
253         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
254         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
255         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
256         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
257         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
258         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
259         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
349         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
350         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
351         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
352         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
353         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
354         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
355         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
356         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
357         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
358         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
359         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
360         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
361         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
362         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
363         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
364 };
365
366 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
367         { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
368         { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
369         { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
370         { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
371         { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
372         { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
373         { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
374         { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
375         { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
376         { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
377         { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
378         { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
379         { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
380         { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
381         { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
382         { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
383         { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
384         { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
385         { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
386         { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
387         { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
388         { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
389         { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
390         { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
391         { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
392         { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
393         { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
394 };
395
396 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
397         { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
398         { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
399         { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
400         { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
401         { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
402         { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
403         { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
404         { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
405         { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
406         { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
407         { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
408 };
409
410 static s64 gaudi_state_dump_specs_props[] = {
411         [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
412         [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
413         [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
414         [SP_MON_OBJ_WR_ADDR_LOW] =
415                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
416         [SP_MON_OBJ_WR_ADDR_HIGH] =
417                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
418         [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
419         [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
420         [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
421         [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
422         [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
423         [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
424         [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
425         [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
426         [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
427         [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
428         [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
429         [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
430         [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
431         [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
432         [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
433         [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
434         [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
435         [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
436         [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
437         [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
438         [SP_FENCE0_CNT_OFFSET] =
439                 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
440         [SP_FENCE0_RDATA_OFFSET] =
441                 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
442         [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
443         [SP_NUM_CORES] = 1,
444 };
445
446 /* The order here is opposite to the order of the indexing in the h/w.
447  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
448  */
449 static const char * const gaudi_sync_manager_names[] = {
450         "SYNC_MGR_E_N",
451         "SYNC_MGR_W_N",
452         "SYNC_MGR_E_S",
453         "SYNC_MGR_W_S",
454         NULL
455 };
456
457 struct ecc_info_extract_params {
458         u64 block_address;
459         u32 num_memories;
460         bool derr;
461         bool disable_clock_gating;
462 };
463
464 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
465                                                                 u64 phys_addr);
466 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
467                                         struct hl_cs_job *job);
468 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
469                                         u32 size, u64 val);
470 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
471                                         u32 num_regs, u32 val);
472 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
473                                 u32 tpc_id);
474 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
475 static int gaudi_cpucp_info_get(struct hl_device *hdev);
476 static void gaudi_disable_clock_gating(struct hl_device *hdev);
477 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
478 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
479                                 u32 size, bool eb);
480 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
481                                 struct hl_gen_wait_properties *prop);
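
/* Derived from the queue map above: external queues act as collective
 * masters, while the DMA5, TPC7 and NIC queues act as collective slaves.
 * All other queues do not participate in collective operations.
 */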
482 static inline enum hl_collective_mode
483 get_collective_mode(struct hl_device *hdev, u32 queue_id)
484 {
485         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
486                 return HL_COLLECTIVE_MASTER;
487
488         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
489                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
490                 return HL_COLLECTIVE_SLAVE;
491
492         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
493                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
494                 return HL_COLLECTIVE_SLAVE;
495
496         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
497                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
498                 return HL_COLLECTIVE_SLAVE;
499
500         return HL_COLLECTIVE_NOT_SUPPORTED;
501 }
502
503 static inline void set_default_power_values(struct hl_device *hdev)
504 {
505         struct asic_fixed_properties *prop = &hdev->asic_prop;
506
507         if (hdev->card_type == cpucp_card_type_pmc) {
508                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
509
510                 if (prop->fw_security_enabled)
511                         prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
512                 else
513                         prop->dc_power_default = DC_POWER_DEFAULT_PMC;
514         } else {
515                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
516                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
517         }
518 }
519
520 static int gaudi_set_fixed_properties(struct hl_device *hdev)
521 {
522         struct asic_fixed_properties *prop = &hdev->asic_prop;
523         u32 num_sync_stream_queues = 0;
524         int i;
525
526         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
527         prop->hw_queues_props = kcalloc(prop->max_queues,
528                         sizeof(struct hw_queue_properties),
529                         GFP_KERNEL);
530
531         if (!prop->hw_queues_props)
532                 return -ENOMEM;
533
534         for (i = 0 ; i < prop->max_queues ; i++) {
535                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
536                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
537                         prop->hw_queues_props[i].driver_only = 0;
538                         prop->hw_queues_props[i].supports_sync_stream = 1;
539                         prop->hw_queues_props[i].cb_alloc_flags =
540                                 CB_ALLOC_KERNEL;
541                         num_sync_stream_queues++;
542                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
543                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
544                         prop->hw_queues_props[i].driver_only = 1;
545                         prop->hw_queues_props[i].supports_sync_stream = 0;
546                         prop->hw_queues_props[i].cb_alloc_flags =
547                                 CB_ALLOC_KERNEL;
548                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
549                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
550                         prop->hw_queues_props[i].driver_only = 0;
551                         prop->hw_queues_props[i].supports_sync_stream = 0;
552                         prop->hw_queues_props[i].cb_alloc_flags =
553                                 CB_ALLOC_USER;
554
555                 }
556                 prop->hw_queues_props[i].collective_mode =
557                                                 get_collective_mode(hdev, i);
558         }
559
560         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
561         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
562         prop->collective_first_sob = 0;
563         prop->collective_first_mon = 0;
564
565         /* 2 SOBs per internal queue stream are reserved for collective */
566         prop->sync_stream_first_sob =
567                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
568                         * QMAN_STREAMS * HL_RSVD_SOBS;
569
570         /* 1 monitor per internal queue stream is reserved for collective
571          * 2 monitors per external queue stream are reserved for collective
572          */
573         prop->sync_stream_first_mon =
574                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
575                         (NUMBER_OF_EXT_HW_QUEUES * 2);
576
577         prop->dram_base_address = DRAM_PHYS_BASE;
578         prop->dram_size = GAUDI_HBM_SIZE_32GB;
579         prop->dram_end_address = prop->dram_base_address +
580                                         prop->dram_size;
581         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
582
583         prop->sram_base_address = SRAM_BASE_ADDR;
584         prop->sram_size = SRAM_SIZE;
585         prop->sram_end_address = prop->sram_base_address +
586                                         prop->sram_size;
587         prop->sram_user_base_address = prop->sram_base_address +
588                                         SRAM_USER_BASE_OFFSET;
589
590         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
591         if (hdev->pldm)
592                 prop->mmu_pgt_size = 0x800000; /* 8MB */
593         else
594                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
595         prop->mmu_pte_size = HL_PTE_SIZE;
596         prop->mmu_hop_table_size = HOP_TABLE_SIZE;
597         prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
598         prop->dram_page_size = PAGE_SIZE_2MB;
599         prop->dram_supports_virtual_memory = false;
600
601         prop->pmmu.hop0_shift = HOP0_SHIFT;
602         prop->pmmu.hop1_shift = HOP1_SHIFT;
603         prop->pmmu.hop2_shift = HOP2_SHIFT;
604         prop->pmmu.hop3_shift = HOP3_SHIFT;
605         prop->pmmu.hop4_shift = HOP4_SHIFT;
606         prop->pmmu.hop0_mask = HOP0_MASK;
607         prop->pmmu.hop1_mask = HOP1_MASK;
608         prop->pmmu.hop2_mask = HOP2_MASK;
609         prop->pmmu.hop3_mask = HOP3_MASK;
610         prop->pmmu.hop4_mask = HOP4_MASK;
611         prop->pmmu.start_addr = VA_HOST_SPACE_START;
612         prop->pmmu.end_addr =
613                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
614         prop->pmmu.page_size = PAGE_SIZE_4KB;
615         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
616
617         /* PMMU and HPMMU are the same except for the page size */
618         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
619         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
620
621         /* shifts and masks are the same in PMMU and DMMU */
622         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
623         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
624         prop->dmmu.end_addr = VA_HOST_SPACE_END;
625         prop->dmmu.page_size = PAGE_SIZE_2MB;
626
627         prop->cfg_size = CFG_SIZE;
628         prop->max_asid = MAX_ASID;
629         prop->num_of_events = GAUDI_EVENT_SIZE;
630         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
631
632         set_default_power_values(hdev);
633
634         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
635         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
636
637         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
638         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
639
640         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
641                                         CARD_NAME_MAX_LEN);
642
643         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
644
645         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
646                         prop->sync_stream_first_sob +
647                         (num_sync_stream_queues * HL_RSVD_SOBS);
648         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
649                         prop->sync_stream_first_mon +
650                         (num_sync_stream_queues * HL_RSVD_MONS);
651
652         prop->first_available_user_msix_interrupt = USHRT_MAX;
653
654         for (i = 0 ; i < HL_MAX_DCORES ; i++)
655                 prop->first_available_cq[i] = USHRT_MAX;
656
657         prop->fw_cpu_boot_dev_sts0_valid = false;
658         prop->fw_cpu_boot_dev_sts1_valid = false;
659         prop->hard_reset_done_by_fw = false;
660         prop->gic_interrupts_enable = true;
661
662         prop->server_type = HL_SERVER_TYPE_UNKNOWN;
663
664         return 0;
665 }
666
667 static int gaudi_pci_bars_map(struct hl_device *hdev)
668 {
669         static const char * const name[] = {"SRAM", "CFG", "HBM"};
670         bool is_wc[3] = {false, false, true};
671         int rc;
672
673         rc = hl_pci_bars_map(hdev, name, is_wc);
674         if (rc)
675                 return rc;
676
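        /* The CFG BAR is mapped to the device address space starting at
         * SPI_FLASH_BASE_ADDR (see gaudi_init_iatu()), so the register block
         * at CFG_BASE sits at this offset inside the BAR.
         */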
677         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
678                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
679
680         return 0;
681 }
682
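/* Move the HBM BAR so it points to @addr inside the HBM address space.
 * Returns the previous BAR base so the caller can restore it later, or
 * U64_MAX if the region could not be set (or the F/W configured the iATU).
 */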
683 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
684 {
685         struct gaudi_device *gaudi = hdev->asic_specific;
686         struct hl_inbound_pci_region pci_region;
687         u64 old_addr = addr;
688         int rc;
689
690         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
691                 return old_addr;
692
693         if (hdev->asic_prop.iatu_done_by_fw)
694                 return U64_MAX;
695
696         /* Inbound Region 2 - Bar 4 - Point to HBM */
697         pci_region.mode = PCI_BAR_MATCH_MODE;
698         pci_region.bar = HBM_BAR_ID;
699         pci_region.addr = addr;
700         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
701         if (rc)
702                 return U64_MAX;
703
704         if (gaudi) {
705                 old_addr = gaudi->hbm_bar_cur_addr;
706                 gaudi->hbm_bar_cur_addr = addr;
707         }
708
709         return old_addr;
710 }
711
712 static int gaudi_init_iatu(struct hl_device *hdev)
713 {
714         struct hl_inbound_pci_region inbound_region;
715         struct hl_outbound_pci_region outbound_region;
716         int rc;
717
718         if (hdev->asic_prop.iatu_done_by_fw)
719                 return 0;
720
721         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
722         inbound_region.mode = PCI_BAR_MATCH_MODE;
723         inbound_region.bar = SRAM_BAR_ID;
724         inbound_region.addr = SRAM_BASE_ADDR;
725         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
726         if (rc)
727                 goto done;
728
729         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
730         inbound_region.mode = PCI_BAR_MATCH_MODE;
731         inbound_region.bar = CFG_BAR_ID;
732         inbound_region.addr = SPI_FLASH_BASE_ADDR;
733         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
734         if (rc)
735                 goto done;
736
737         /* Inbound Region 2 - Bar 4 - Point to HBM */
738         inbound_region.mode = PCI_BAR_MATCH_MODE;
739         inbound_region.bar = HBM_BAR_ID;
740         inbound_region.addr = DRAM_PHYS_BASE;
741         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
742         if (rc)
743                 goto done;
744
745         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
746
747         /* Outbound Region 0 - Point to Host */
748         outbound_region.addr = HOST_PHYS_BASE;
749         outbound_region.size = HOST_PHYS_SIZE;
750         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
751
752 done:
753         return rc;
754 }
755
756 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
757 {
758         return RREG32(mmHW_STATE);
759 }
760
761 static int gaudi_early_init(struct hl_device *hdev)
762 {
763         struct asic_fixed_properties *prop = &hdev->asic_prop;
764         struct pci_dev *pdev = hdev->pdev;
765         u32 fw_boot_status;
766         int rc;
767
768         rc = gaudi_set_fixed_properties(hdev);
769         if (rc) {
770                 dev_err(hdev->dev, "Failed setting fixed properties\n");
771                 return rc;
772         }
773
774         /* Check BAR sizes */
775         if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
776                 dev_err(hdev->dev,
777                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
778                         SRAM_BAR_ID,
779                         (unsigned long long) pci_resource_len(pdev,
780                                                         SRAM_BAR_ID),
781                         SRAM_BAR_SIZE);
782                 rc = -ENODEV;
783                 goto free_queue_props;
784         }
785
786         if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
787                 dev_err(hdev->dev,
788                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
789                         CFG_BAR_ID,
790                         (unsigned long long) pci_resource_len(pdev,
791                                                                 CFG_BAR_ID),
792                         CFG_BAR_SIZE);
793                 rc = -ENODEV;
794                 goto free_queue_props;
795         }
796
797         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
798
799         /* If FW security is enabled at this point it means no access to ELBI */
800         if (hdev->asic_prop.fw_security_enabled) {
801                 hdev->asic_prop.iatu_done_by_fw = true;
802
803                 /*
804                  * The GIC security bit can ONLY be set by CPUCP, so at this
805                  * stage the decision can only be made based on PCI ID security.
806                  */
807                 hdev->asic_prop.gic_interrupts_enable = false;
808                 goto pci_init;
809         }
810
811         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
812                                 &fw_boot_status);
813         if (rc)
814                 goto free_queue_props;
815
816         /* Check whether FW is configuring iATU */
817         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
818                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
819                 hdev->asic_prop.iatu_done_by_fw = true;
820
821 pci_init:
822         rc = hl_pci_init(hdev);
823         if (rc)
824                 goto free_queue_props;
825
826         /* Before continuing with the initialization, we need to read the preboot
827          * version to determine whether we are running with security-enabled firmware
828          */
829         rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
830                                         mmCPU_BOOT_DEV_STS0,
831                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
832                                         mmCPU_BOOT_ERR1,
833                                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
834         if (rc) {
835                 if (hdev->reset_on_preboot_fail)
836                         hdev->asic_funcs->hw_fini(hdev, true, false);
837                 goto pci_fini;
838         }
839
840         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
841                 dev_info(hdev->dev,
842                         "H/W state is dirty, must reset before initializing\n");
843                 hdev->asic_funcs->hw_fini(hdev, true, false);
844         }
845
846         return 0;
847
848 pci_fini:
849         hl_pci_fini(hdev);
850 free_queue_props:
851         kfree(hdev->asic_prop.hw_queues_props);
852         return rc;
853 }
854
855 static int gaudi_early_fini(struct hl_device *hdev)
856 {
857         kfree(hdev->asic_prop.hw_queues_props);
858         hl_pci_fini(hdev);
859
860         return 0;
861 }
862
863 /**
864  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
865  *
866  * @hdev: pointer to hl_device structure
867  *
868  */
869 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
870 {
871         struct asic_fixed_properties *prop = &hdev->asic_prop;
872         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
873         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
874         int rc;
875
876         if (hdev->asic_prop.fw_security_enabled) {
877                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
878
879                 if (rc)
880                         return rc;
881
882                 freq = pll_freq_arr[2];
883         } else {
884                 /* Backward compatibility */
885                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
886                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
887                 nr = RREG32(mmPSOC_CPU_PLL_NR);
888                 nf = RREG32(mmPSOC_CPU_PLL_NF);
889                 od = RREG32(mmPSOC_CPU_PLL_OD);
890
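                /* The branches below implement the usual PLL relation
                 * f_out = f_ref * (NF + 1) / ((NR + 1) * (OD + 1)), with NR,
                 * NF and OD presumably being the reference divider, feedback
                 * multiplier and output divider; div_sel chooses between the
                 * (possibly divided) reference clock and the PLL output.
                 */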
891                 if (div_sel == DIV_SEL_REF_CLK ||
892                                 div_sel == DIV_SEL_DIVIDED_REF) {
893                         if (div_sel == DIV_SEL_REF_CLK)
894                                 freq = PLL_REF_CLK;
895                         else
896                                 freq = PLL_REF_CLK / (div_fctr + 1);
897                 } else if (div_sel == DIV_SEL_PLL_CLK ||
898                         div_sel == DIV_SEL_DIVIDED_PLL) {
899                         pll_clk = PLL_REF_CLK * (nf + 1) /
900                                         ((nr + 1) * (od + 1));
901                         if (div_sel == DIV_SEL_PLL_CLK)
902                                 freq = pll_clk;
903                         else
904                                 freq = pll_clk / (div_fctr + 1);
905                 } else {
906                         dev_warn(hdev->dev,
907                                 "Received invalid div select value: %d",
908                                 div_sel);
909                         freq = 0;
910                 }
911         }
912
913         prop->psoc_timestamp_frequency = freq;
914         prop->psoc_pci_pll_nr = nr;
915         prop->psoc_pci_pll_nf = nf;
916         prop->psoc_pci_pll_od = od;
917         prop->psoc_pci_pll_div_factor = div_fctr;
918
919         return 0;
920 }
921
922 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
923                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
924 {
925         struct asic_fixed_properties *prop = &hdev->asic_prop;
926         struct packet_lin_dma *init_tpc_mem_pkt;
927         struct hl_cs_job *job;
928         struct hl_cb *cb;
929         u64 dst_addr;
930         u32 cb_size, ctl;
931         u8 tpc_id;
932         int rc;
933
934         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
935         if (!cb)
936                 return -EFAULT;
937
938         init_tpc_mem_pkt = cb->kernel_address;
939         cb_size = sizeof(*init_tpc_mem_pkt);
940         memset(init_tpc_mem_pkt, 0, cb_size);
941
942         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
943
944         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
945         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
946         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
947         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
948
949         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
950
951         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
952         dst_addr = (prop->sram_user_base_address &
953                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
954                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
955         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
956
957         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
958         if (!job) {
959                 dev_err(hdev->dev, "Failed to allocate a new job\n");
960                 rc = -ENOMEM;
961                 goto release_cb;
962         }
963
964         job->id = 0;
965         job->user_cb = cb;
966         atomic_inc(&job->user_cb->cs_cnt);
967         job->user_cb_size = cb_size;
968         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
969         job->patched_cb = job->user_cb;
970         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
971
972         hl_debugfs_add_job(hdev, job);
973
974         rc = gaudi_send_job_on_qman0(hdev, job);
975
976         if (rc)
977                 goto free_job;
978
979         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
980                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
981                 if (rc)
982                         break;
983         }
984
985 free_job:
986         hl_userptr_delete_list(hdev, &job->userptr_list);
987         hl_debugfs_remove_job(hdev, job);
988         kfree(job);
989         atomic_dec(&cb->cs_cnt);
990
991 release_cb:
992         hl_cb_put(cb);
993         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
994
995         return rc;
996 }
997
998 /*
999  * gaudi_init_tpc_mem() - Initialize TPC memories.
1000  * @hdev: Pointer to hl_device structure.
1001  *
1002  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1003  *
1004  * Return: 0 for success, negative value for error.
1005  */
1006 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1007 {
1008         const struct firmware *fw;
1009         size_t fw_size;
1010         void *cpu_addr;
1011         dma_addr_t dma_handle;
1012         int rc, count = 5;
1013
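        /* request_firmware() may return -EINTR if the calling process is
         * interrupted by a signal while the firmware is being fetched, so
         * retry a handful of times before giving up.
         */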
1014 again:
1015         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1016         if (rc == -EINTR && count-- > 0) {
1017                 msleep(50);
1018                 goto again;
1019         }
1020
1021         if (rc) {
1022                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1023                                 GAUDI_TPC_FW_FILE);
1024                 goto out;
1025         }
1026
1027         fw_size = fw->size;
1028         cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1029                         &dma_handle, GFP_KERNEL | __GFP_ZERO);
1030         if (!cpu_addr) {
1031                 dev_err(hdev->dev,
1032                         "Failed to allocate %zu of dma memory for TPC kernel\n",
1033                         fw_size);
1034                 rc = -ENOMEM;
1035                 goto out;
1036         }
1037
1038         memcpy(cpu_addr, fw->data, fw_size);
1039
1040         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1041
1042         hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1043                         dma_handle);
1044
1045 out:
1046         release_firmware(fw);
1047         return rc;
1048 }
1049
1050 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1051 {
1052         struct gaudi_device *gaudi = hdev->asic_specific;
1053         struct gaudi_collective_properties *prop = &gaudi->collective_props;
1054         struct hl_hw_queue *q;
1055         u32 i, sob_id, sob_group_id, queue_id;
1056
1057         /* Iterate through SOB groups and assign a SOB for each slave queue */
1058         sob_group_id =
1059                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1060         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1061
1062         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
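        /* Queue IDs are laid out as 4 consecutive streams per NIC engine, so
         * the same stream of the next engine is 4 queue IDs away.
         */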
1063         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1064                 q = &hdev->kernel_queues[queue_id + (4 * i)];
1065                 q->sync_stream_prop.collective_sob_id = sob_id + i;
1066         }
1067
1068         /* Both DMA5 and TPC7 use the same resources since only a single
1069          * engine needs to participate in the reduction process
1070          */
1071         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1072         q = &hdev->kernel_queues[queue_id];
1073         q->sync_stream_prop.collective_sob_id =
1074                         sob_id + NIC_NUMBER_OF_ENGINES;
1075
1076         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1077         q = &hdev->kernel_queues[queue_id];
1078         q->sync_stream_prop.collective_sob_id =
1079                         sob_id + NIC_NUMBER_OF_ENGINES;
1080 }
1081
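/* kref release callback: clear all SOBs in the group back to 0 and re-init
 * the refcount so the group can be reused.
 */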
1082 static void gaudi_sob_group_hw_reset(struct kref *ref)
1083 {
1084         struct gaudi_hw_sob_group *hw_sob_group =
1085                 container_of(ref, struct gaudi_hw_sob_group, kref);
1086         struct hl_device *hdev = hw_sob_group->hdev;
1087         int i;
1088
1089         for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1090                 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1091                         (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1092
1093         kref_init(&hw_sob_group->kref);
1094 }
1095
1096 static void gaudi_sob_group_reset_error(struct kref *ref)
1097 {
1098         struct gaudi_hw_sob_group *hw_sob_group =
1099                 container_of(ref, struct gaudi_hw_sob_group, kref);
1100         struct hl_device *hdev = hw_sob_group->hdev;
1101
1102         dev_crit(hdev->dev,
1103                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1104                 hw_sob_group->base_sob_id);
1105 }
1106
1107 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1108 {
1109         struct gaudi_collective_properties *prop;
1110         int i;
1111
1112         prop = &gaudi->collective_props;
1113
1114         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1115
1116         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1117                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1118                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1119                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1120         /* Set the collective engine bit (i == NIC_NUMBER_OF_ENGINES after the loop) */
1121         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1122                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1123 }
1124
1125 static int gaudi_collective_init(struct hl_device *hdev)
1126 {
1127         u32 i, sob_id, reserved_sobs_per_group;
1128         struct gaudi_collective_properties *prop;
1129         struct gaudi_device *gaudi;
1130
1131         gaudi = hdev->asic_specific;
1132         prop = &gaudi->collective_props;
1133         sob_id = hdev->asic_prop.collective_first_sob;
1134
1135         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1136         reserved_sobs_per_group =
1137                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1138
1139         /* Init SOB groups */
1140         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1141                 prop->hw_sob_group[i].hdev = hdev;
1142                 prop->hw_sob_group[i].base_sob_id = sob_id;
1143                 sob_id += reserved_sobs_per_group;
1144                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1145         }
1146
1147         for (i = 0 ; i < QMAN_STREAMS; i++) {
1148                 prop->next_sob_group_val[i] = 1;
1149                 prop->curr_sob_group_idx[i] = 0;
1150                 gaudi_collective_map_sobs(hdev, i);
1151         }
1152
1153         gaudi_collective_mstr_sob_mask_set(gaudi);
1154
1155         return 0;
1156 }
1157
1158 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1159 {
1160         struct gaudi_device *gaudi = hdev->asic_specific;
1161         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1162
1163         kref_put(&cprop->hw_sob_group[sob_group].kref,
1164                                         gaudi_sob_group_hw_reset);
1165 }
1166
1167 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1168                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1169 {
1170         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1171         struct gaudi_collective_properties *cprop;
1172         struct hl_gen_wait_properties wait_prop;
1173         struct hl_sync_stream_properties *prop;
1174         struct gaudi_device *gaudi;
1175
1176         gaudi = hdev->asic_specific;
1177         cprop = &gaudi->collective_props;
1178         queue_id = job->hw_queue_id;
1179         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1180
1181         master_sob_base =
1182                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1183         master_monitor = prop->collective_mstr_mon_id[0];
1184
1185         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1186
1187         dev_dbg(hdev->dev,
1188                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1189                 master_sob_base, cprop->mstr_sob_mask[0],
1190                 cprop->next_sob_group_val[stream],
1191                 master_monitor, queue_id);
1192
1193         wait_prop.data = (void *) job->patched_cb;
1194         wait_prop.sob_base = master_sob_base;
1195         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1196         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1197         wait_prop.mon_id = master_monitor;
1198         wait_prop.q_idx = queue_id;
1199         wait_prop.size = cb_size;
1200         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1201
1202         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1203         master_monitor = prop->collective_mstr_mon_id[1];
1204
1205         dev_dbg(hdev->dev,
1206                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1207                 master_sob_base, cprop->mstr_sob_mask[1],
1208                 cprop->next_sob_group_val[stream],
1209                 master_monitor, queue_id);
1210
1211         wait_prop.sob_base = master_sob_base;
1212         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1213         wait_prop.mon_id = master_monitor;
1214         wait_prop.size = cb_size;
1215         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1216 }
1217
1218 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1219                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1220 {
1221         struct hl_gen_wait_properties wait_prop;
1222         struct hl_sync_stream_properties *prop;
1223         u32 queue_id, cb_size = 0;
1224
1225         queue_id = job->hw_queue_id;
1226         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1227
1228         if (job->cs->encaps_signals) {
1229                 /* use the encaps signal handle stored earlier in the flow
1230                  * and set the SOB information from the encaps
1231                  * signals handle
1232                  */
1233                 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1234                                                 cs_cmpl);
1235
1236                 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1237                                 job->cs->sequence,
1238                                 cs_cmpl->hw_sob->sob_id,
1239                                 cs_cmpl->sob_val);
1240         }
1241
1242         /* Add to wait CBs using slave monitor */
1243         wait_prop.data = (void *) job->user_cb;
1244         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1245         wait_prop.sob_mask = 0x1;
1246         wait_prop.sob_val = cs_cmpl->sob_val;
1247         wait_prop.mon_id = prop->collective_slave_mon_id;
1248         wait_prop.q_idx = queue_id;
1249         wait_prop.size = cb_size;
1250
1251         dev_dbg(hdev->dev,
1252                 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1253                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1254                 prop->collective_slave_mon_id, queue_id);
1255
1256         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1257
1258         dev_dbg(hdev->dev,
1259                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1260                 prop->collective_sob_id, queue_id);
1261
1262         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1263                         prop->collective_sob_id, cb_size, false);
1264 }
1265
1266 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1267 {
1268         struct hl_cs_compl *signal_cs_cmpl =
1269                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1270         struct hl_cs_compl *cs_cmpl =
1271                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1272         struct gaudi_collective_properties *cprop;
1273         u32 stream, queue_id, sob_group_offset;
1274         struct gaudi_device *gaudi;
1275         struct hl_device *hdev;
1276         struct hl_cs_job *job;
1277         struct hl_ctx *ctx;
1278
1279         ctx = cs->ctx;
1280         hdev = ctx->hdev;
1281         gaudi = hdev->asic_specific;
1282         cprop = &gaudi->collective_props;
1283
1284         /* In encaps signals case the SOB info will be retrieved from
1285          * the handle in gaudi_collective_slave_init_job.
1286          */
1287         if (!cs->encaps_signals) {
1288                 /* copy the SOB id and value of the signal CS */
1289                 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1290                 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1291         }
1292
1293         /* Check again if the signal cs has already completed.
1294          * If it has, don't send any wait cs since the hw_sob
1295          * could already be in reset. If the signal has not completed,
1296          * take a refcount on the hw_sob to prevent resetting the sob
1297          * while the wait cs is not yet submitted.
1298          * Note that this check is protected by two locks,
1299          * the hw queue lock and the completion object lock,
1300          * and the same completion object lock also protects
1301          * the hw_sob reset handler function.
1302          * The hw_queue lock prevents the hw_sob refcount value,
1303          * which is changed by the signal/wait flows, from going out of sync.
1304          */
1305         spin_lock(&signal_cs_cmpl->lock);
1306
1307         if (completion_done(&cs->signal_fence->completion)) {
1308                 spin_unlock(&signal_cs_cmpl->lock);
1309                 return -EINVAL;
1310         }
1311         /* Increment kref since all slave queues are now waiting on it */
1312         kref_get(&cs_cmpl->hw_sob->kref);
1313
1314         spin_unlock(&signal_cs_cmpl->lock);
1315
1316         /* Calculate the stream from collective master queue (1st job) */
1317         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1318         stream = job->hw_queue_id % 4;
1319         sob_group_offset =
1320                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1321
1322         list_for_each_entry(job, &cs->job_list, cs_node) {
1323                 queue_id = job->hw_queue_id;
1324
1325                 if (hdev->kernel_queues[queue_id].collective_mode ==
1326                                 HL_COLLECTIVE_MASTER)
1327                         gaudi_collective_master_init_job(hdev, job, stream,
1328                                                 sob_group_offset);
1329                 else
1330                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1331         }
1332
1333         cs_cmpl->sob_group = sob_group_offset;
1334
1335         /* Handle sob group kref and wraparound */
1336         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1337         cprop->next_sob_group_val[stream]++;
1338
1339         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1340                 /*
1341                  * Decrement as we reached the max value.
1342                  * The release function won't be called here as we've
1343                  * just incremented the refcount.
1344                  */
1345                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1346                                 gaudi_sob_group_reset_error);
1347                 cprop->next_sob_group_val[stream] = 1;
1348                 /* only two SOBs are currently in use */
1349                 cprop->curr_sob_group_idx[stream] =
1350                         (cprop->curr_sob_group_idx[stream] + 1) &
1351                                                         (HL_RSVD_SOBS - 1);
1352
1353                 gaudi_collective_map_sobs(hdev, stream);
1354
1355                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1356                                 cprop->curr_sob_group_idx[stream], stream);
1357         }
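        /*
         * Example (assuming HL_RSVD_SOBS == 2, per the "only two SOBs"
         * note above): when next_sob_group_val reaches HL_MAX_SOB_VAL it
         * is rewound to 1 and curr_sob_group_idx toggles 0 -> 1 -> 0 ...,
         * so each stream simply alternates between its reserved SOB
         * groups.
         */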
1358
1359         mb();
1360         hl_fence_put(cs->signal_fence);
1361         cs->signal_fence = NULL;
1362
1363         return 0;
1364 }
1365
1366 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1367                 struct hl_ctx *ctx, struct hl_cs *cs,
1368                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1369                 u32 encaps_signal_offset)
1370 {
1371         struct hw_queue_properties *hw_queue_prop;
1372         struct hl_cs_counters_atomic *cntr;
1373         struct hl_cs_job *job;
1374         struct hl_cb *cb;
1375         u32 cb_size;
1376         bool patched_cb;
1377
1378         cntr = &hdev->aggregated_cs_counters;
1379
1380         if (mode == HL_COLLECTIVE_MASTER) {
1381                 /* CB size of collective master queue contains
1382                  * 4 msg short packets for monitor 1 configuration
1383                  * 1 fence packet
1384                  * 4 msg short packets for monitor 2 configuration
1385                  * 1 fence packet
1386                  * 2 msg prot packets for completion and MSI-X
1387                  */
1388                 cb_size = sizeof(struct packet_msg_short) * 8 +
1389                                 sizeof(struct packet_fence) * 2 +
1390                                 sizeof(struct packet_msg_prot) * 2;
1391                 patched_cb = true;
1392         } else {
1393                 /* CB size of collective slave queues contains
1394                  * 4 msg short packets for monitor configuration
1395                  * 1 fence packet
1396                  * 1 additional msg short packet for sob signal
1397                  */
1398                 cb_size = sizeof(struct packet_msg_short) * 5 +
1399                                 sizeof(struct packet_fence);
1400                 patched_cb = false;
1401         }
1402
1403         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1404         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1405         if (!job) {
1406                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1407                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1408                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1409                 return -ENOMEM;
1410         }
1411
1412         /* Allocate an internal mapped CB for non-patched CBs */
1413         cb = hl_cb_kernel_create(hdev, cb_size,
1414                         hdev->mmu_enable && !patched_cb);
1415         if (!cb) {
1416                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1417                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1418                 kfree(job);
1419                 return -EFAULT;
1420         }
1421
1422         job->id = 0;
1423         job->cs = cs;
1424         job->user_cb = cb;
1425         atomic_inc(&job->user_cb->cs_cnt);
1426         job->user_cb_size = cb_size;
1427         job->hw_queue_id = queue_id;
1428
1429         /* Since the collective wait CS is guaranteed to have only one
1430          * chunk, we can use this chunk to set the encapsulated signal
1431          * offset in the jobs.
1432          */
1433         if (cs->encaps_signals)
1434                 job->encaps_sig_wait_offset = encaps_signal_offset;
1435
1436         /*
1437          * No need for parsing - the user CB is already the patched CB.
1438          * We call hl_cb_destroy() for two reasons: we don't need the CB
1439          * in the CB idr anymore, and we want to decrement its refcount,
1440          * which was incremented inside hl_cb_kernel_create().
1441          */
1442         if (patched_cb)
1443                 job->patched_cb = job->user_cb;
1444         else
1445                 job->patched_cb = NULL;
1446
1447         job->job_cb_size = job->user_cb_size;
1448         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1449
1450         /* Increment the CS refcount, since for external queues we get a completion */
1451         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1452                 cs_get(cs);
1453
1454         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1455
1456         list_add_tail(&job->cs_node, &cs->job_list);
1457
1458         hl_debugfs_add_job(hdev, job);
1459
1460         return 0;
1461 }
1462
1463 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1464                 struct hl_ctx *ctx, struct hl_cs *cs,
1465                 u32 wait_queue_id, u32 collective_engine_id,
1466                 u32 encaps_signal_offset)
1467 {
1468         struct gaudi_device *gaudi = hdev->asic_specific;
1469         struct hw_queue_properties *hw_queue_prop;
1470         u32 queue_id, collective_queue, num_jobs;
1471         u32 stream, nic_queue, nic_idx = 0;
1472         bool skip;
1473         int i, rc = 0;
1474
1475         /* Verify wait queue id is configured as master */
1476         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1477         if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1478                 dev_err(hdev->dev,
1479                         "Queue %d is not configured as collective master\n",
1480                         wait_queue_id);
1481                 return -EINVAL;
1482         }
1483
1484         /* Verify engine id is supported */
1485         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1486                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1487                 dev_err(hdev->dev,
1488                         "Collective wait does not support engine %u\n",
1489                         collective_engine_id);
1490                 return -EINVAL;
1491         }
1492
1493         stream = wait_queue_id % 4;
1494
1495         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1496                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1497         else
1498                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1499
1500         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1501         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1502
1503         /* The first job goes to the collective master queue; it will wait
1504          * for the collective slave queues to finish execution.
1505          * The synchronization is done using two monitors:
1506          * the first monitor for NICs 0-7, the second monitor for NICs 8-9
1507          * and the reduction engine (DMA5/TPC7).
1508          *
1509          * The rest of the jobs go to the collective slave queues, which
1510          * will all wait for the user to signal sob 'cs_cmpl->sob_val'.
1511          */
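        /*
         * Note: num_jobs == NUMBER_OF_SOBS_IN_GRP + 1, which presumably
         * breaks down as one master job plus one slave job per NIC engine
         * plus one slave job for the reduction engine (assuming
         * NUMBER_OF_SOBS_IN_GRP == NIC_NUMBER_OF_ENGINES + 1).
         */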
1512         for (i = 0 ; i < num_jobs ; i++) {
1513                 if (i == 0) {
1514                         queue_id = wait_queue_id;
1515                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1516                                 HL_COLLECTIVE_MASTER, queue_id,
1517                                 wait_queue_id, encaps_signal_offset);
1518                 } else {
1519                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1520                                 if (gaudi->hw_cap_initialized &
1521                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1522                                         skip = false;
1523                                 else
1524                                         skip = true;
1525
1526                                 queue_id = nic_queue;
1527                                 nic_queue += 4;
1528                                 nic_idx++;
1529
1530                                 if (skip)
1531                                         continue;
1532                         } else {
1533                                 queue_id = collective_queue;
1534                         }
1535
1536                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1537                                 HL_COLLECTIVE_SLAVE, queue_id,
1538                                 wait_queue_id, encaps_signal_offset);
1539                 }
1540
1541                 if (rc)
1542                         return rc;
1543         }
1544
1545         return rc;
1546 }
1547
1548 static int gaudi_late_init(struct hl_device *hdev)
1549 {
1550         struct gaudi_device *gaudi = hdev->asic_specific;
1551         int rc;
1552
1553         rc = gaudi->cpucp_info_get(hdev);
1554         if (rc) {
1555                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1556                 return rc;
1557         }
1558
1559         if ((hdev->card_type == cpucp_card_type_pci) &&
1560                         (hdev->nic_ports_mask & 0x3)) {
1561                 dev_info(hdev->dev,
1562                         "PCI card detected, only 8 ports are enabled\n");
1563                 hdev->nic_ports_mask &= ~0x3;
1564
1565                 /* Stop and disable unused NIC QMANs */
1566                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1567                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1568                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1569
1570                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1571                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1572                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1573
1574                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1575                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1576
1577                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1578         }
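        /*
         * Note: clearing bits 0-1 of nic_ports_mask and stopping NIC0's
         * QM0/QM1 above disables NIC ports 0 and 1, leaving 8 of the 10
         * ports enabled on the PCI card type.
         */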
1579
1580         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1581         if (rc) {
1582                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1583                 return rc;
1584         }
1585
1586         /* Scrub both SRAM and DRAM */
1587         rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
1588         if (rc)
1589                 goto disable_pci_access;
1590
1591         rc = gaudi_fetch_psoc_frequency(hdev);
1592         if (rc) {
1593                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1594                 goto disable_pci_access;
1595         }
1596
1597         rc = gaudi_mmu_clear_pgt_range(hdev);
1598         if (rc) {
1599                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1600                 goto disable_pci_access;
1601         }
1602
1603         rc = gaudi_init_tpc_mem(hdev);
1604         if (rc) {
1605                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1606                 goto disable_pci_access;
1607         }
1608
1609         rc = gaudi_collective_init(hdev);
1610         if (rc) {
1611                 dev_err(hdev->dev, "Failed to init collective\n");
1612                 goto disable_pci_access;
1613         }
1614
1615         /* We support only a single ASID for the user, so as an optimization,
1616          * initialize the ASID just once during device initialization, with the fixed value of 1.
1617          */
1618         gaudi_mmu_prepare(hdev, 1);
1619
1620         return 0;
1621
1622 disable_pci_access:
1623         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1624
1625         return rc;
1626 }
1627
1628 static void gaudi_late_fini(struct hl_device *hdev)
1629 {
1630         const struct hwmon_channel_info **channel_info_arr;
1631         int i = 0;
1632
1633         if (!hdev->hl_chip_info->info)
1634                 return;
1635
1636         channel_info_arr = hdev->hl_chip_info->info;
1637
1638         while (channel_info_arr[i]) {
1639                 kfree(channel_info_arr[i]->config);
1640                 kfree(channel_info_arr[i]);
1641                 i++;
1642         }
1643
1644         kfree(channel_info_arr);
1645
1646         hdev->hl_chip_info->info = NULL;
1647 }
1648
1649 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1650 {
1651         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1652         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1653         int i, j, rc = 0;
1654
1655         /*
1656          * The device CPU works with 40-bit addresses, and bit 39 must be set
1657          * to '1' when accessing the host.
1658          * Bits 49:39 of the full host address are saved for a later
1659          * configuration of the HW that extends the address to 50 bits.
1660          * Because a single HW register holds the extension bits, these bits
1661          * must be identical across the entire allocated range.
1662          */
1663
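        /*
         * To satisfy this, the loop below simply retries the allocation up
         * to GAUDI_ALLOC_CPU_MEM_RETRY_CNT times and keeps the first buffer
         * whose start and end addresses share the same MSBs; all other
         * attempts are freed at the end.
         */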
1664         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1665                 virt_addr_arr[i] =
1666                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1667                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1668                                                 &dma_addr_arr[i],
1669                                                 GFP_KERNEL | __GFP_ZERO);
1670                 if (!virt_addr_arr[i]) {
1671                         rc = -ENOMEM;
1672                         goto free_dma_mem_arr;
1673                 }
1674
1675                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1676                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1677                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1678                         break;
1679         }
1680
1681         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1682                 dev_err(hdev->dev,
1683                         "MSBs of CPU accessible DMA memory are not identical across the allocated range\n");
1684                 rc = -EFAULT;
1685                 goto free_dma_mem_arr;
1686         }
1687
1688         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1689         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1690         hdev->cpu_pci_msb_addr =
1691                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1692
1693         if (!hdev->asic_prop.fw_security_enabled)
1694                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1695
1696 free_dma_mem_arr:
1697         for (j = 0 ; j < i ; j++)
1698                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1699                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1700                                                 virt_addr_arr[j],
1701                                                 dma_addr_arr[j]);
1702
1703         return rc;
1704 }
1705
1706 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1707 {
1708         struct gaudi_device *gaudi = hdev->asic_specific;
1709         struct gaudi_internal_qman_info *q;
1710         u32 i;
1711
1712         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1713                 q = &gaudi->internal_qmans[i];
1714                 if (!q->pq_kernel_addr)
1715                         continue;
1716                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1717                                                         q->pq_kernel_addr,
1718                                                         q->pq_dma_addr);
1719         }
1720 }
1721
1722 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1723 {
1724         struct gaudi_device *gaudi = hdev->asic_specific;
1725         struct gaudi_internal_qman_info *q;
1726         int rc, i;
1727
1728         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1729                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1730                         continue;
1731
1732                 q = &gaudi->internal_qmans[i];
1733
1734                 switch (i) {
1735                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1736                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1737                         break;
1738                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1739                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1740                         break;
1741                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1742                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1743                         break;
1744                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1745                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1746                         break;
1747                 default:
1748                         dev_err(hdev->dev, "Bad internal queue index %d", i);
1749                         rc = -EINVAL;
1750                         goto free_internal_qmans_pq_mem;
1751                 }
1752
1753                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1754                                                 hdev, q->pq_size,
1755                                                 &q->pq_dma_addr,
1756                                                 GFP_KERNEL | __GFP_ZERO);
1757                 if (!q->pq_kernel_addr) {
1758                         rc = -ENOMEM;
1759                         goto free_internal_qmans_pq_mem;
1760                 }
1761         }
1762
1763         return 0;
1764
1765 free_internal_qmans_pq_mem:
1766         gaudi_free_internal_qmans_pq_mem(hdev);
1767         return rc;
1768 }
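/*
 * Note: only internal (on-device) queues get a kernel-allocated PQ buffer
 * here; the buffer size is chosen per engine class (HBM DMA, MME, TPC, NIC)
 * in the switch above.
 */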
1769
1770 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1771 {
1772         struct asic_fixed_properties *prop = &hdev->asic_prop;
1773         struct pci_mem_region *region;
1774
1775         /* CFG */
1776         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1777         region->region_base = CFG_BASE;
1778         region->region_size = CFG_SIZE;
1779         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1780         region->bar_size = CFG_BAR_SIZE;
1781         region->bar_id = CFG_BAR_ID;
1782         region->used = 1;
1783
1784         /* SRAM */
1785         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1786         region->region_base = SRAM_BASE_ADDR;
1787         region->region_size = SRAM_SIZE;
1788         region->offset_in_bar = 0;
1789         region->bar_size = SRAM_BAR_SIZE;
1790         region->bar_id = SRAM_BAR_ID;
1791         region->used = 1;
1792
1793         /* DRAM */
1794         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1795         region->region_base = DRAM_PHYS_BASE;
1796         region->region_size = hdev->asic_prop.dram_size;
1797         region->offset_in_bar = 0;
1798         region->bar_size = prop->dram_pci_bar_size;
1799         region->bar_id = HBM_BAR_ID;
1800         region->used = 1;
1801
1802         /* SP SRAM */
1803         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1804         region->region_base = PSOC_SCRATCHPAD_ADDR;
1805         region->region_size = PSOC_SCRATCHPAD_SIZE;
1806         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1807         region->bar_size = CFG_BAR_SIZE;
1808         region->bar_id = CFG_BAR_ID;
1809         region->used = 1;
1810 }
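/*
 * Note: the CFG and SP SRAM regions share the CFG BAR (with offsets taken
 * relative to SPI_FLASH_BASE_ADDR), while the SRAM and DRAM regions each
 * start at offset 0 of their own BARs.
 */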
1811
1812 static int gaudi_sw_init(struct hl_device *hdev)
1813 {
1814         struct gaudi_device *gaudi;
1815         u32 i, event_id = 0;
1816         int rc;
1817
1818         /* Allocate device structure */
1819         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1820         if (!gaudi)
1821                 return -ENOMEM;
1822
1823         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1824                 if (gaudi_irq_map_table[i].valid) {
1825                         if (event_id == GAUDI_EVENT_SIZE) {
1826                                 dev_err(hdev->dev,
1827                                         "Event array exceeds the limit of %u events\n",
1828                                         GAUDI_EVENT_SIZE);
1829                                 rc = -EINVAL;
1830                                 goto free_gaudi_device;
1831                         }
1832
1833                         gaudi->events[event_id++] =
1834                                         gaudi_irq_map_table[i].fc_id;
1835                 }
1836         }
1837
1838         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1839
1840         gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1841
1842         hdev->asic_specific = gaudi;
1843
1844         /* Create DMA pool for small allocations */
1845         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1846                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1847         if (!hdev->dma_pool) {
1848                 dev_err(hdev->dev, "failed to create DMA pool\n");
1849                 rc = -ENOMEM;
1850                 goto free_gaudi_device;
1851         }
1852
1853         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1854         if (rc)
1855                 goto free_dma_pool;
1856
1857         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1858         if (!hdev->cpu_accessible_dma_pool) {
1859                 dev_err(hdev->dev,
1860                         "Failed to create CPU accessible DMA pool\n");
1861                 rc = -ENOMEM;
1862                 goto free_cpu_dma_mem;
1863         }
1864
1865         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1866                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1867                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1868         if (rc) {
1869                 dev_err(hdev->dev,
1870                         "Failed to add memory to CPU accessible DMA pool\n");
1871                 rc = -EFAULT;
1872                 goto free_cpu_accessible_dma_pool;
1873         }
1874
1875         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1876         if (rc)
1877                 goto free_cpu_accessible_dma_pool;
1878
1879         spin_lock_init(&gaudi->hw_queues_lock);
1880         mutex_init(&gaudi->clk_gate_mutex);
1881
1882         hdev->supports_sync_stream = true;
1883         hdev->supports_coresight = true;
1884         hdev->supports_staged_submission = true;
1885         hdev->supports_wait_for_multi_cs = true;
1886
1887         hdev->asic_funcs->set_pci_memory_regions(hdev);
1888         hdev->stream_master_qid_arr =
1889                                 hdev->asic_funcs->get_stream_master_qid_arr();
1890         hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1891
1892         return 0;
1893
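/* Error unwinding - release resources in the reverse order of allocation */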
1894 free_cpu_accessible_dma_pool:
1895         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1896 free_cpu_dma_mem:
1897         if (!hdev->asic_prop.fw_security_enabled)
1898                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1899                                         hdev->cpu_pci_msb_addr);
1900         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1901                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1902                         hdev->cpu_accessible_dma_mem,
1903                         hdev->cpu_accessible_dma_address);
1904 free_dma_pool:
1905         dma_pool_destroy(hdev->dma_pool);
1906 free_gaudi_device:
1907         kfree(gaudi);
1908         return rc;
1909 }
1910
1911 static int gaudi_sw_fini(struct hl_device *hdev)
1912 {
1913         struct gaudi_device *gaudi = hdev->asic_specific;
1914
1915         gaudi_free_internal_qmans_pq_mem(hdev);
1916
1917         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1918
1919         if (!hdev->asic_prop.fw_security_enabled)
1920                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1921                                         hdev->cpu_pci_msb_addr);
1922
1923         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1924                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1925                         hdev->cpu_accessible_dma_mem,
1926                         hdev->cpu_accessible_dma_address);
1927
1928         dma_pool_destroy(hdev->dma_pool);
1929
1930         mutex_destroy(&gaudi->clk_gate_mutex);
1931
1932         kfree(gaudi);
1933
1934         return 0;
1935 }
1936
1937 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1938 {
1939         struct hl_device *hdev = arg;
1940         int i;
1941
1942         if (hdev->disabled)
1943                 return IRQ_HANDLED;
1944
1945         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1946                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1947
1948         hl_irq_handler_eq(irq, &hdev->event_queue);
1949
1950         return IRQ_HANDLED;
1951 }
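/*
 * Note: in single-MSI mode one interrupt vector serves everything, so each
 * invocation simply scans every completion queue and then the event queue.
 */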
1952
1953 /*
1954  * For backward compatibility, new MSI interrupts should be set after the
1955  * existing CPU and NIC interrupts.
1956  */
1957 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1958                                 bool cpu_eq)
1959 {
1960         int msi_vec;
1961
1962         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1963                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1964                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1965
1966         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1967                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1968
1969         return pci_irq_vector(hdev->pdev, msi_vec);
1970 }
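/*
 * Sketch of the resulting vector layout (inferred from the mapping above,
 * not stated in the source): indices below GAUDI_EVENT_QUEUE_MSI_IDX map
 * 1:1 to the completion queues, the CPU event queue keeps
 * GAUDI_EVENT_QUEUE_MSI_IDX itself, and any other index at or above it is
 * shifted up by NIC_NUMBER_OF_ENGINES + 1, which appears to leave room for
 * the NIC interrupt vectors in between.
 */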
1971
1972 static int gaudi_enable_msi_single(struct hl_device *hdev)
1973 {
1974         int rc, irq;
1975
1976         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1977
1978         irq = gaudi_pci_irq_vector(hdev, 0, false);
1979         rc = request_irq(irq, gaudi_irq_handler_single, 0,
1980                         "gaudi single msi", hdev);
1981         if (rc)
1982                 dev_err(hdev->dev,
1983                         "Failed to request single MSI IRQ\n");
1984
1985         return rc;
1986 }
1987
1988 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1989 {
1990         int cq_cnt = hdev->asic_prop.completion_queues_count;
1991         int rc, i, irq_cnt_init, irq;
1992
1993         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1994                 irq = gaudi_pci_irq_vector(hdev, i, false);
1995                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1996                                 &hdev->completion_queue[i]);
1997                 if (rc) {
1998                         dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1999                         goto free_irqs;
2000                 }
2001         }
2002
2003         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2004         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2005                                 &hdev->event_queue);
2006         if (rc) {
2007                 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2008                 goto free_irqs;
2009         }
2010
2011         return 0;
2012
2013 free_irqs:
2014         for (i = 0 ; i < irq_cnt_init ; i++)
2015                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
2016                                 &hdev->completion_queue[i]);
2017         return rc;
2018 }
2019
2020 static int gaudi_enable_msi(struct hl_device *hdev)
2021 {
2022         struct gaudi_device *gaudi = hdev->asic_specific;
2023         int rc;
2024
2025         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2026                 return 0;
2027
2028         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2029         if (rc < 0) {
2030                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2031                 return rc;
2032         }
2033
2034         if (rc < NUMBER_OF_INTERRUPTS) {
2035                 gaudi->multi_msi_mode = false;
2036                 rc = gaudi_enable_msi_single(hdev);
2037         } else {
2038                 gaudi->multi_msi_mode = true;
2039                 rc = gaudi_enable_msi_multi(hdev);
2040         }
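        /*
         * Note: pci_alloc_irq_vectors() above is called with both min and
         * max set to 1, so on success rc == 1 and (assuming
         * NUMBER_OF_INTERRUPTS is larger than 1) the driver effectively
         * always takes the single-MSI path here.
         */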
2041
2042         if (rc)
2043                 goto free_pci_irq_vectors;
2044
2045         gaudi->hw_cap_initialized |= HW_CAP_MSI;
2046
2047         return 0;
2048
2049 free_pci_irq_vectors:
2050         pci_free_irq_vectors(hdev->pdev);
2051         return rc;
2052 }
2053
2054 static void gaudi_sync_irqs(struct hl_device *hdev)
2055 {
2056         struct gaudi_device *gaudi = hdev->asic_specific;
2057         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2058
2059         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2060                 return;
2061
2062         /* Wait for all in-flight IRQ handlers to finish */
2063         if (gaudi->multi_msi_mode) {
2064                 for (i = 0 ; i < cq_cnt ; i++)
2065                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2066
2067                 synchronize_irq(gaudi_pci_irq_vector(hdev,
2068                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
2069                                                 true));
2070         } else {
2071                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2072         }
2073 }
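/*
 * Note: gaudi_sync_irqs() is invoked from gaudi_disable_msi() below, before
 * the vectors are freed, so any in-flight CQ/EQ handlers have completed by
 * the time free_irq() is called.
 */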
2074
2075 static void gaudi_disable_msi(struct hl_device *hdev)
2076 {
2077         struct gaudi_device *gaudi = hdev->asic_specific;
2078         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2079
2080         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2081                 return;
2082
2083         gaudi_sync_irqs(hdev);
2084
2085         if (gaudi->multi_msi_mode) {
2086                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2087                                                 true);
2088                 free_irq(irq, &hdev->event_queue);
2089
2090                 for (i = 0 ; i < cq_cnt ; i++) {
2091                         irq = gaudi_pci_irq_vector(hdev, i, false);
2092                         free_irq(irq, &hdev->completion_queue[i]);
2093                 }
2094         } else {
2095                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2096         }
2097
2098         pci_free_irq_vectors(hdev->pdev);
2099
2100         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2101 }
2102
2103 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2104 {
2105         struct gaudi_device *gaudi = hdev->asic_specific;
2106
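        /*
         * The SRAM scrambler is configured by the driver only when FW
         * security is disabled, the FW has not already enabled it (boot
         * device status bit), it has not been configured yet and
         * sram_scrambler_enable requests it - hence the early returns below.
         */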
2107         if (hdev->asic_prop.fw_security_enabled)
2108                 return;
2109
2110         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2111                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2112                 return;
2113
2114         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2115                 return;
2116
2117         if (!hdev->sram_scrambler_enable)
2118                 return;
2119
2120         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2121                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2122         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2123                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2124         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2125                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2126         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2127                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2128         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2129                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2130         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2131                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2132         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2133                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2134         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2135                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2136
2137         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2138                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2139         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2140                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2141         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2142                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2143         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2144                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2145         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2146                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2147         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2148                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2149         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2150                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2151         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2152                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2153
2154         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2155                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2156         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2157                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2158         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2159                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2160         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2161                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2162         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2163                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2164         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2165                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2166         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2167                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2168         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2169                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2170
2171         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2172 }
2173
2174 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2175 {
2176         struct gaudi_device *gaudi = hdev->asic_specific;
2177
2178         if (hdev->asic_prop.fw_security_enabled)
2179                 return;
2180
2181         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2182                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2183                 return;
2184
2185         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2186                 return;
2187
2188         if (!hdev->dram_scrambler_enable)
2189                 return;
2190
2191         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2192                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2193         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2194                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2195         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2196                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2197         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2198                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2199         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2200                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2201         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2202                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2203         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2204                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2205         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2206                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2207
2208         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2209                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2210         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2211                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2212         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2213                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2214         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2215                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2216         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2217                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2218         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2219                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2220         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2221                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2222         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2223                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2224
2225         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2226                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2227         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2228                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2229         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2230                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2231         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2232                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2233         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2234                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2235         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2236                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2237         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2238                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2239         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2240                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2241
2242         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2243 }
2244
2245 static void gaudi_init_e2e(struct hl_device *hdev)
2246 {
2247         if (hdev->asic_prop.fw_security_enabled)
2248                 return;
2249
2250         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2251                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2252                 return;
2253
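        /*
         * Editor's note: the per-router E2E credit programming below writes
         * the HBM write/read sizes shifted right by 3, i.e. the registers
         * appear to be programmed in units of 8 - an inference from the
         * ">> 3", not stated in the source.
         */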
2254         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2255         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2256         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2257         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2258
2259         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2260         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2261         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2262         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2263
2264         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2265         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2266         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2267         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2268
2269         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2270         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2271         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2272         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2273
2274         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2275         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2276         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2277         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2278
2279         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2280         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2281         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2282         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2283
2284         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2285         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2286         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2287         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2288
2289         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2290         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2291         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2292         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2293
2294         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2295         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2296         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2297         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2298
2299         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2300         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2301         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2302         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2303
2304         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2305         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2306         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2307         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2308
2309         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2310         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2311         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2312         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2313
2314         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2315         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2316         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2317         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2318
2319         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2320         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2321         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2322         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2323
2324         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2325         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2326         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2327         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2328
2329         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2330         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2331         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2332         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2333
2334         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2335         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2336         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2337         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2338
2339         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2340         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2341         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2342         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2343
2344         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2345         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2346         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2347         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2348
2349         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2350         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2351         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2352         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2353
2354         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2355         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2356         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2357         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2358
2359         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2360         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2361         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2362         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2363
2364         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2365         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2366         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2367         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2368
2369         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2370         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2371         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2372         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2373
2374         if (!hdev->dram_scrambler_enable) {
2375                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2376                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2377                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2378                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2379
2380                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2381                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2382                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2383                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2384
2385                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2386                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2387                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2388                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2389
2390                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2391                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2392                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2393                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2394
2395                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2396                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2397                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2398                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2399
2400                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2401                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2402                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2403                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2404
2405                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2406                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2407                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2408                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2409
2410                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2411                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2412                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2413                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2414
2415                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2416                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2417                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2418                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2419
2420                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2421                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2422                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2423                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2424
2425                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2426                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2427                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2428                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2429
2430                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2431                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2432                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2433                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2434
2435                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2436                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2437                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2438                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2439
2440                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2441                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2442                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2443                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2444
2445                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2446                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2447                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2448                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2449
2450                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2451                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2452                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2453                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2454
2455                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2456                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2457                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2458                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2459
2460                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2461                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2462                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2463                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2464
2465                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2466                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2467                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2468                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2469
2470                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2471                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2472                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2473                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2474
2475                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2476                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2477                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2478                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2479
2480                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2481                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2482                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2483                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2484
2485                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2486                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2487                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2488                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2489
2490                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2491                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2492                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2493                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2494         }
2495
2496         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2497                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2498         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2499                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2500
2501         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2502                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2503         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2504                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2505
2506         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2507                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2508         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2509                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2510
2511         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2512                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2513         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2514                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2515
2516         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2517                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2518         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2519                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2520
2521         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2522                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2523         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2524                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2525
2526         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2527                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2528         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2529                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2530
2531         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2532                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2533         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2534                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2535
2536         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2537                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2538         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2539                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2540
2541         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2542                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2543         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2544                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2545
2546         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2547                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2548         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2549                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2550
2551         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2552                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2553         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2554                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2555
2556         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2557                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2558         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2559                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2560
2561         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2562                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2563         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2564                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2565
2566         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2567                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2568         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2569                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2570
2571         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2572                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2573         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2574                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2575
2576         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2577                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2578         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2579                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2580
2581         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2582                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2583         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2584                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2585
2586         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2587                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2588         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2589                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2590
2591         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2592                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2593         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2594                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2595
2596         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2597                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2598         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2599                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2600
2601         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2602                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2603         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2604                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2605
2606         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2607                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2608         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2609                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2610
2611         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2612                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2613         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2614                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2615 }
2616
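/*
 * gaudi_init_hbm_cred() - set the initial HBM read/write credit counts on the
 * DMA_IF routers, then set the read/write credit-enable bits in both
 * HBM_CRED_EN registers of each interface. Skipped when the firmware owns the
 * security configuration or when the boot status reports that the firmware
 * already configured the credits. The per-port credit values below appear to
 * be hardware-tuned constants.
 */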
2617 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2618 {
2619         uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2620
2621         if (hdev->asic_prop.fw_security_enabled)
2622                 return;
2623
2624         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2625                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2626                 return;
2627
2628         hbm0_wr = 0x33333333;
2629         hbm0_rd = 0x77777777;
2630         hbm1_wr = 0x55555555;
2631         hbm1_rd = 0xDDDDDDDD;
2632
2633         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2634         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2635         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2636         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2637
2638         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2639         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2640         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2641         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2642
2643         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2644         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2645         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2646         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2647
2648         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2649         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2650         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2651         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2652
2653         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2654                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2655                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2656         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2657                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2658                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2659         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2660                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2661                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2662         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2663                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2664                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2665
2666         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2667                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2668                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2669         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2670                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2671                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2672         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2673                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2674                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2675         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2676                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2677                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2678 }
2679
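/*
 * gaudi_init_golden_registers() - one-time "golden" register setup: E2E and
 * HBM credits, masking of TPC arithmetic interrupts, TPC icache fetch line
 * configuration, zeroing the first 128 bytes of SRAM for Tensor DMA and
 * setting the MME EUS rollup count.
 */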
2680 static void gaudi_init_golden_registers(struct hl_device *hdev)
2681 {
2682         u32 tpc_offset;
2683         int tpc_id, i;
2684
2685         gaudi_init_e2e(hdev);
2686         gaudi_init_hbm_cred(hdev);
2687
2688         for (tpc_id = 0, tpc_offset = 0;
2689                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2690                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2691                 /* Mask all arithmetic interrupts from TPC */
2692                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2693                 /* Set 16 cache lines */
2694                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2695                                 ICACHE_FETCH_LINE_NUM, 2);
2696         }
2697
2698         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2699         for (i = 0 ; i < 128 ; i += 8)
2700                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2701
2702         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2703         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2704         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2705         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2706 }
2707
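/*
 * gaudi_init_pci_dma_qman() - configure a single stream (upper CP) of a PCI
 * DMA QMAN. Per-stream register instances are 4 bytes apart, so
 * q_off = dma_qm_offset + qman_id * 4 addresses the <qman_id> instance of
 * each *_0 register. The QMAN-wide error/IRQ/arbitration setup is done only
 * once, on stream 0.
 */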
2708 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2709                                         int qman_id, dma_addr_t qman_pq_addr)
2710 {
2711         struct cpu_dyn_regs *dyn_regs =
2712                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2713         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2714         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2715         u32 q_off, dma_qm_offset;
2716         u32 dma_qm_err_cfg, irq_handler_offset;
2717
2718         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2719
2720         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2721                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2722         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2723                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2724         so_base_en_lo = lower_32_bits(CFG_BASE +
2725                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2726         so_base_en_hi = upper_32_bits(CFG_BASE +
2727                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2728         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2729                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2730         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2731                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2732         so_base_ws_lo = lower_32_bits(CFG_BASE +
2733                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2734         so_base_ws_hi = upper_32_bits(CFG_BASE +
2735                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2736
2737         q_off = dma_qm_offset + qman_id * 4;
2738
2739         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2740         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2741
2742         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2743         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2744         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2745
2746         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2747         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2748                                                         QMAN_LDMA_SRC_OFFSET);
2749         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2750                                                         QMAN_LDMA_DST_OFFSET);
2751
2752         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2753         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2754         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2755         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2756         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2757         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2758         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2759         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2760
2761         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2762
2763         /* The following configuration is needed only once per QMAN */
2764         if (qman_id == 0) {
2765                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2766                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2767                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2768
2769                 /* Configure RAZWI IRQ */
2770                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2771                 if (hdev->stop_on_err)
2772                         dma_qm_err_cfg |=
2773                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2774
2775                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2776
2777                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2778                         lower_32_bits(CFG_BASE + irq_handler_offset));
2779                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2780                         upper_32_bits(CFG_BASE + irq_handler_offset));
2781
2782                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2783                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2784                                                                         dma_id);
2785
2786                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2787                                 QM_ARB_ERR_MSG_EN_MASK);
2788
2789                 /* Increase ARB WDT to support streams architecture */
2790                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2791                                 GAUDI_ARB_WDT_TIMEOUT);
2792
2793                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2794                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2795
2796                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2797         }
2798 }
2799
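/*
 * gaudi_init_dma_core() - configure the DMA core itself (as opposed to its
 * QMAN): outstanding-read limits, the H3-2116 workaround, error message
 * routing to the GIC/FW interrupt handler, MMU bypass for the secured
 * channel, and finally the core enable bit.
 */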
2800 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2801 {
2802         struct cpu_dyn_regs *dyn_regs =
2803                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2804         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2805         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2806         u32 irq_handler_offset;
2807
2808         /* Set to maximum possible according to physical size */
2809         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2810         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2811
2812         /* WA for H/W bug H3-2116 */
2813         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2814
2815         /* STOP_ON bit means the operation is not completed in case of RAZWI */
2816         if (hdev->stop_on_err)
2817                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2818
2819         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2820
2821         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2822                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2823                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2824
2825         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2826                 lower_32_bits(CFG_BASE + irq_handler_offset));
2827         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2828                 upper_32_bits(CFG_BASE + irq_handler_offset));
2829
2830         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2831                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2832         WREG32(mmDMA0_CORE_PROT + dma_offset,
2833                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2834         /* If the channel is secured, it should be in MMU bypass mode */
2835         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2836                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2837         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2838 }
2839
2840 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2841                                 u32 enable_mask)
2842 {
2843         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2844
2845         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2846 }
2847
2848 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2849 {
2850         struct gaudi_device *gaudi = hdev->asic_specific;
2851         struct hl_hw_queue *q;
2852         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2853
2854         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2855                 return;
2856
2857         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2858                 dma_id = gaudi_dma_assignment[i];
2859                 /*
2860                  * For queues that come after the CPU queue, add 1 to get the
2861                  * correct queue index. In addition, the CPU EQ and the NIC IRQs
2862                  * must be accounted for to get the correct MSI register.
2863                  */
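                /*
                 * e.g. for dma_id 5, stream 0 (cpu_skip == 1):
                 * q_idx = 4 * 5 + 0 + 1 = 21, and the MSI vector additionally
                 * skips the CPU EQ and the NIC_NUMBER_OF_ENGINES NIC IRQs.
                 */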
2864                 if (dma_id > 1) {
2865                         cpu_skip = 1;
2866                         nic_skip = NIC_NUMBER_OF_ENGINES;
2867                 } else {
2868                         cpu_skip = 0;
2869                         nic_skip = 0;
2870                 }
2871
2872                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2873                         q_idx = 4 * dma_id + j + cpu_skip;
2874                         q = &hdev->kernel_queues[q_idx];
2875                         q->cq_id = cq_id++;
2876                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2877                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2878                                                 q->bus_address);
2879                 }
2880
2881                 gaudi_init_dma_core(hdev, dma_id);
2882
2883                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2884         }
2885
2886         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2887 }
2888
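/*
 * gaudi_init_hbm_dma_qman() - streams 0-3 are the upper CPs and get a PQ at
 * the supplied base address plus the CPDMA offsets; qman_id 4 is the lower
 * CP, which gets the regular LDMA offsets and the per-QMAN error/IRQ and
 * arbitration configuration instead.
 */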
2889 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2890                                         int qman_id, u64 qman_base_addr)
2891 {
2892         struct cpu_dyn_regs *dyn_regs =
2893                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2894         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2895         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2896         u32 dma_qm_err_cfg, irq_handler_offset;
2897         u32 q_off, dma_qm_offset;
2898
2899         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2900
2901         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2902                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2903         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2904                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2905         so_base_en_lo = lower_32_bits(CFG_BASE +
2906                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2907         so_base_en_hi = upper_32_bits(CFG_BASE +
2908                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2909         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2910                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2911         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2912                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2913         so_base_ws_lo = lower_32_bits(CFG_BASE +
2914                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2915         so_base_ws_hi = upper_32_bits(CFG_BASE +
2916                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2917
2918         q_off = dma_qm_offset + qman_id * 4;
2919
2920         if (qman_id < 4) {
2921                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2922                                         lower_32_bits(qman_base_addr));
2923                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2924                                         upper_32_bits(qman_base_addr));
2925
2926                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2927                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2928                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2929
2930                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2931                                                         QMAN_CPDMA_SIZE_OFFSET);
2932                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2933                                                         QMAN_CPDMA_SRC_OFFSET);
2934                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2935                                                         QMAN_CPDMA_DST_OFFSET);
2936         } else {
2937                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2938                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2939                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2940
2941                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2942                                                         QMAN_LDMA_SIZE_OFFSET);
2943                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2944                                                         QMAN_LDMA_SRC_OFFSET);
2945                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2946                                                         QMAN_LDMA_DST_OFFSET);
2947
2948                 /* Configure RAZWI IRQ */
2949                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2950                 if (hdev->stop_on_err)
2951                         dma_qm_err_cfg |=
2952                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2953
2954                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2955
2956                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2957                         lower_32_bits(CFG_BASE + irq_handler_offset));
2958                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2959                         upper_32_bits(CFG_BASE + irq_handler_offset));
2960
2961                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2962                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2963                                                                         dma_id);
2964
2965                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2966                                 QM_ARB_ERR_MSG_EN_MASK);
2967
2968                 /* Increase ARB WDT to support streams architecture */
2969                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2970                                 GAUDI_ARB_WDT_TIMEOUT);
2971
2972                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2973                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2974                                 QMAN_INTERNAL_MAKE_TRUSTED);
2975         }
2976
2977         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2978         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2979         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2980         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2981
2982         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2983         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2984                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2985                                 mtr_base_ws_lo);
2986                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2987                                 mtr_base_ws_hi);
2988                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2989                                 so_base_ws_lo);
2990                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2991                                 so_base_ws_hi);
2992         }
2993 }
2994
2995 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2996 {
2997         struct gaudi_device *gaudi = hdev->asic_specific;
2998         struct gaudi_internal_qman_info *q;
2999         u64 qman_base_addr;
3000         int i, j, dma_id, internal_q_index;
3001
3002         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
3003                 return;
3004
3005         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
3006                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
3007
3008                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
3009                          /*
3010                           * Add 1 for the CPU queue in order to get the correct
3011                           * queue number, as all internal queues are placed after it
3012                           */
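                        /*
                         * e.g. dma_id 2, stream 0:
                         * internal_q_index = 2 * QMAN_STREAMS + 0 + 1
                         *                  = 9 (with QMAN_STREAMS == 4)
                         */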
3013                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
3014
3015                         q = &gaudi->internal_qmans[internal_q_index];
3016                         qman_base_addr = (u64) q->pq_dma_addr;
3017                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
3018                                                 qman_base_addr);
3019                 }
3020
3021                 /* Initializing lower CP for HBM DMA QMAN */
3022                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
3023
3024                 gaudi_init_dma_core(hdev, dma_id);
3025
3026                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
3027         }
3028
3029         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
3030 }
3031
3032 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
3033                                         int qman_id, u64 qman_base_addr)
3034 {
3035         struct cpu_dyn_regs *dyn_regs =
3036                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3037         u32 mtr_base_lo, mtr_base_hi;
3038         u32 so_base_lo, so_base_hi;
3039         u32 irq_handler_offset;
3040         u32 q_off, mme_id;
3041         u32 mme_qm_err_cfg;
3042
3043         mtr_base_lo = lower_32_bits(CFG_BASE +
3044                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3045         mtr_base_hi = upper_32_bits(CFG_BASE +
3046                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3047         so_base_lo = lower_32_bits(CFG_BASE +
3048                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3049         so_base_hi = upper_32_bits(CFG_BASE +
3050                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3051
3052         q_off = mme_offset + qman_id * 4;
3053
3054         if (qman_id < 4) {
3055                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3056                                         lower_32_bits(qman_base_addr));
3057                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3058                                         upper_32_bits(qman_base_addr));
3059
3060                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3061                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3062                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3063
3064                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3065                                                         QMAN_CPDMA_SIZE_OFFSET);
3066                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3067                                                         QMAN_CPDMA_SRC_OFFSET);
3068                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3069                                                         QMAN_CPDMA_DST_OFFSET);
3070         } else {
3071                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3072                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3073                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3074
3075                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3076                                                         QMAN_LDMA_SIZE_OFFSET);
3077                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3078                                                         QMAN_LDMA_SRC_OFFSET);
3079                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3080                                                         QMAN_LDMA_DST_OFFSET);
3081
3082                 /* Configure RAZWI IRQ */
3083                 mme_id = mme_offset /
3084                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3085
3086                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3087                 if (hdev->stop_on_err)
3088                         mme_qm_err_cfg |=
3089                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3090
3091                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3092
3093                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3094                         lower_32_bits(CFG_BASE + irq_handler_offset));
3095                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3096                         upper_32_bits(CFG_BASE + irq_handler_offset));
3097
3098                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3099                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3100                                                                         mme_id);
3101
3102                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3103                                 QM_ARB_ERR_MSG_EN_MASK);
3104
3105                 /* Increase ARB WDT to support streams architecture */
3106                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3107                                 GAUDI_ARB_WDT_TIMEOUT);
3108
3109                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3110                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3111                                 QMAN_INTERNAL_MAKE_TRUSTED);
3112         }
3113
3114         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3115         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3116         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3117         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3118 }
3119
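/*
 * gaudi_init_mme_qmans() - only the MME0 and MME2 QMANs are configured here
 * (see the mapping comment below). The first four streams are programmed at
 * the MME2 register offset and, once i == 3, mme_offset is reset to 0 so the
 * next four streams go to MME0. The lower CP (qman_id 4) of each of the two
 * QMANs is then initialized separately.
 */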
3120 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3121 {
3122         struct gaudi_device *gaudi = hdev->asic_specific;
3123         struct gaudi_internal_qman_info *q;
3124         u64 qman_base_addr;
3125         u32 mme_offset;
3126         int i, internal_q_index;
3127
3128         if (gaudi->hw_cap_initialized & HW_CAP_MME)
3129                 return;
3130
3131         /*
3132          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3133          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3134          */
3135
3136         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3137
3138         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3139                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3140                 q = &gaudi->internal_qmans[internal_q_index];
3141                 qman_base_addr = (u64) q->pq_dma_addr;
3142                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3143                                         qman_base_addr);
3144                 if (i == 3)
3145                         mme_offset = 0;
3146         }
3147
3148         /* Initializing lower CP for MME QMANs */
3149         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3150         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3151         gaudi_init_mme_qman(hdev, 0, 4, 0);
3152
3153         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3154         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3155
3156         gaudi->hw_cap_initialized |= HW_CAP_MME;
3157 }
3158
3159 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3160                                 int qman_id, u64 qman_base_addr)
3161 {
3162         struct cpu_dyn_regs *dyn_regs =
3163                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3164         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3165         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3166         u32 tpc_qm_err_cfg, irq_handler_offset;
3167         u32 q_off, tpc_id;
3168
3169         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3170                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3171         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3172                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3173         so_base_en_lo = lower_32_bits(CFG_BASE +
3174                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3175         so_base_en_hi = upper_32_bits(CFG_BASE +
3176                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3177         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3178                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3179         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3180                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3181         so_base_ws_lo = lower_32_bits(CFG_BASE +
3182                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3183         so_base_ws_hi = upper_32_bits(CFG_BASE +
3184                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3185
3186         q_off = tpc_offset + qman_id * 4;
3187
3188         tpc_id = tpc_offset /
3189                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3190
3191         if (qman_id < 4) {
3192                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3193                                         lower_32_bits(qman_base_addr));
3194                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3195                                         upper_32_bits(qman_base_addr));
3196
3197                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3198                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3199                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3200
3201                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3202                                                         QMAN_CPDMA_SIZE_OFFSET);
3203                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3204                                                         QMAN_CPDMA_SRC_OFFSET);
3205                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3206                                                         QMAN_CPDMA_DST_OFFSET);
3207         } else {
3208                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3209                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3210                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3211
3212                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3213                                                         QMAN_LDMA_SIZE_OFFSET);
3214                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3215                                                         QMAN_LDMA_SRC_OFFSET);
3216                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3217                                                         QMAN_LDMA_DST_OFFSET);
3218
3219                 /* Configure RAZWI IRQ */
3220                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3221                 if (hdev->stop_on_err)
3222                         tpc_qm_err_cfg |=
3223                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3224
3225                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3226
3227                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3228                         lower_32_bits(CFG_BASE + irq_handler_offset));
3229                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3230                         upper_32_bits(CFG_BASE + irq_handler_offset));
3231
3232                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3233                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3234                                                                         tpc_id);
3235
3236                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3237                                 QM_ARB_ERR_MSG_EN_MASK);
3238
3239                 /* Increase ARB WDT to support streams architecture */
3240                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3241                                 GAUDI_ARB_WDT_TIMEOUT);
3242
3243                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3244                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3245                                 QMAN_INTERNAL_MAKE_TRUSTED);
3246         }
3247
3248         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3249         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3250         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3251         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3252
3253         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3254         if (tpc_id == 6) {
3255                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3256                                 mtr_base_ws_lo);
3257                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3258                                 mtr_base_ws_hi);
3259                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3260                                 so_base_ws_lo);
3261                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3262                                 so_base_ws_hi);
3263         }
3264 }
3265
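/*
 * gaudi_init_tpc_qmans() - for every TPC engine, program the four stream
 * (upper CP) queues, then the lower CP (qman_id 4), enable the QMAN and the
 * TPC channel, and write the upper 32 bits of the sync manager SOB base into
 * that TPC's SM_BASE_ADDRESS_HIGH register.
 */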
3266 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3267 {
3268         struct gaudi_device *gaudi = hdev->asic_specific;
3269         struct gaudi_internal_qman_info *q;
3270         u64 qman_base_addr;
3271         u32 so_base_hi, tpc_offset = 0;
3272         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3273                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3274         int i, tpc_id, internal_q_index;
3275
3276         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3277                 return;
3278
3279         so_base_hi = upper_32_bits(CFG_BASE +
3280                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3281
3282         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3283                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3284                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3285                                                 tpc_id * QMAN_STREAMS + i;
3286                         q = &gaudi->internal_qmans[internal_q_index];
3287                         qman_base_addr = (u64) q->pq_dma_addr;
3288                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3289                                                 qman_base_addr);
3290
3291                         if (i == 3) {
3292                                 /* Initializing lower CP for TPC QMAN */
3293                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3294
3295                                 /* Enable the QMAN and TPC channel */
3296                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3297                                                 QMAN_TPC_ENABLE);
3298                         }
3299                 }
3300
3301                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3302                                 so_base_hi);
3303
3304                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3305
3306                 gaudi->hw_cap_initialized |=
3307                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3308         }
3309 }
3310
3311 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3312                                 int qman_id, u64 qman_base_addr, int nic_id)
3313 {
3314         struct cpu_dyn_regs *dyn_regs =
3315                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3316         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3317         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3318         u32 nic_qm_err_cfg, irq_handler_offset;
3319         u32 q_off;
3320
3321         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3322                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3323         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3324                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3325         so_base_en_lo = lower_32_bits(CFG_BASE +
3326                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3327         so_base_en_hi = upper_32_bits(CFG_BASE +
3328                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3329         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3330                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3331         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3332                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3333         so_base_ws_lo = lower_32_bits(CFG_BASE +
3334                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3335         so_base_ws_hi = upper_32_bits(CFG_BASE +
3336                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3337
3338         q_off = nic_offset + qman_id * 4;
3339
3340         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3341         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3342
3343         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3344         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3345         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3346
3347         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3348                                                         QMAN_LDMA_SIZE_OFFSET);
3349         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3350                                                         QMAN_LDMA_SRC_OFFSET);
3351         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3352                                                         QMAN_LDMA_DST_OFFSET);
3353
3354         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3355         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3356         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3357         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3358
3359         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3360         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3361         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3362         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3363         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3364
3365         if (qman_id == 0) {
3366                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3367                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3368                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3369
3370                 /* Configure RAZWI IRQ */
3371                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3372                 if (hdev->stop_on_err)
3373                         nic_qm_err_cfg |=
3374                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3375
3376                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3377
3378                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3379                         lower_32_bits(CFG_BASE + irq_handler_offset));
3380                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3381                         upper_32_bits(CFG_BASE + irq_handler_offset));
3382
3383                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3384                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3385                                                                         nic_id);
3386
3387                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3388                                 QM_ARB_ERR_MSG_EN_MASK);
3389
3390                 /* Increase ARB WDT to support streams architecture */
3391                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3392                                 GAUDI_ARB_WDT_TIMEOUT);
3393
3394                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3395                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3396                                 QMAN_INTERNAL_MAKE_TRUSTED);
3397         }
3398 }
3399
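/*
 * gaudi_init_nic_qmans() - each NIC macro hosts two QMANs (QM0/QM1), so
 * nic_offset advances by the QMAN-to-QMAN delta per engine and, after every
 * odd nic_id, steps back two QMAN deltas and forward one NIC-to-NIC delta to
 * reach the next macro's QM0. Engines that are masked out in nic_ports_mask
 * are skipped, but the offset is still advanced.
 */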
3400 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3401 {
3402         struct gaudi_device *gaudi = hdev->asic_specific;
3403         struct gaudi_internal_qman_info *q;
3404         u64 qman_base_addr;
3405         u32 nic_offset = 0;
3406         u32 nic_delta_between_qmans =
3407                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3408         u32 nic_delta_between_nics =
3409                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3410         int i, nic_id, internal_q_index;
3411
3412         if (!hdev->nic_ports_mask)
3413                 return;
3414
3415         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3416                 return;
3417
3418         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3419
3420         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3421                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3422                         nic_offset += nic_delta_between_qmans;
3423                         if (nic_id & 1) {
3424                                 nic_offset -= (nic_delta_between_qmans * 2);
3425                                 nic_offset += nic_delta_between_nics;
3426                         }
3427                         continue;
3428                 }
3429
3430                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3431                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3432                                                 nic_id * QMAN_STREAMS + i;
3433                         q = &gaudi->internal_qmans[internal_q_index];
3434                         qman_base_addr = (u64) q->pq_dma_addr;
3435                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3436                                                 qman_base_addr, nic_id);
3437                 }
3438
3439                 /* Enable the QMAN */
3440                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3441
3442                 nic_offset += nic_delta_between_qmans;
3443                 if (nic_id & 1) {
3444                         nic_offset -= (nic_delta_between_qmans * 2);
3445                         nic_offset += nic_delta_between_nics;
3446                 }
3447
3448                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3449         }
3450 }
3451
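/*
 * The gaudi_disable_*_qmans() helpers below clear GLBL_CFG0, dropping the
 * QMAN PQF/CQF/CP enable bits entirely, whereas the gaudi_stop_*_qmans()
 * helpers further down only set the CP_STOP (and, for NIC, PQF/CQF_STOP)
 * bits in GLBL_CFG1 to stop command processing.
 */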
3452 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3453 {
3454         struct gaudi_device *gaudi = hdev->asic_specific;
3455
3456         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3457                 return;
3458
3459         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3460         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3461         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3462 }
3463
3464 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3465 {
3466         struct gaudi_device *gaudi = hdev->asic_specific;
3467
3468         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3469                 return;
3470
3471         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3472         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3473         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3474         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3475         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3476 }
3477
3478 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3479 {
3480         struct gaudi_device *gaudi = hdev->asic_specific;
3481
3482         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3483                 return;
3484
3485         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3486         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3487 }
3488
3489 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3490 {
3491         struct gaudi_device *gaudi = hdev->asic_specific;
3492         u32 tpc_offset = 0;
3493         int tpc_id;
3494
3495         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3496                 return;
3497
3498         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3499                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3500                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3501         }
3502 }
3503
3504 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3505 {
3506         struct gaudi_device *gaudi = hdev->asic_specific;
3507         u32 nic_mask, nic_offset = 0;
3508         u32 nic_delta_between_qmans =
3509                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3510         u32 nic_delta_between_nics =
3511                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3512         int nic_id;
3513
3514         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3515                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3516
3517                 if (gaudi->hw_cap_initialized & nic_mask)
3518                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3519
3520                 nic_offset += nic_delta_between_qmans;
3521                 if (nic_id & 1) {
3522                         nic_offset -= (nic_delta_between_qmans * 2);
3523                         nic_offset += nic_delta_between_nics;
3524                 }
3525         }
3526 }
3527
3528 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3529 {
3530         struct gaudi_device *gaudi = hdev->asic_specific;
3531
3532         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3533                 return;
3534
3535         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3536         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3537         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3538         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3539 }
3540
3541 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3542 {
3543         struct gaudi_device *gaudi = hdev->asic_specific;
3544
3545         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3546                 return;
3547
3548         /* Stop CPs of HBM DMA QMANs */
3549
3550         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3551         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3552         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3553         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3554         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3555 }
3556
3557 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3558 {
3559         struct gaudi_device *gaudi = hdev->asic_specific;
3560
3561         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3562                 return;
3563
3564         /* Stop CPs of MME QMANs */
3565         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3566         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3567 }
3568
3569 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3570 {
3571         struct gaudi_device *gaudi = hdev->asic_specific;
3572
3573         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3574                 return;
3575
3576         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3577         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3578         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3579         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3580         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3581         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3582         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3583         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3584 }
3585
3586 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3587 {
3588         struct gaudi_device *gaudi = hdev->asic_specific;
3589
3590         /* Stop upper CPs of QMANs */
3591
3592         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3593                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3594                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3595                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3596                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3597
3598         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3599                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3600                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3601                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3602                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3603
3604         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3605                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3606                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3607                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3608                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3609
3610         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3611                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3612                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3613                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3614                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3615
3616         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3617                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3618                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3619                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3620                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3621
3622         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3623                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3624                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3625                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3626                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3627
3628         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3629                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3630                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3631                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3632                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3633
3634         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3635                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3636                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3637                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3638                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3639
3640         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3641                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3642                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3643                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3644                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3645
3646         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3647                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3648                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3649                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3650                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3651 }
3652
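/*
 * The *_stall() helpers below halt the engines themselves (DMA core HALT,
 * MME ACC/SBAB stall, TPC stall) rather than their QMANs, complementing the
 * stop/disable routines above.
 */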
3653 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3654 {
3655         struct gaudi_device *gaudi = hdev->asic_specific;
3656
3657         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3658                 return;
3659
3660         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3661         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3662         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3663 }
3664
3665 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3666 {
3667         struct gaudi_device *gaudi = hdev->asic_specific;
3668
3669         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3670                 return;
3671
3672         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3673         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3674         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3675         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3676         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3677 }
3678
3679 static void gaudi_mme_stall(struct hl_device *hdev)
3680 {
3681         struct gaudi_device *gaudi = hdev->asic_specific;
3682
3683         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3684                 return;
3685
3686         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3687         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3688         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3689         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3690         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3691         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3692         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3693         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3694         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3695         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3696         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3697         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3698         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3699         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3700         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3701         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3702         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3703 }
3704
3705 static void gaudi_tpc_stall(struct hl_device *hdev)
3706 {
3707         struct gaudi_device *gaudi = hdev->asic_specific;
3708
3709         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3710                 return;
3711
3712         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3713         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3714         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3715         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3716         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3717         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3718         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3719         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3720 }
3721
3722 static void gaudi_set_clock_gating(struct hl_device *hdev)
3723 {
3724         struct gaudi_device *gaudi = hdev->asic_specific;
3725         u32 qman_offset;
3726         bool enable;
3727         int i;
3728
3729         /* If a debug session is in progress, don't enable clock gating,
3730          * as it may interfere
3731          */
3732         if (hdev->in_debug)
3733                 return;
3734
3735         if (hdev->asic_prop.fw_security_enabled)
3736                 return;
3737
3738         for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3739                 enable = !!(hdev->clock_gating_mask &
3740                                 (BIT_ULL(gaudi_dma_assignment[i])));
3741
3742                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3743                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3744                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3745                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3746                                 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3747         }
3748
3749         for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3750                 enable = !!(hdev->clock_gating_mask &
3751                                 (BIT_ULL(gaudi_dma_assignment[i])));
3752
3753                 /* GC sends work to the DMA engine through the Upper CP in
3754                  * DMA5, so clock gating must not be enabled for that DMA
3755                  */
3756                 if (i == GAUDI_HBM_DMA_4)
3757                         enable = 0;
3758
3759                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3760                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3761                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3762                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3763                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3764         }
3765
3766         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3767         WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3768         WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3769
3770         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3771         WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3772         WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3773
3774         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3775                 enable = !!(hdev->clock_gating_mask &
3776                                 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3777
3778                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3779                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3780                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3781                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3782
3783                 qman_offset += TPC_QMAN_OFFSET;
3784         }
3785
3786         gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3787 }
3788
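/* Clear the CGM configuration of all DMA, MME and TPC QMANs, disabling clock
 * gating, and drop HW_CAP_CLK_GATE. Skipped when the FW owns the security
 * configuration.
 */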
3789 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3790 {
3791         struct gaudi_device *gaudi = hdev->asic_specific;
3792         u32 qman_offset;
3793         int i;
3794
3795         if (hdev->asic_prop.fw_security_enabled)
3796                 return;
3797
3798         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3799                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3800                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3801
3802                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3803         }
3804
3805         WREG32(mmMME0_QM_CGM_CFG, 0);
3806         WREG32(mmMME0_QM_CGM_CFG1, 0);
3807         WREG32(mmMME2_QM_CGM_CFG, 0);
3808         WREG32(mmMME2_QM_CGM_CFG1, 0);
3809
3810         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3811                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3812                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3813
3814                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3815         }
3816
3817         gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3818 }
3819
3820 static void gaudi_enable_timestamp(struct hl_device *hdev)
3821 {
3822         /* Disable the timestamp counter */
3823         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3824
3825         /* Zero the lower/upper parts of the 64-bit counter */
3826         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3827         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3828
3829         /* Enable the counter */
3830         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3831 }
3832
3833 static void gaudi_disable_timestamp(struct hl_device *hdev)
3834 {
3835         /* Disable the timestamp counter */
3836         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3837 }
3838
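/* Halt the compute engines in a fixed order: stop the QMANs, disable clock
 * gating, stall the engines, disable the QMANs and the timestamp counter and,
 * finally, disable MSI. When the FW performs the reset (fw_reset), everything
 * except the MSI disabling is skipped.
 */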
3839 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3840 {
3841         u32 wait_timeout_ms;
3842
3843         dev_info(hdev->dev,
3844                 "Halting compute engines and disabling interrupts\n");
3845
3846         if (hdev->pldm)
3847                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3848         else
3849                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3850
3851         if (fw_reset)
3852                 goto skip_engines;
3853
3854         gaudi_stop_nic_qmans(hdev);
3855         gaudi_stop_mme_qmans(hdev);
3856         gaudi_stop_tpc_qmans(hdev);
3857         gaudi_stop_hbm_dma_qmans(hdev);
3858         gaudi_stop_pci_dma_qmans(hdev);
3859
3860         hdev->asic_funcs->disable_clock_gating(hdev);
3861
3862         msleep(wait_timeout_ms);
3863
3864         gaudi_pci_dma_stall(hdev);
3865         gaudi_hbm_dma_stall(hdev);
3866         gaudi_tpc_stall(hdev);
3867         gaudi_mme_stall(hdev);
3868
3869         msleep(wait_timeout_ms);
3870
3871         gaudi_disable_nic_qmans(hdev);
3872         gaudi_disable_mme_qmans(hdev);
3873         gaudi_disable_tpc_qmans(hdev);
3874         gaudi_disable_hbm_dma_qmans(hdev);
3875         gaudi_disable_pci_dma_qmans(hdev);
3876
3877         gaudi_disable_timestamp(hdev);
3878
3879 skip_engines:
3880         gaudi_disable_msi(hdev);
3881 }
3882
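/* Set the hop0 page-table address for every ASID, configure the STLB cache
 * management region, invalidate the MMU cache and enable the MMU. Returns 0
 * on success or the error returned when setting a hop0 address fails.
 */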
3883 static int gaudi_mmu_init(struct hl_device *hdev)
3884 {
3885         struct asic_fixed_properties *prop = &hdev->asic_prop;
3886         struct gaudi_device *gaudi = hdev->asic_specific;
3887         u64 hop0_addr;
3888         int rc, i;
3889
3890         if (!hdev->mmu_enable)
3891                 return 0;
3892
3893         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3894                 return 0;
3895
3896         for (i = 0 ; i < prop->max_asid ; i++) {
3897                 hop0_addr = prop->mmu_pgt_addr +
3898                                 (i * prop->mmu_hop_table_size);
3899
3900                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3901                 if (rc) {
3902                         dev_err(hdev->dev,
3903                                 "failed to set hop0 addr for asid %d\n", i);
3904                         goto err;
3905                 }
3906         }
3907
3908         /* init MMU cache management page */
3909         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3910         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3911
3912         /* mem cache invalidation */
3913         WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3914
3915         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3916
3917         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3918         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3919
3920         WREG32(mmSTLB_HOP_CONFIGURATION,
3921                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3922
3923         /*
3924          * The H/W expects the first PI after init to be 1. After wraparound
3925          * we'll write 0.
3926          */
3927         gaudi->mmu_cache_inv_pi = 1;
3928
3929         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3930
3931         return 0;
3932
3933 err:
3934         return rc;
3935 }
3936
3937 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3938 {
3939         void __iomem *dst;
3940
3941         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3942
3943         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3944 }
3945
3946 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3947 {
3948         void __iomem *dst;
3949
3950         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3951
3952         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3953 }
3954
3955 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3956 {
3957         struct dynamic_fw_load_mgr *dynamic_loader;
3958         struct cpu_dyn_regs *dyn_regs;
3959
3960         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3961
3962         /*
3963          * Update the initial values of a few specific dynamic registers.
3964          * Before the first descriptor is read from the FW, these values
3965          * have to be hard-coded. In later stages of the protocol they are
3966          * updated automatically by reading the FW descriptor, so the data
3967          * there is always up-to-date.
3968          */
3969         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3970         dyn_regs->kmd_msg_to_cpu =
3971                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3972         dyn_regs->cpu_cmd_status_to_host =
3973                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3974
3975         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3976 }
3977
3978 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3979 {
3980         struct static_fw_load_mgr *static_loader;
3981
3982         static_loader = &hdev->fw_loader.static_loader;
3983
3984         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3985         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3986         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3987         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3988         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3989         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3990         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3991         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3992         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3993         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3994         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3995         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3996         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3997                         GAUDI_PLDM_RESET_WAIT_MSEC :
3998                         GAUDI_CPU_RESET_WAIT_MSEC;
3999 }
4000
4001 static void gaudi_init_firmware_loader(struct hl_device *hdev)
4002 {
4003         struct asic_fixed_properties *prop = &hdev->asic_prop;
4004         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4005
4006         /* fill common fields */
4007         fw_loader->linux_loaded = false;
4008         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
4009         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
4010         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
4011         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
4012         fw_loader->skip_bmc = !hdev->bmc_enable;
4013         fw_loader->sram_bar_id = SRAM_BAR_ID;
4014         fw_loader->dram_bar_id = HBM_BAR_ID;
4015
4016         if (prop->dynamic_fw_load)
4017                 gaudi_init_dynamic_firmware_loader(hdev);
4018         else
4019                 gaudi_init_static_firmware_loader(hdev);
4020 }
4021
4022 static int gaudi_init_cpu(struct hl_device *hdev)
4023 {
4024         struct gaudi_device *gaudi = hdev->asic_specific;
4025         int rc;
4026
4027         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4028                 return 0;
4029
4030         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
4031                 return 0;
4032
4033         /*
4034          * The device CPU works with 40 bits addresses.
4035          * This register sets the extension to 50 bits.
4036          */
4037         if (!hdev->asic_prop.fw_security_enabled)
4038                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
4039
4040         rc = hl_fw_init_cpu(hdev);
4041
4042         if (rc)
4043                 return rc;
4044
4045         gaudi->hw_cap_initialized |= HW_CAP_CPU;
4046
4047         return 0;
4048 }
4049
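/* Publish the PQ, EQ and CQ base addresses and sizes to the device CPU,
 * signal it through the PI-update interrupt and poll mmCPU_IF_QUEUE_INIT
 * until the CPU reports PQ_INIT_STATUS_READY_FOR_HOST or cpu_timeout expires.
 */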
4050 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4051 {
4052         struct cpu_dyn_regs *dyn_regs =
4053                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4054         struct asic_fixed_properties *prop = &hdev->asic_prop;
4055         struct gaudi_device *gaudi = hdev->asic_specific;
4056         u32 status, irq_handler_offset;
4057         struct hl_eq *eq;
4058         struct hl_hw_queue *cpu_pq =
4059                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4060         int err;
4061
4062         if (!hdev->cpu_queues_enable)
4063                 return 0;
4064
4065         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4066                 return 0;
4067
4068         eq = &hdev->event_queue;
4069
4070         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4071         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4072
4073         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4074         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4075
4076         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4077                         lower_32_bits(hdev->cpu_accessible_dma_address));
4078         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4079                         upper_32_bits(hdev->cpu_accessible_dma_address));
4080
4081         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4082         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4083         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4084
4085         /* Used for EQ CI */
4086         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4087
4088         WREG32(mmCPU_IF_PF_PQ_PI, 0);
4089
4090         if (gaudi->multi_msi_mode)
4091                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4092         else
4093                 WREG32(mmCPU_IF_QUEUE_INIT,
4094                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4095
4096         irq_handler_offset = prop->gic_interrupts_enable ?
4097                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4098                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4099
4100         WREG32(irq_handler_offset,
4101                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4102
4103         err = hl_poll_timeout(
4104                 hdev,
4105                 mmCPU_IF_QUEUE_INIT,
4106                 status,
4107                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4108                 1000,
4109                 cpu_timeout);
4110
4111         if (err) {
4112                 dev_err(hdev->dev,
4113                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4114                 return -EIO;
4115         }
4116
4117         /* update FW application security bits */
4118         if (prop->fw_cpu_boot_dev_sts0_valid)
4119                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4120         if (prop->fw_cpu_boot_dev_sts1_valid)
4121                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4122
4123         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4124         return 0;
4125 }
4126
4127 static void gaudi_pre_hw_init(struct hl_device *hdev)
4128 {
4129         /* Perform read from the device to make sure device is up */
4130         RREG32(mmHW_STATE);
4131
4132         if (!hdev->asic_prop.fw_security_enabled) {
4133                 /* Set the access through PCI bars (Linux driver only) as
4134                  * secured
4135                  */
4136                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4137                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4138                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4139
4140                 /* Perform read to flush the waiting writes to ensure
4141                  * configuration was set in the device
4142                  */
4143                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4144         }
4145
4146         /*
4147          * Let's mark in the H/W that we have reached this point. We check
4148          * this value in the reset_before_init function to understand whether
4149          * we need to reset the chip before doing H/W init. This register is
4150          * cleared by the H/W upon H/W reset
4151          */
4152         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4153 }
4154
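/* Bring up the device: map the HBM bar to the DRAM base, initialize the
 * device CPU, scramblers, golden registers, MMU, security and all QMANs,
 * then enable clock gating, the timestamp counter, MSI and the CPU queues.
 */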
4155 static int gaudi_hw_init(struct hl_device *hdev)
4156 {
4157         struct gaudi_device *gaudi = hdev->asic_specific;
4158         int rc;
4159
4160         gaudi_pre_hw_init(hdev);
4161
4162         /* If the iATU is configured by the FW, the HBM bar ALWAYS points to
4163          * DRAM_PHYS_BASE. We set it here so that any later attempt to move
4164          * it to a different address will result in an error
4165          */
4166         if (hdev->asic_prop.iatu_done_by_fw)
4167                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4168
4169         /*
4170          * Before pushing u-boot/linux to device, need to set the hbm bar to
4171          * base address of dram
4172          */
4173         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4174                 dev_err(hdev->dev,
4175                         "failed to map HBM bar to DRAM base address\n");
4176                 return -EIO;
4177         }
4178
4179         rc = gaudi_init_cpu(hdev);
4180         if (rc) {
4181                 dev_err(hdev->dev, "failed to initialize CPU\n");
4182                 return rc;
4183         }
4184
4185         /* If clock gating was enabled in preboot, we need to disable it here
4186          * before touching the MME/TPC registers.
4187          * There is no need to take the clock gating mutex because no other
4188          * relevant code can run while this function runs
4189          */
4190         hdev->asic_funcs->disable_clock_gating(hdev);
4191
4192         /* SRAM scrambler must be initialized after CPU is running from HBM */
4193         gaudi_init_scrambler_sram(hdev);
4194
4195         /* This is here just in case we are working without CPU */
4196         gaudi_init_scrambler_hbm(hdev);
4197
4198         gaudi_init_golden_registers(hdev);
4199
4200         rc = gaudi_mmu_init(hdev);
4201         if (rc)
4202                 return rc;
4203
4204         gaudi_init_security(hdev);
4205
4206         gaudi_init_pci_dma_qmans(hdev);
4207
4208         gaudi_init_hbm_dma_qmans(hdev);
4209
4210         gaudi_init_mme_qmans(hdev);
4211
4212         gaudi_init_tpc_qmans(hdev);
4213
4214         gaudi_init_nic_qmans(hdev);
4215
4216         hdev->asic_funcs->set_clock_gating(hdev);
4217
4218         gaudi_enable_timestamp(hdev);
4219
4220         /* MSI must be enabled before CPU queues and NIC are initialized */
4221         rc = gaudi_enable_msi(hdev);
4222         if (rc)
4223                 goto disable_queues;
4224
4225         /* must be called after MSI was enabled */
4226         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4227         if (rc) {
4228                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4229                         rc);
4230                 goto disable_msi;
4231         }
4232
4233         /* Perform read from the device to flush all configuration */
4234         RREG32(mmHW_STATE);
4235
4236         return 0;
4237
4238 disable_msi:
4239         gaudi_disable_msi(hdev);
4240 disable_queues:
4241         gaudi_disable_mme_qmans(hdev);
4242         gaudi_disable_pci_dma_qmans(hdev);
4243
4244         return rc;
4245 }
4246
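/* Perform a hard reset. Depending on the FW/security configuration, either
 * ask the FW to reset the chip or drive the PSOC reset registers directly,
 * then wait for the reset to complete and clear the relevant HW capability
 * bits.
 */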
4247 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4248 {
4249         struct cpu_dyn_regs *dyn_regs =
4250                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4251         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4252         struct gaudi_device *gaudi = hdev->asic_specific;
4253         bool driver_performs_reset;
4254
4255         if (!hard_reset) {
4256                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4257                 return;
4258         }
4259
4260         if (hdev->pldm) {
4261                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4262                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4263         } else {
4264                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4265                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4266         }
4267
4268         if (fw_reset) {
4269                 dev_info(hdev->dev,
4270                         "Firmware performs HARD reset, going to wait %dms\n",
4271                         reset_timeout_ms);
4272
4273                 goto skip_reset;
4274         }
4275
4276         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4277                                         !hdev->asic_prop.hard_reset_done_by_fw);
4278
4279         /* Set device to handle FLR by H/W as we will put the device CPU to
4280          * halt mode
4281          */
4282         if (driver_performs_reset)
4283                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4284                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4285
4286         /* If Linux is loaded on the device CPU we need to communicate with
4287          * it via the GIC. Otherwise, we use COMMS, or the MSG_TO_CPU
4288          * registers in the case of old F/Ws
4289          */
4290         if (hdev->fw_loader.linux_loaded) {
4291                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4292                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4293                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4294
4295                 WREG32(irq_handler_offset,
4296                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4297         } else {
4298                 if (hdev->asic_prop.hard_reset_done_by_fw)
4299                         hl_fw_ask_hard_reset_without_linux(hdev);
4300                 else
4301                         hl_fw_ask_halt_machine_without_linux(hdev);
4302         }
4303
4304         if (driver_performs_reset) {
4305
4306                 /* Configure the reset registers. Must be done as early as
4307                  * possible in case we fail during H/W initialization
4308                  */
4309                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4310                                                 (CFG_RST_H_DMA_MASK |
4311                                                 CFG_RST_H_MME_MASK |
4312                                                 CFG_RST_H_SM_MASK |
4313                                                 CFG_RST_H_TPC_7_MASK));
4314
4315                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4316
4317                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4318                                                 (CFG_RST_H_HBM_MASK |
4319                                                 CFG_RST_H_TPC_7_MASK |
4320                                                 CFG_RST_H_NIC_MASK |
4321                                                 CFG_RST_H_SM_MASK |
4322                                                 CFG_RST_H_DMA_MASK |
4323                                                 CFG_RST_H_MME_MASK |
4324                                                 CFG_RST_H_CPU_MASK |
4325                                                 CFG_RST_H_MMU_MASK));
4326
4327                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4328                                                 (CFG_RST_L_IF_MASK |
4329                                                 CFG_RST_L_PSOC_MASK |
4330                                                 CFG_RST_L_TPC_MASK));
4331
4332                 msleep(cpu_timeout_ms);
4333
4334                 /* Tell ASIC not to re-initialize PCIe */
4335                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4336
4337                 /* Restart BTL/BLR upon hard-reset */
4338                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4339
4340                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4341                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4342
4343                 dev_info(hdev->dev,
4344                         "Issued HARD reset command, going to wait %dms\n",
4345                         reset_timeout_ms);
4346         } else {
4347                 dev_info(hdev->dev,
4348                         "Firmware performs HARD reset, going to wait %dms\n",
4349                         reset_timeout_ms);
4350         }
4351
4352 skip_reset:
4353         /*
4354          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4355          * itself is in reset. Need to wait until the reset is deasserted
4356          */
4357         msleep(reset_timeout_ms);
4358
4359         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4360         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4361                 dev_err(hdev->dev,
4362                         "Timeout while waiting for device to reset 0x%x\n",
4363                         status);
4364
4365         if (gaudi) {
4366                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4367                                 HW_CAP_HBM | HW_CAP_PCI_DMA |
4368                                 HW_CAP_MME | HW_CAP_TPC_MASK |
4369                                 HW_CAP_HBM_DMA | HW_CAP_PLL |
4370                                 HW_CAP_NIC_MASK | HW_CAP_MMU |
4371                                 HW_CAP_SRAM_SCRAMBLER |
4372                                 HW_CAP_HBM_SCRAMBLER |
4373                                 HW_CAP_CLK_GATE);
4374
4375                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4376
4377                 hdev->device_cpu_is_halted = false;
4378         }
4379 }
4380
4381 static int gaudi_suspend(struct hl_device *hdev)
4382 {
4383         int rc;
4384
4385         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4386         if (rc)
4387                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4388
4389         return rc;
4390 }
4391
4392 static int gaudi_resume(struct hl_device *hdev)
4393 {
4394         return gaudi_init_iatu(hdev);
4395 }
4396
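/* Map a coherent DMA allocation to user space. The dma_addr passed in already
 * includes HOST_PHYS_BASE, so it is subtracted back before calling
 * dma_mmap_coherent().
 */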
4397 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4398                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4399 {
4400         int rc;
4401
4402         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4403                         VM_DONTCOPY | VM_NORESERVE;
4404
4405         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4406                                 (dma_addr - HOST_PHYS_BASE), size);
4407         if (rc)
4408                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4409
4410         return rc;
4411 }
4412
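/* Translate a driver H/W queue ID to the matching QMAN PQ_PI doorbell
 * register and write the new producer index to it. For the CPU PQ, also
 * trigger the PI-update interrupt so the device CPU reads the new PI.
 */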
4413 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4414 {
4415         struct cpu_dyn_regs *dyn_regs =
4416                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4417         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4418         struct gaudi_device *gaudi = hdev->asic_specific;
4419         bool invalid_queue = false;
4420         int dma_id;
4421
4422         switch (hw_queue_id) {
4423         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4424                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4425                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4426                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4427                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4428                 break;
4429
4430         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4431                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4432                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4433                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4434                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4435                 break;
4436
4437         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4438                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4439                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4440                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4441                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4442                 break;
4443
4444         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4445                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4446                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4447                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4448                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4449                 break;
4450
4451         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4452                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4453                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4454                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4455                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4456                 break;
4457
4458         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4459                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4460                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4461                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4462                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4463                 break;
4464
4465         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4466                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4467                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4468                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4469                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4470                 break;
4471
4472         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4473                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4474                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4475                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4476                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4477                 break;
4478
4479         case GAUDI_QUEUE_ID_CPU_PQ:
4480                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4481                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4482                 else
4483                         invalid_queue = true;
4484                 break;
4485
4486         case GAUDI_QUEUE_ID_MME_0_0:
4487                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4488                 break;
4489
4490         case GAUDI_QUEUE_ID_MME_0_1:
4491                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4492                 break;
4493
4494         case GAUDI_QUEUE_ID_MME_0_2:
4495                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4496                 break;
4497
4498         case GAUDI_QUEUE_ID_MME_0_3:
4499                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4500                 break;
4501
4502         case GAUDI_QUEUE_ID_MME_1_0:
4503                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4504                 break;
4505
4506         case GAUDI_QUEUE_ID_MME_1_1:
4507                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4508                 break;
4509
4510         case GAUDI_QUEUE_ID_MME_1_2:
4511                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4512                 break;
4513
4514         case GAUDI_QUEUE_ID_MME_1_3:
4515                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4516                 break;
4517
4518         case GAUDI_QUEUE_ID_TPC_0_0:
4519                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4520                 break;
4521
4522         case GAUDI_QUEUE_ID_TPC_0_1:
4523                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4524                 break;
4525
4526         case GAUDI_QUEUE_ID_TPC_0_2:
4527                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4528                 break;
4529
4530         case GAUDI_QUEUE_ID_TPC_0_3:
4531                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4532                 break;
4533
4534         case GAUDI_QUEUE_ID_TPC_1_0:
4535                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4536                 break;
4537
4538         case GAUDI_QUEUE_ID_TPC_1_1:
4539                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4540                 break;
4541
4542         case GAUDI_QUEUE_ID_TPC_1_2:
4543                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4544                 break;
4545
4546         case GAUDI_QUEUE_ID_TPC_1_3:
4547                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4548                 break;
4549
4550         case GAUDI_QUEUE_ID_TPC_2_0:
4551                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4552                 break;
4553
4554         case GAUDI_QUEUE_ID_TPC_2_1:
4555                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4556                 break;
4557
4558         case GAUDI_QUEUE_ID_TPC_2_2:
4559                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4560                 break;
4561
4562         case GAUDI_QUEUE_ID_TPC_2_3:
4563                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4564                 break;
4565
4566         case GAUDI_QUEUE_ID_TPC_3_0:
4567                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4568                 break;
4569
4570         case GAUDI_QUEUE_ID_TPC_3_1:
4571                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4572                 break;
4573
4574         case GAUDI_QUEUE_ID_TPC_3_2:
4575                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4576                 break;
4577
4578         case GAUDI_QUEUE_ID_TPC_3_3:
4579                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4580                 break;
4581
4582         case GAUDI_QUEUE_ID_TPC_4_0:
4583                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4584                 break;
4585
4586         case GAUDI_QUEUE_ID_TPC_4_1:
4587                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4588                 break;
4589
4590         case GAUDI_QUEUE_ID_TPC_4_2:
4591                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4592                 break;
4593
4594         case GAUDI_QUEUE_ID_TPC_4_3:
4595                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4596                 break;
4597
4598         case GAUDI_QUEUE_ID_TPC_5_0:
4599                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4600                 break;
4601
4602         case GAUDI_QUEUE_ID_TPC_5_1:
4603                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4604                 break;
4605
4606         case GAUDI_QUEUE_ID_TPC_5_2:
4607                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4608                 break;
4609
4610         case GAUDI_QUEUE_ID_TPC_5_3:
4611                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4612                 break;
4613
4614         case GAUDI_QUEUE_ID_TPC_6_0:
4615                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4616                 break;
4617
4618         case GAUDI_QUEUE_ID_TPC_6_1:
4619                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4620                 break;
4621
4622         case GAUDI_QUEUE_ID_TPC_6_2:
4623                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4624                 break;
4625
4626         case GAUDI_QUEUE_ID_TPC_6_3:
4627                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4628                 break;
4629
4630         case GAUDI_QUEUE_ID_TPC_7_0:
4631                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4632                 break;
4633
4634         case GAUDI_QUEUE_ID_TPC_7_1:
4635                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4636                 break;
4637
4638         case GAUDI_QUEUE_ID_TPC_7_2:
4639                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4640                 break;
4641
4642         case GAUDI_QUEUE_ID_TPC_7_3:
4643                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4644                 break;
4645
4646         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4647                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4648                         invalid_queue = true;
4649
4650                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4651                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4652                 break;
4653
4654         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4655                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4656                         invalid_queue = true;
4657
4658                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4659                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4660                 break;
4661
4662         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4663                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4664                         invalid_queue = true;
4665
4666                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4667                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4668                 break;
4669
4670         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4671                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4672                         invalid_queue = true;
4673
4674                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4675                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4676                 break;
4677
4678         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4679                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4680                         invalid_queue = true;
4681
4682                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4683                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4684                 break;
4685
4686         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4687                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4688                         invalid_queue = true;
4689
4690                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4691                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4692                 break;
4693
4694         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4695                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4696                         invalid_queue = true;
4697
4698                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4699                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4700                 break;
4701
4702         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4703                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4704                         invalid_queue = true;
4705
4706                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4707                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4708                 break;
4709
4710         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4711                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4712                         invalid_queue = true;
4713
4714                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4715                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4716                 break;
4717
4718         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4719                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4720                         invalid_queue = true;
4721
4722                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4723                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4724                 break;
4725
4726         default:
4727                 invalid_queue = true;
4728         }
4729
4730         if (invalid_queue) {
4731                 /* Should never get here */
4732                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4733                         hw_queue_id);
4734                 return;
4735         }
4736
4737         db_value = pi;
4738
4739         /* ring the doorbell */
4740         WREG32(db_reg_offset, db_value);
4741
4742         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4743                 /* make sure device CPU will read latest data from host */
4744                 mb();
4745
4746                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4747                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4748                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4749
4750                 WREG32(irq_handler_offset,
4751                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4752         }
4753 }
4754
4755 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4756                                 struct hl_bd *bd)
4757 {
4758         __le64 *pbd = (__le64 *) bd;
4759
4760         /* The QMANs are on host memory, so a simple copy suffices */
4761         pqe[0] = pbd[0];
4762         pqe[1] = pbd[1];
4763 }
4764
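/* Host DMA addresses handed to the device are shifted by HOST_PHYS_BASE, the
 * device-side base address of host memory; the matching free/unmap helpers
 * below cancel that shift before returning the address to the DMA API.
 */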
4765 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4766                                         dma_addr_t *dma_handle, gfp_t flags)
4767 {
4768         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4769                                                 dma_handle, flags);
4770
4771         /* Shift to the device's base physical address of host memory */
4772         if (kernel_addr)
4773                 *dma_handle += HOST_PHYS_BASE;
4774
4775         return kernel_addr;
4776 }
4777
4778 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4779                 void *cpu_addr, dma_addr_t dma_handle)
4780 {
4781         /* Cancel the device's base physical address of host memory */
4782         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4783
4784         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4785 }
4786
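/* Scrub the user HBM range by issuing memset transfers on all DMA cores in
 * parallel, in chunks of up to 2GB, waiting for every core to become idle
 * before starting the next batch.
 */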
4787 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4788 {
4789         struct asic_fixed_properties *prop = &hdev->asic_prop;
4790         u64  cur_addr = DRAM_BASE_ADDR_USER;
4791         u32 val;
4792         u32 chunk_size;
4793         int rc, dma_id;
4794
4795         while (cur_addr < prop->dram_end_address) {
4796                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4797                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4798
4799                         chunk_size =
4800                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4801
4802                         dev_dbg(hdev->dev,
4803                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4804                                 cur_addr, cur_addr + chunk_size);
4805
4806                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
4807                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
4808                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4809                                                 lower_32_bits(cur_addr));
4810                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4811                                                 upper_32_bits(cur_addr));
4812                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4813                                         chunk_size);
4814                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4815                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4816                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4817
4818                         cur_addr += chunk_size;
4819
4820                         if (cur_addr == prop->dram_end_address)
4821                                 break;
4822                 }
4823
4824                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4825                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4826
4827                         rc = hl_poll_timeout(
4828                                 hdev,
4829                                 mmDMA0_CORE_STS0 + dma_offset,
4830                                 val,
4831                                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4832                                 1000,
4833                                 HBM_SCRUBBING_TIMEOUT_US);
4834
4835                         if (rc) {
4836                                 dev_err(hdev->dev,
4837                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4838                                         dma_id);
4839                                 return -EIO;
4840                         }
4841                 }
4842         }
4843
4844         return 0;
4845 }
4846
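/* Scrub device memory. When called with addr == 0 and size == 0, wait for
 * the device to become idle, fill the user SRAM with a fixed pattern and
 * then scrub the whole HBM; any other range is currently left untouched.
 */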
4847 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4848 {
4849         struct asic_fixed_properties *prop = &hdev->asic_prop;
4850         struct gaudi_device *gaudi = hdev->asic_specific;
4851         int rc = 0;
4852         u64 val = 0;
4853
4854         if (!hdev->memory_scrub)
4855                 return 0;
4856
4857         if (!addr && !size) {
4858                 /* Wait till device is idle */
4859                 rc = hl_poll_timeout(
4860                                 hdev,
4861                                 mmDMA0_CORE_STS0/* dummy */,
4862                                 val/* dummy */,
4863                                 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4864                                                 0, NULL)),
4865                                                 1000,
4866                                                 HBM_SCRUBBING_TIMEOUT_US);
4867                 if (rc) {
4868                         dev_err(hdev->dev, "waiting for idle timeout\n");
4869                         return -EIO;
4870                 }
4871
4872                 /* Scrub SRAM */
4873                 addr = prop->sram_user_base_address;
4874                 size = hdev->pldm ? 0x10000 :
4875                                 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4876                 val = 0x7777777777777777ull;
4877
4878                 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4879                 if (rc) {
4880                         dev_err(hdev->dev,
4881                                 "Failed to clear SRAM in mem scrub all\n");
4882                         return rc;
4883                 }
4884
4885                 mutex_lock(&gaudi->clk_gate_mutex);
4886                 hdev->asic_funcs->disable_clock_gating(hdev);
4887
4888                 /* Scrub HBM using all DMA channels in parallel */
4889                 rc = gaudi_hbm_scrubbing(hdev);
4890                 if (rc)
4891                         dev_err(hdev->dev,
4892                                 "Failed to clear HBM in mem scrub all\n");
4893
4894                 hdev->asic_funcs->set_clock_gating(hdev);
4895                 mutex_unlock(&gaudi->clk_gate_mutex);
4896         }
4897
4898         return rc;
4899 }
4900
4901 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4902                                 u32 queue_id, dma_addr_t *dma_handle,
4903                                 u16 *queue_len)
4904 {
4905         struct gaudi_device *gaudi = hdev->asic_specific;
4906         struct gaudi_internal_qman_info *q;
4907
4908         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4909                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4910                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4911                 return NULL;
4912         }
4913
4914         q = &gaudi->internal_qmans[queue_id];
4915         *dma_handle = q->pq_dma_addr;
4916         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4917
4918         return q->pq_kernel_addr;
4919 }
4920
4921 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4922                                 u16 len, u32 timeout, u64 *result)
4923 {
4924         struct gaudi_device *gaudi = hdev->asic_specific;
4925
4926         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4927                 if (result)
4928                         *result = 0;
4929                 return 0;
4930         }
4931
4932         if (!timeout)
4933                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4934
4935         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4936                                                 timeout, result);
4937 }
4938
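/* Sanity-test an external H/W queue: send a MSG_PROT fence packet that writes
 * GAUDI_QMAN0_FENCE_VAL to a scratch DMA buffer and poll that buffer until
 * the value arrives or the test times out.
 */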
4939 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4940 {
4941         struct packet_msg_prot *fence_pkt;
4942         dma_addr_t pkt_dma_addr;
4943         u32 fence_val, tmp, timeout_usec;
4944         dma_addr_t fence_dma_addr;
4945         u32 *fence_ptr;
4946         int rc;
4947
4948         if (hdev->pldm)
4949                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4950         else
4951                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4952
4953         fence_val = GAUDI_QMAN0_FENCE_VAL;
4954
4955         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4956                                                         &fence_dma_addr);
4957         if (!fence_ptr) {
4958                 dev_err(hdev->dev,
4959                         "Failed to allocate memory for H/W queue %d testing\n",
4960                         hw_queue_id);
4961                 return -ENOMEM;
4962         }
4963
4964         *fence_ptr = 0;
4965
4966         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4967                                         sizeof(struct packet_msg_prot),
4968                                         GFP_KERNEL, &pkt_dma_addr);
4969         if (!fence_pkt) {
4970                 dev_err(hdev->dev,
4971                         "Failed to allocate packet for H/W queue %d testing\n",
4972                         hw_queue_id);
4973                 rc = -ENOMEM;
4974                 goto free_fence_ptr;
4975         }
4976
4977         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4978         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4979         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4980
4981         fence_pkt->ctl = cpu_to_le32(tmp);
4982         fence_pkt->value = cpu_to_le32(fence_val);
4983         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4984
4985         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4986                                         sizeof(struct packet_msg_prot),
4987                                         pkt_dma_addr);
4988         if (rc) {
4989                 dev_err(hdev->dev,
4990                         "Failed to send fence packet to H/W queue %d\n",
4991                         hw_queue_id);
4992                 goto free_pkt;
4993         }
4994
4995         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4996                                         1000, timeout_usec, true);
4997
4998         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4999
5000         if (rc == -ETIMEDOUT) {
5001                 dev_err(hdev->dev,
5002                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
5003                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
5004                 rc = -EIO;
5005         }
5006
5007 free_pkt:
5008         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
5009                                         pkt_dma_addr);
5010 free_fence_ptr:
5011         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
5012                                         fence_dma_addr);
5013         return rc;
5014 }
5015
5016 static int gaudi_test_cpu_queue(struct hl_device *hdev)
5017 {
5018         struct gaudi_device *gaudi = hdev->asic_specific;
5019
5020         /*
5021          * Check the capability here because send_cpu_message() won't update
5022          * the result value if the capability is not set
5023          */
5024         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
5025                 return 0;
5026
5027         return hl_fw_test_cpu_queue(hdev);
5028 }
5029
5030 static int gaudi_test_queues(struct hl_device *hdev)
5031 {
5032         int i, rc, ret_val = 0;
5033
5034         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
5035                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
5036                         rc = gaudi_test_queue(hdev, i);
5037                         if (rc)
5038                                 ret_val = -EINVAL;
5039                 }
5040         }
5041
5042         rc = gaudi_test_cpu_queue(hdev);
5043         if (rc)
5044                 ret_val = -EINVAL;
5045
5046         return ret_val;
5047 }
5048
5049 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5050                 gfp_t mem_flags, dma_addr_t *dma_handle)
5051 {
5052         void *kernel_addr;
5053
5054         if (size > GAUDI_DMA_POOL_BLK_SIZE)
5055                 return NULL;
5056
5057         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5058
5059         /* Shift to the device's base physical address of host memory */
5060         if (kernel_addr)
5061                 *dma_handle += HOST_PHYS_BASE;
5062
5063         return kernel_addr;
5064 }
5065
5066 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
5067                         dma_addr_t dma_addr)
5068 {
5069         /* Cancel the device's base physical address of host memory */
5070         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5071
5072         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5073 }
5074
5075 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
5076                                         size_t size, dma_addr_t *dma_handle)
5077 {
5078         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5079 }
5080
5081 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
5082                                                 size_t size, void *vaddr)
5083 {
5084         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5085 }
5086
5087 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5088                         int nents, enum dma_data_direction dir)
5089 {
5090         struct scatterlist *sg;
5091         int i;
5092
5093         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5094                 return -ENOMEM;
5095
5096         /* Shift to the device's base physical address of host memory */
5097         for_each_sg(sgl, sg, nents, i)
5098                 sg->dma_address += HOST_PHYS_BASE;
5099
5100         return 0;
5101 }
5102
5103 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5104                         int nents, enum dma_data_direction dir)
5105 {
5106         struct scatterlist *sg;
5107         int i;
5108
5109         /* Cancel the device's base physical address of host memory */
5110         for_each_sg(sgl, sg, nents, i)
5111                 sg->dma_address -= HOST_PHYS_BASE;
5112
5113         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5114 }
5115
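/* Return the total size, in bytes, of the LIN_DMA packets needed to cover an
 * SG table. Physically contiguous entries are merged into one descriptor as
 * long as their combined length still fits in DMA_MAX_TRANSFER_SIZE.
 */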
5116 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5117                                         struct sg_table *sgt)
5118 {
5119         struct scatterlist *sg, *sg_next_iter;
5120         u32 count, dma_desc_cnt;
5121         u64 len, len_next;
5122         dma_addr_t addr, addr_next;
5123
5124         dma_desc_cnt = 0;
5125
5126         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5127
5128                 len = sg_dma_len(sg);
5129                 addr = sg_dma_address(sg);
5130
5131                 if (len == 0)
5132                         break;
5133
5134                 while ((count + 1) < sgt->nents) {
5135                         sg_next_iter = sg_next(sg);
5136                         len_next = sg_dma_len(sg_next_iter);
5137                         addr_next = sg_dma_address(sg_next_iter);
5138
5139                         if (len_next == 0)
5140                                 break;
5141
5142                         if ((addr + len == addr_next) &&
5143                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5144                                 len += len_next;
5145                                 count++;
5146                                 sg = sg_next_iter;
5147                         } else {
5148                                 break;
5149                         }
5150                 }
5151
5152                 dma_desc_cnt++;
5153         }
5154
5155         return dma_desc_cnt * sizeof(struct packet_lin_dma);
5156 }
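/*
 * Worked example for the sizing loop above (illustrative addresses): three
 * DMA-mapped entries of 4 KiB at 0x1000, 0x2000 and 0x3000 are physically
 * contiguous, so they merge into a single descriptor as long as the combined
 * length stays within DMA_MAX_TRANSFER_SIZE; a fourth entry at a
 * non-adjacent address starts a new descriptor. The returned value,
 * dma_desc_cnt * sizeof(struct packet_lin_dma), is the space the patched CB
 * must reserve for this user LIN_DMA packet.
 */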
5157
5158 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5159                                 struct hl_cs_parser *parser,
5160                                 struct packet_lin_dma *user_dma_pkt,
5161                                 u64 addr, enum dma_data_direction dir)
5162 {
5163         struct hl_userptr *userptr;
5164         int rc;
5165
5166         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5167                         parser->job_userptr_list, &userptr))
5168                 goto already_pinned;
5169
5170         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5171         if (!userptr)
5172                 return -ENOMEM;
5173
5174         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5175                                 userptr);
5176         if (rc)
5177                 goto free_userptr;
5178
5179         list_add_tail(&userptr->job_node, parser->job_userptr_list);
5180
5181         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5182                                         userptr->sgt->nents, dir);
5183         if (rc) {
5184                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5185                 goto unpin_memory;
5186         }
5187
5188         userptr->dma_mapped = true;
5189         userptr->dir = dir;
5190
5191 already_pinned:
5192         parser->patched_cb_size +=
5193                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5194
5195         return 0;
5196
5197 unpin_memory:
5198         list_del(&userptr->job_node);
5199         hl_unpin_host_memory(hdev, userptr);
5200 free_userptr:
5201         kfree(userptr);
5202         return rc;
5203 }
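/*
 * Summary of the pinning flow above: the job's userptr list is consulted
 * first, so a buffer referenced by several LIN_DMA packets in the same job
 * is pinned and DMA-mapped only once. On a miss the host pages are pinned,
 * added to parser->job_userptr_list and mapped through
 * asic_funcs->asic_dma_map_sg; on a mapping failure the unwind path
 * (list_del, unpin, kfree) restores the previous state. Every successful
 * call grows patched_cb_size by gaudi_get_dma_desc_list_size() for this
 * buffer's scatter-gather table.
 */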
5204
5205 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5206                                 struct hl_cs_parser *parser,
5207                                 struct packet_lin_dma *user_dma_pkt,
5208                                 bool src_in_host)
5209 {
5210         enum dma_data_direction dir;
5211         bool skip_host_mem_pin = false, user_memset;
5212         u64 addr;
5213         int rc = 0;
5214
5215         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5216                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5217                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5218
5219         if (src_in_host) {
5220                 if (user_memset)
5221                         skip_host_mem_pin = true;
5222
5223                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5224                 dir = DMA_TO_DEVICE;
5225                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5226         } else {
5227                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5228                 dir = DMA_FROM_DEVICE;
5229                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5230                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5231                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5232         }
5233
5234         if (skip_host_mem_pin)
5235                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5236         else
5237                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5238                                                 addr, dir);
5239
5240         return rc;
5241 }
5242
5243 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5244                                 struct hl_cs_parser *parser,
5245                                 struct packet_lin_dma *user_dma_pkt)
5246 {
5247         bool src_in_host = false;
5248         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5249                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5250                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5251
5252         dev_dbg(hdev->dev, "DMA packet details:\n");
5253         dev_dbg(hdev->dev, "source == 0x%llx\n",
5254                                 le64_to_cpu(user_dma_pkt->src_addr));
5255         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5256         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5257
5258         /*
5259          * Special handling for DMA with size 0. Bypass all validations
5260          * because no transactions will be done except for WR_COMP, which
5261          * is not a security issue
5262          */
5263         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5264                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5265                 return 0;
5266         }
5267
5268         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5269                 src_in_host = true;
5270
5271         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5272                                                 src_in_host);
5273 }
5274
5275 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5276                                         struct hl_cs_parser *parser,
5277                                         struct packet_load_and_exe *user_pkt)
5278 {
5279         u32 cfg;
5280
5281         cfg = le32_to_cpu(user_pkt->cfg);
5282
5283         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5284                 dev_err(hdev->dev,
5285                         "User not allowed to use Load and Execute\n");
5286                 return -EPERM;
5287         }
5288
5289         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5290
5291         return 0;
5292 }
5293
5294 static int gaudi_validate_cb(struct hl_device *hdev,
5295                         struct hl_cs_parser *parser, bool is_mmu)
5296 {
5297         u32 cb_parsed_length = 0;
5298         int rc = 0;
5299
5300         parser->patched_cb_size = 0;
5301
5302         /* user_cb_size is more than 0 so the loop will always execute */
5303         while (cb_parsed_length < parser->user_cb_size) {
5304                 enum packet_id pkt_id;
5305                 u16 pkt_size;
5306                 struct gaudi_packet *user_pkt;
5307
5308                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5309
5310                 pkt_id = (enum packet_id) (
5311                                 (le64_to_cpu(user_pkt->header) &
5312                                 PACKET_HEADER_PACKET_ID_MASK) >>
5313                                         PACKET_HEADER_PACKET_ID_SHIFT);
5314
5315                 if (!validate_packet_id(pkt_id)) {
5316                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5317                         rc = -EINVAL;
5318                         break;
5319                 }
5320
5321                 pkt_size = gaudi_packet_sizes[pkt_id];
5322                 cb_parsed_length += pkt_size;
5323                 if (cb_parsed_length > parser->user_cb_size) {
5324                         dev_err(hdev->dev,
5325                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5326                         rc = -EINVAL;
5327                         break;
5328                 }
5329
5330                 switch (pkt_id) {
5331                 case PACKET_MSG_PROT:
5332                         dev_err(hdev->dev,
5333                                 "User not allowed to use MSG_PROT\n");
5334                         rc = -EPERM;
5335                         break;
5336
5337                 case PACKET_CP_DMA:
5338                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5339                         rc = -EPERM;
5340                         break;
5341
5342                 case PACKET_STOP:
5343                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5344                         rc = -EPERM;
5345                         break;
5346
5347                 case PACKET_WREG_BULK:
5348                         dev_err(hdev->dev,
5349                                 "User not allowed to use WREG_BULK\n");
5350                         rc = -EPERM;
5351                         break;
5352
5353                 case PACKET_LOAD_AND_EXE:
5354                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5355                                 (struct packet_load_and_exe *) user_pkt);
5356                         break;
5357
5358                 case PACKET_LIN_DMA:
5359                         parser->contains_dma_pkt = true;
5360                         if (is_mmu)
5361                                 parser->patched_cb_size += pkt_size;
5362                         else
5363                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5364                                         (struct packet_lin_dma *) user_pkt);
5365                         break;
5366
5367                 case PACKET_WREG_32:
5368                 case PACKET_MSG_LONG:
5369                 case PACKET_MSG_SHORT:
5370                 case PACKET_REPEAT:
5371                 case PACKET_FENCE:
5372                 case PACKET_NOP:
5373                 case PACKET_ARB_POINT:
5374                         parser->patched_cb_size += pkt_size;
5375                         break;
5376
5377                 default:
5378                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5379                                 pkt_id);
5380                         rc = -EINVAL;
5381                         break;
5382                 }
5383
5384                 if (rc)
5385                         break;
5386         }
5387
5388         /*
5389          * The new CB should have space at the end for two MSG_PROT packets:
5390          * 1. A packet that will act as a completion packet
5391          * 2. A packet that will generate MSI-X interrupt
5392          */
5393         if (parser->completion)
5394                 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5395
5396         return rc;
5397 }
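/*
 * patched_cb_size accounting example for the validation pass above
 * (illustrative packet mix, non-MMU case): a CB holding one WREG_32, one
 * FENCE and one LIN_DMA whose pinned buffer resolves to two DMA descriptors
 * contributes the two fixed packet sizes plus
 * 2 * sizeof(struct packet_lin_dma), and if parser->completion is set
 * another 2 * sizeof(struct packet_msg_prot) is reserved for the completion
 * and MSI-X packets appended later.
 */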
5398
5399 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5400                                 struct hl_cs_parser *parser,
5401                                 struct packet_lin_dma *user_dma_pkt,
5402                                 struct packet_lin_dma *new_dma_pkt,
5403                                 u32 *new_dma_pkt_size)
5404 {
5405         struct hl_userptr *userptr;
5406         struct scatterlist *sg, *sg_next_iter;
5407         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5408         u64 len, len_next;
5409         dma_addr_t dma_addr, dma_addr_next;
5410         u64 device_memory_addr, addr;
5411         enum dma_data_direction dir;
5412         struct sg_table *sgt;
5413         bool src_in_host = false;
5414         bool skip_host_mem_pin = false;
5415         bool user_memset;
5416
5417         ctl = le32_to_cpu(user_dma_pkt->ctl);
5418
5419         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5420                 src_in_host = true;
5421
5422         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5423                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5424
5425         if (src_in_host) {
5426                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5427                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5428                 dir = DMA_TO_DEVICE;
5429                 if (user_memset)
5430                         skip_host_mem_pin = true;
5431         } else {
5432                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5433                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5434                 dir = DMA_FROM_DEVICE;
5435         }
5436
5437         if ((!skip_host_mem_pin) &&
5438                 (!hl_userptr_is_pinned(hdev, addr,
5439                                         le32_to_cpu(user_dma_pkt->tsize),
5440                                         parser->job_userptr_list, &userptr))) {
5441                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5442                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5443                 return -EFAULT;
5444         }
5445
5446         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5447                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5448                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5449                 return 0;
5450         }
5451
5452         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5453
5454         sgt = userptr->sgt;
5455         dma_desc_cnt = 0;
5456
5457         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5458                 len = sg_dma_len(sg);
5459                 dma_addr = sg_dma_address(sg);
5460
5461                 if (len == 0)
5462                         break;
5463
5464                 while ((count + 1) < sgt->nents) {
5465                         sg_next_iter = sg_next(sg);
5466                         len_next = sg_dma_len(sg_next_iter);
5467                         dma_addr_next = sg_dma_address(sg_next_iter);
5468
5469                         if (len_next == 0)
5470                                 break;
5471
5472                         if ((dma_addr + len == dma_addr_next) &&
5473                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5474                                 len += len_next;
5475                                 count++;
5476                                 sg = sg_next_iter;
5477                         } else {
5478                                 break;
5479                         }
5480                 }
5481
5482                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5483                 if (likely(dma_desc_cnt))
5484                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5485                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5486                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5487                 new_dma_pkt->tsize = cpu_to_le32(len);
5488
5489                 if (dir == DMA_TO_DEVICE) {
5490                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5491                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5492                 } else {
5493                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5494                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5495                 }
5496
5497                 if (!user_memset)
5498                         device_memory_addr += len;
5499                 dma_desc_cnt++;
5500                 new_dma_pkt++;
5501         }
5502
5503         if (!dma_desc_cnt) {
5504                 dev_err(hdev->dev,
5505                         "Error of 0 SG entries when patching DMA packet\n");
5506                 return -EFAULT;
5507         }
5508
5509         /* Fix the last dma packet - wrcomp must be as user set it */
5510         new_dma_pkt--;
5511         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5512
5513         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5514
5515         return 0;
5516 }
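/*
 * The patching loop above expands one user LIN_DMA packet into dma_desc_cnt
 * hardware packets, one per (possibly merged) scatter-gather run. The engine
 * barrier bit is kept only on the first emitted packet, WR_COMP is cleared
 * on all of them and then restored on the last packet from the user's
 * original ctl value, so the completion behaviour the user asked for is
 * preserved while intermediate chunks stay silent.
 */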
5517
5518 static int gaudi_patch_cb(struct hl_device *hdev,
5519                                 struct hl_cs_parser *parser)
5520 {
5521         u32 cb_parsed_length = 0;
5522         u32 cb_patched_cur_length = 0;
5523         int rc = 0;
5524
5525         /* user_cb_size is more than 0 so the loop will always execute */
5526         while (cb_parsed_length < parser->user_cb_size) {
5527                 enum packet_id pkt_id;
5528                 u16 pkt_size;
5529                 u32 new_pkt_size = 0;
5530                 struct gaudi_packet *user_pkt, *kernel_pkt;
5531
5532                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5533                 kernel_pkt = parser->patched_cb->kernel_address +
5534                                         cb_patched_cur_length;
5535
5536                 pkt_id = (enum packet_id) (
5537                                 (le64_to_cpu(user_pkt->header) &
5538                                 PACKET_HEADER_PACKET_ID_MASK) >>
5539                                         PACKET_HEADER_PACKET_ID_SHIFT);
5540
5541                 if (!validate_packet_id(pkt_id)) {
5542                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5543                         rc = -EINVAL;
5544                         break;
5545                 }
5546
5547                 pkt_size = gaudi_packet_sizes[pkt_id];
5548                 cb_parsed_length += pkt_size;
5549                 if (cb_parsed_length > parser->user_cb_size) {
5550                         dev_err(hdev->dev,
5551                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5552                         rc = -EINVAL;
5553                         break;
5554                 }
5555
5556                 switch (pkt_id) {
5557                 case PACKET_LIN_DMA:
5558                         rc = gaudi_patch_dma_packet(hdev, parser,
5559                                         (struct packet_lin_dma *) user_pkt,
5560                                         (struct packet_lin_dma *) kernel_pkt,
5561                                         &new_pkt_size);
5562                         cb_patched_cur_length += new_pkt_size;
5563                         break;
5564
5565                 case PACKET_MSG_PROT:
5566                         dev_err(hdev->dev,
5567                                 "User not allowed to use MSG_PROT\n");
5568                         rc = -EPERM;
5569                         break;
5570
5571                 case PACKET_CP_DMA:
5572                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5573                         rc = -EPERM;
5574                         break;
5575
5576                 case PACKET_STOP:
5577                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5578                         rc = -EPERM;
5579                         break;
5580
5581                 case PACKET_WREG_32:
5582                 case PACKET_WREG_BULK:
5583                 case PACKET_MSG_LONG:
5584                 case PACKET_MSG_SHORT:
5585                 case PACKET_REPEAT:
5586                 case PACKET_FENCE:
5587                 case PACKET_NOP:
5588                 case PACKET_ARB_POINT:
5589                 case PACKET_LOAD_AND_EXE:
5590                         memcpy(kernel_pkt, user_pkt, pkt_size);
5591                         cb_patched_cur_length += pkt_size;
5592                         break;
5593
5594                 default:
5595                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5596                                 pkt_id);
5597                         rc = -EINVAL;
5598                         break;
5599                 }
5600
5601                 if (rc)
5602                         break;
5603         }
5604
5605         return rc;
5606 }
5607
5608 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5609                 struct hl_cs_parser *parser)
5610 {
5611         u64 patched_cb_handle;
5612         u32 patched_cb_size;
5613         struct hl_cb *user_cb;
5614         int rc;
5615
5616         /*
5617          * The new CB should have space at the end for two MSG_PROT packets:
5618          * 1. A packet that will act as a completion packet
5619          * 2. A packet that will generate MSI interrupt
5620          */
5621         if (parser->completion)
5622                 parser->patched_cb_size = parser->user_cb_size +
5623                                 sizeof(struct packet_msg_prot) * 2;
5624         else
5625                 parser->patched_cb_size = parser->user_cb_size;
5626
5627         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5628                                 parser->patched_cb_size, false, false,
5629                                 &patched_cb_handle);
5630
5631         if (rc) {
5632                 dev_err(hdev->dev,
5633                         "Failed to allocate patched CB for DMA CS %d\n",
5634                         rc);
5635                 return rc;
5636         }
5637
5638         patched_cb_handle >>= PAGE_SHIFT;
5639         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5640                                 (u32) patched_cb_handle);
5641         /* hl_cb_get should never fail */
5642         if (!parser->patched_cb) {
5643                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5644                         (u32) patched_cb_handle);
5645                 rc = -EFAULT;
5646                 goto out;
5647         }
5648
5649         /*
5650          * The check that parser->user_cb_size <= parser->user_cb->size was done
5651          * in validate_queue_index().
5652          */
5653         memcpy(parser->patched_cb->kernel_address,
5654                 parser->user_cb->kernel_address,
5655                 parser->user_cb_size);
5656
5657         patched_cb_size = parser->patched_cb_size;
5658
5659         /* Validate patched CB instead of user CB */
5660         user_cb = parser->user_cb;
5661         parser->user_cb = parser->patched_cb;
5662         rc = gaudi_validate_cb(hdev, parser, true);
5663         parser->user_cb = user_cb;
5664
5665         if (rc) {
5666                 hl_cb_put(parser->patched_cb);
5667                 goto out;
5668         }
5669
5670         if (patched_cb_size != parser->patched_cb_size) {
5671                 dev_err(hdev->dev, "user CB size mismatch\n");
5672                 hl_cb_put(parser->patched_cb);
5673                 rc = -EINVAL;
5674                 goto out;
5675         }
5676
5677 out:
5678         /*
5679          * Always call cb destroy here because we still hold one reference
5680          * to it from the earlier cb_get. After the job is completed,
5681          * cb_put will release it, but here we want to remove it from the
5682          * idr.
5683          */
5684         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5685                                         patched_cb_handle << PAGE_SHIFT);
5686
5687         return rc;
5688 }
5689
5690 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5691                 struct hl_cs_parser *parser)
5692 {
5693         u64 patched_cb_handle;
5694         int rc;
5695
5696         rc = gaudi_validate_cb(hdev, parser, false);
5697
5698         if (rc)
5699                 goto free_userptr;
5700
5701         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5702                                 parser->patched_cb_size, false, false,
5703                                 &patched_cb_handle);
5704         if (rc) {
5705                 dev_err(hdev->dev,
5706                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5707                 goto free_userptr;
5708         }
5709
5710         patched_cb_handle >>= PAGE_SHIFT;
5711         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5712                                 (u32) patched_cb_handle);
5713         /* hl_cb_get should never fail here */
5714         if (!parser->patched_cb) {
5715                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5716                                 (u32) patched_cb_handle);
5717                 rc = -EFAULT;
5718                 goto out;
5719         }
5720
5721         rc = gaudi_patch_cb(hdev, parser);
5722
5723         if (rc)
5724                 hl_cb_put(parser->patched_cb);
5725
5726 out:
5727         /*
5728          * Always call cb destroy here because we still hold one reference
5729          * to it from the earlier cb_get. After the job is completed,
5730          * cb_put will release it, but here we want to remove it from the
5731          * idr.
5732          */
5733         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5734                                 patched_cb_handle << PAGE_SHIFT);
5735
5736 free_userptr:
5737         if (rc)
5738                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5739         return rc;
5740 }
5741
5742 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5743                                         struct hl_cs_parser *parser)
5744 {
5745         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5746         struct gaudi_device *gaudi = hdev->asic_specific;
5747         u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5748                 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5749
5750         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5751                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5752                         (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5753                 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5754                                 parser->hw_queue_id);
5755                 return -EINVAL;
5756         }
5757
5758         /* For internal queue jobs just check if CB address is valid */
5759         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5760                                         parser->user_cb_size,
5761                                         asic_prop->sram_user_base_address,
5762                                         asic_prop->sram_end_address))
5763                 return 0;
5764
5765         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5766                                         parser->user_cb_size,
5767                                         asic_prop->dram_user_base_address,
5768                                         asic_prop->dram_end_address))
5769                 return 0;
5770
5771         /* PMMU and HPMMU addresses are equal, check only one of them */
5772         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5773                                         parser->user_cb_size,
5774                                         asic_prop->pmmu.start_addr,
5775                                         asic_prop->pmmu.end_addr))
5776                 return 0;
5777
5778         dev_err(hdev->dev,
5779                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5780                 parser->user_cb, parser->user_cb_size);
5781
5782         return -EFAULT;
5783 }
5784
5785 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5786 {
5787         struct gaudi_device *gaudi = hdev->asic_specific;
5788
5789         if (parser->queue_type == QUEUE_TYPE_INT)
5790                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5791
5792         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5793                 return gaudi_parse_cb_mmu(hdev, parser);
5794         else
5795                 return gaudi_parse_cb_no_mmu(hdev, parser);
5796 }
5797
5798 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5799                                         void *kernel_address, u32 len,
5800                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
5801                                         bool eb)
5802 {
5803         struct gaudi_device *gaudi = hdev->asic_specific;
5804         struct packet_msg_prot *cq_pkt;
5805         u32 tmp;
5806
5807         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5808
5809         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5810         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5811
5812         if (eb)
5813                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5814
5815         cq_pkt->ctl = cpu_to_le32(tmp);
5816         cq_pkt->value = cpu_to_le32(cq_val);
5817         cq_pkt->addr = cpu_to_le64(cq_addr);
5818
5819         cq_pkt++;
5820
5821         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5822         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5823         cq_pkt->ctl = cpu_to_le32(tmp);
5824         cq_pkt->value = cpu_to_le32(1);
5825
5826         if (!gaudi->multi_msi_mode)
5827                 msi_vec = 0;
5828
5829         cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5830 }
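/*
 * The code above fills the two MSG_PROT slots that the CB parser reserved at
 * the end of the patched CB: the first packet is the completion write of
 * cq_val to cq_addr, the second writes 1 to PCIE_MSI_INTR_0 + msi_vec * 4 to
 * raise the interrupt (vector 0 is forced unless multi-MSI mode is enabled).
 */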
5831
5832 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5833 {
5834         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5835 }
5836
5837 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5838                                         u32 size, u64 val)
5839 {
5840         struct packet_lin_dma *lin_dma_pkt;
5841         struct hl_cs_job *job;
5842         u32 cb_size, ctl, err_cause;
5843         struct hl_cb *cb;
5844         u64 id;
5845         int rc;
5846
5847         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5848         if (!cb)
5849                 return -EFAULT;
5850
5851         lin_dma_pkt = cb->kernel_address;
5852         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5853         cb_size = sizeof(*lin_dma_pkt);
5854
5855         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5856         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5857         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5858         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5859         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5860
5861         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5862         lin_dma_pkt->src_addr = cpu_to_le64(val);
5863         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5864         lin_dma_pkt->tsize = cpu_to_le32(size);
5865
5866         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5867         if (!job) {
5868                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5869                 rc = -ENOMEM;
5870                 goto release_cb;
5871         }
5872
5873         /* Verify DMA is OK */
5874         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5875         if (err_cause && !hdev->init_done) {
5876                 dev_dbg(hdev->dev,
5877                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5878                         err_cause);
5879                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5880         }
5881
5882         job->id = 0;
5883         job->user_cb = cb;
5884         atomic_inc(&job->user_cb->cs_cnt);
5885         job->user_cb_size = cb_size;
5886         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5887         job->patched_cb = job->user_cb;
5888         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5889
5890         hl_debugfs_add_job(hdev, job);
5891
5892         rc = gaudi_send_job_on_qman0(hdev, job);
5893         hl_debugfs_remove_job(hdev, job);
5894         kfree(job);
5895         atomic_dec(&cb->cs_cnt);
5896
5897         /* Verify DMA is OK */
5898         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5899         if (err_cause) {
5900                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5901                 rc = -EIO;
5902                 if (!hdev->init_done) {
5903                         dev_dbg(hdev->dev,
5904                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5905                                 err_cause);
5906                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5907                 }
5908         }
5909
5910 release_cb:
5911         id = cb->id;
5912         hl_cb_put(cb);
5913         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5914
5915         return rc;
5916 }
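/*
 * In memset mode the LIN_DMA packet built above carries the fill value in
 * src_addr rather than a source pointer, so a single packet fills `size'
 * bytes of device memory at `addr' with `val'. One caller visible in this
 * file is gaudi_mmu_clear_pgt_range(), e.g.:
 *
 *	gaudi_memset_device_memory(hdev, prop->mmu_pgt_addr,
 *			prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE, 0);
 */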
5917
5918 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5919                                         u32 num_regs, u32 val)
5920 {
5921         struct packet_msg_long *pkt;
5922         struct hl_cs_job *job;
5923         u32 cb_size, ctl;
5924         struct hl_cb *cb;
5925         int i, rc;
5926
5927         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5928
5929         if (cb_size > SZ_2M) {
5930                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M / SZ_1M);
5931                 return -ENOMEM;
5932         }
5933
5934         cb = hl_cb_kernel_create(hdev, cb_size, false);
5935         if (!cb)
5936                 return -EFAULT;
5937
5938         pkt = cb->kernel_address;
5939
5940         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5941         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5942         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5943         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5944         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5945
5946         for (i = 0; i < num_regs ; i++, pkt++) {
5947                 pkt->ctl = cpu_to_le32(ctl);
5948                 pkt->value = cpu_to_le32(val);
5949                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5950         }
5951
5952         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5953         if (!job) {
5954                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5955                 rc = -ENOMEM;
5956                 goto release_cb;
5957         }
5958
5959         job->id = 0;
5960         job->user_cb = cb;
5961         atomic_inc(&job->user_cb->cs_cnt);
5962         job->user_cb_size = cb_size;
5963         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5964         job->patched_cb = job->user_cb;
5965         job->job_cb_size = cb_size;
5966
5967         hl_debugfs_add_job(hdev, job);
5968
5969         rc = gaudi_send_job_on_qman0(hdev, job);
5970         hl_debugfs_remove_job(hdev, job);
5971         kfree(job);
5972         atomic_dec(&cb->cs_cnt);
5973
5974 release_cb:
5975         hl_cb_put(cb);
5976         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5977
5978         return rc;
5979 }
5980
5981 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5982 {
5983         u64 base_addr;
5984         u32 num_regs;
5985         int rc;
5986
5987         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5988         num_regs = NUM_OF_SOB_IN_BLOCK;
5989         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5990         if (rc) {
5991                 dev_err(hdev->dev, "failed resetting SM registers\n");
5992                 return -ENOMEM;
5993         }
5994
5995         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5996         num_regs = NUM_OF_SOB_IN_BLOCK;
5997         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5998         if (rc) {
5999                 dev_err(hdev->dev, "failed resetting SM registers\n");
6000                 return -ENOMEM;
6001         }
6002
6003         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6004         num_regs = NUM_OF_SOB_IN_BLOCK;
6005         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6006         if (rc) {
6007                 dev_err(hdev->dev, "failed resetting SM registers\n");
6008                 return -ENOMEM;
6009         }
6010
6011         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6012         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6013         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6014         if (rc) {
6015                 dev_err(hdev->dev, "failed resetting SM registers\n");
6016                 return -ENOMEM;
6017         }
6018
6019         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
6020         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6021         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6022         if (rc) {
6023                 dev_err(hdev->dev, "failed resetting SM registers\n");
6024                 return -ENOMEM;
6025         }
6026
6027         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6028         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6029         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6030         if (rc) {
6031                 dev_err(hdev->dev, "failed resetting SM registers\n");
6032                 return -ENOMEM;
6033         }
6034
6035         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6036                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6037         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6038         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6039         if (rc) {
6040                 dev_err(hdev->dev, "failed resetting SM registers\n");
6041                 return -ENOMEM;
6042         }
6043
6044         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6045                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6046         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6047         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6048         if (rc) {
6049                 dev_err(hdev->dev, "failed resetting SM registers\n");
6050                 return -ENOMEM;
6051         }
6052
6053         return 0;
6054 }
6055
6056 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6057 {
6058         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6059                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6060         int i;
6061
6062         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6063                 u64 sob_addr = CFG_BASE +
6064                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6065                                 (i * sob_delta);
6066                 u32 dma_offset = i * DMA_CORE_OFFSET;
6067
6068                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6069                                 lower_32_bits(sob_addr));
6070                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6071                                 upper_32_bits(sob_addr));
6072                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6073
6074                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6075                  * modified by the user for SRAM reduction
6076                  */
6077                 if (i > 1)
6078                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6079                                                                 0x00000001);
6080         }
6081 }
6082
6083 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6084 {
6085         u32 qman_offset;
6086         int i;
6087
6088         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6089                 qman_offset = i * DMA_QMAN_OFFSET;
6090                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6091         }
6092
6093         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6094                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6095                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6096         }
6097
6098         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6099                 qman_offset = i * TPC_QMAN_OFFSET;
6100                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6101         }
6102
6103         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6104                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6105                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6106                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6107         }
6108 }
6109
6110 static int gaudi_restore_user_registers(struct hl_device *hdev)
6111 {
6112         int rc;
6113
6114         rc = gaudi_restore_sm_registers(hdev);
6115         if (rc)
6116                 return rc;
6117
6118         gaudi_restore_dma_registers(hdev);
6119         gaudi_restore_qm_registers(hdev);
6120
6121         return 0;
6122 }
6123
6124 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6125 {
6126         return 0;
6127 }
6128
6129 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6130 {
6131         struct asic_fixed_properties *prop = &hdev->asic_prop;
6132         struct gaudi_device *gaudi = hdev->asic_specific;
6133         u64 addr = prop->mmu_pgt_addr;
6134         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6135
6136         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6137                 return 0;
6138
6139         return gaudi_memset_device_memory(hdev, addr, size, 0);
6140 }
6141
6142 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6143 {
6144
6145 }
6146
6147 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6148                         bool user_address, u32 *val)
6149 {
6150         struct asic_fixed_properties *prop = &hdev->asic_prop;
6151         struct gaudi_device *gaudi = hdev->asic_specific;
6152         u64 hbm_bar_addr, host_phys_end;
6153         int rc = 0;
6154
6155         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6156
6157         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6158
6159                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6160                                 (hdev->clock_gating_mask &
6161                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6162
6163                         dev_err_ratelimited(hdev->dev,
6164                                 "Can't read register - clock gating is enabled!\n");
6165                         rc = -EFAULT;
6166                 } else {
6167                         *val = RREG32(addr - CFG_BASE);
6168                 }
6169
6170         } else if ((addr >= SRAM_BASE_ADDR) &&
6171                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6172                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6173                                 (addr - SRAM_BASE_ADDR));
6174         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6175                 u64 bar_base_addr = DRAM_PHYS_BASE +
6176                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6177
6178                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6179                 if (hbm_bar_addr != U64_MAX) {
6180                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6181                                                 (addr - bar_base_addr));
6182
6183                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6184                                                 hbm_bar_addr);
6185                 }
6186                 if (hbm_bar_addr == U64_MAX)
6187                         rc = -EIO;
6188         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6189                         user_address && !iommu_present(&pci_bus_type)) {
6190                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6191         } else {
6192                 rc = -EFAULT;
6193         }
6194
6195         return rc;
6196 }
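/*
 * The four debugfs accessors (read32/write32/read64/write64) share the same
 * address decode shown above: CFG space goes through RREG32/WREG32 and is
 * refused while the debugfs-relevant clock gating is enabled, SRAM is read
 * or written directly through its PCI BAR, DRAM/HBM is reached by
 * temporarily re-pointing the HBM BAR at the enclosing window, and host
 * physical addresses are only dereferenced for user addresses when no IOMMU
 * is present. Anything else yields -EFAULT.
 */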
6197
6198 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6199                         bool user_address, u32 val)
6200 {
6201         struct asic_fixed_properties *prop = &hdev->asic_prop;
6202         struct gaudi_device *gaudi = hdev->asic_specific;
6203         u64 hbm_bar_addr, host_phys_end;
6204         int rc = 0;
6205
6206         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6207
6208         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6209
6210                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6211                                 (hdev->clock_gating_mask &
6212                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6213
6214                         dev_err_ratelimited(hdev->dev,
6215                                 "Can't write register - clock gating is enabled!\n");
6216                         rc = -EFAULT;
6217                 } else {
6218                         WREG32(addr - CFG_BASE, val);
6219                 }
6220
6221         } else if ((addr >= SRAM_BASE_ADDR) &&
6222                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6223                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6224                                         (addr - SRAM_BASE_ADDR));
6225         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6226                 u64 bar_base_addr = DRAM_PHYS_BASE +
6227                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6228
6229                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6230                 if (hbm_bar_addr != U64_MAX) {
6231                         writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6232                                                 (addr - bar_base_addr));
6233
6234                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6235                                                 hbm_bar_addr);
6236                 }
6237                 if (hbm_bar_addr == U64_MAX)
6238                         rc = -EIO;
6239         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6240                         user_address && !iommu_present(&pci_bus_type)) {
6241                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6242         } else {
6243                 rc = -EFAULT;
6244         }
6245
6246         return rc;
6247 }
6248
6249 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6250                                 bool user_address, u64 *val)
6251 {
6252         struct asic_fixed_properties *prop = &hdev->asic_prop;
6253         struct gaudi_device *gaudi = hdev->asic_specific;
6254         u64 hbm_bar_addr, host_phys_end;
6255         int rc = 0;
6256
6257         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6258
6259         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6260
6261                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6262                                 (hdev->clock_gating_mask &
6263                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6264
6265                         dev_err_ratelimited(hdev->dev,
6266                                 "Can't read register - clock gating is enabled!\n");
6267                         rc = -EFAULT;
6268                 } else {
6269                         u32 val_l = RREG32(addr - CFG_BASE);
6270                         u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6271
6272                         *val = (((u64) val_h) << 32) | val_l;
6273                 }
6274
6275         } else if ((addr >= SRAM_BASE_ADDR) &&
6276                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6277                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6278                                 (addr - SRAM_BASE_ADDR));
6279         } else if (addr <=
6280                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6281                 u64 bar_base_addr = DRAM_PHYS_BASE +
6282                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6283
6284                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6285                 if (hbm_bar_addr != U64_MAX) {
6286                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6287                                                 (addr - bar_base_addr));
6288
6289                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6290                                                 hbm_bar_addr);
6291                 }
6292                 if (hbm_bar_addr == U64_MAX)
6293                         rc = -EIO;
6294         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6295                         user_address && !iommu_present(&pci_bus_type)) {
6296                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6297         } else {
6298                 rc = -EFAULT;
6299         }
6300
6301         return rc;
6302 }
6303
6304 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6305                                 bool user_address, u64 val)
6306 {
6307         struct asic_fixed_properties *prop = &hdev->asic_prop;
6308         struct gaudi_device *gaudi = hdev->asic_specific;
6309         u64 hbm_bar_addr, host_phys_end;
6310         int rc = 0;
6311
6312         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6313
6314         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6315
6316                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6317                                 (hdev->clock_gating_mask &
6318                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6319
6320                         dev_err_ratelimited(hdev->dev,
6321                                 "Can't write register - clock gating is enabled!\n");
6322                         rc = -EFAULT;
6323                 } else {
6324                         WREG32(addr - CFG_BASE, lower_32_bits(val));
6325                         WREG32(addr + sizeof(u32) - CFG_BASE,
6326                                 upper_32_bits(val));
6327                 }
6328
6329         } else if ((addr >= SRAM_BASE_ADDR) &&
6330                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6331                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6332                                         (addr - SRAM_BASE_ADDR));
6333         } else if (addr <=
6334                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6335                 u64 bar_base_addr = DRAM_PHYS_BASE +
6336                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6337
6338                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6339                 if (hbm_bar_addr != U64_MAX) {
6340                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6341                                                 (addr - bar_base_addr));
6342
6343                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6344                                                 hbm_bar_addr);
6345                 }
6346                 if (hbm_bar_addr == U64_MAX)
6347                         rc = -EIO;
6348         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6349                         user_address && !iommu_present(&pci_bus_type)) {
6350                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6351         } else {
6352                 rc = -EFAULT;
6353         }
6354
6355         return rc;
6356 }
6357
6358 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6359                                         u32 size_to_dma, dma_addr_t dma_addr)
6360 {
6361         u32 err_cause, val;
6362         u64 dma_offset;
6363         int rc;
6364
6365         dma_offset = dma_id * DMA_CORE_OFFSET;
6366
6367         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6368         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6369         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6370         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6371         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6372         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6373                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6374
6375         rc = hl_poll_timeout(
6376                 hdev,
6377                 mmDMA0_CORE_STS0 + dma_offset,
6378                 val,
6379                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6380                 0,
6381                 1000000);
6382
6383         if (rc) {
6384                 dev_err(hdev->dev,
6385                         "DMA %d timed-out during reading of 0x%llx\n",
6386                         dma_id, addr);
6387                 return -EIO;
6388         }
6389
6390         /* Verify DMA is OK */
6391         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6392         if (err_cause) {
6393                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6394                 dev_dbg(hdev->dev,
6395                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6396                         err_cause);
6397                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6398
6399                 return -EIO;
6400         }
6401
6402         return 0;
6403 }
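/*
 * gaudi_dma_core_transfer() drives a DMA core directly, without going
 * through its QMAN: it programs source, destination and size, commits in
 * linear mode, then polls STS0 until the BUSY bit clears (up to one
 * second). A non-zero ERR_CAUSE after the transfer is reported, cleared
 * and turned into -EIO.
 */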
6404
6405 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6406                                 void *blob_addr)
6407 {
6408         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6409         struct gaudi_device *gaudi = hdev->asic_specific;
6410         u64 dma_offset, qm_offset;
6411         dma_addr_t dma_addr;
6412         void *kernel_addr;
6413         bool is_eng_idle;
6414         int rc = 0, dma_id;
6415
6416         kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6417                                                 hdev, SZ_2M,
6418                                                 &dma_addr,
6419                                                 GFP_KERNEL | __GFP_ZERO);
6420
6421         if (!kernel_addr)
6422                 return -ENOMEM;
6423
6424         mutex_lock(&gaudi->clk_gate_mutex);
6425
6426         hdev->asic_funcs->disable_clock_gating(hdev);
6427
6428         hdev->asic_funcs->hw_queues_lock(hdev);
6429
6430         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6431         dma_offset = dma_id * DMA_CORE_OFFSET;
6432         qm_offset = dma_id * DMA_QMAN_OFFSET;
6433         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6434         is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6435
6436         if (!is_eng_idle) {
6437                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6438                 dma_offset = dma_id * DMA_CORE_OFFSET;
6439                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6440                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6441                 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6442
6443                 if (!is_eng_idle) {
6444                         dev_err_ratelimited(hdev->dev,
6445                                 "Can't read via DMA because it is BUSY\n");
6446                         rc = -EAGAIN;
6447                         goto out;
6448                 }
6449         }
6450
6451         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6452         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6453                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6454
6455         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6456          * using the compute ctx ASID, if exists. If not, use the kernel ctx
6457          * ASID
6458          */
6459         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6460
6461         /* Verify DMA is OK */
6462         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6463         if (err_cause) {
6464                 dev_dbg(hdev->dev,
6465                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6466                         err_cause);
6467                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6468         }
6469
6470         pos = 0;
6471         size_left = size;
6472         size_to_dma = SZ_2M;
6473
6474         while (size_left > 0) {
6475
6476                 if (size_left < SZ_2M)
6477                         size_to_dma = size_left;
6478
6479                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6480                                                 dma_addr);
6481                 if (rc)
6482                         break;
6483
6484                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6485
6486                 if (size_left <= SZ_2M)
6487                         break;
6488
6489                 pos += SZ_2M;
6490                 addr += SZ_2M;
6491                 size_left -= SZ_2M;
6492         }
6493
6494         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6495          * using the compute ctx ASID, if exists. If not, use the kernel ctx
6496          * ASID
6497          */
6498         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6499                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6500
6501         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6502
6503 out:
6504         hdev->asic_funcs->hw_queues_unlock(hdev);
6505
6506         hdev->asic_funcs->set_clock_gating(hdev);
6507
6508         mutex_unlock(&gaudi->clk_gate_mutex);
6509
6510         hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6511                                                 dma_addr);
6512
6513         return rc;
6514 }
6515
6516 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6517 {
6518         struct gaudi_device *gaudi = hdev->asic_specific;
6519
6520         if (hdev->hard_reset_pending)
6521                 return U64_MAX;
6522
6523         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6524                         (addr - gaudi->hbm_bar_cur_addr));
6525 }
6526
6527 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6528 {
6529         struct gaudi_device *gaudi = hdev->asic_specific;
6530
6531         if (hdev->hard_reset_pending)
6532                 return;
6533
6534         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6535                         (addr - gaudi->hbm_bar_cur_addr));
6536 }
6537
6538 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6539 {
6540         /* zero the MMBP (MMU bypass) and ASID bits before programming the new ASID */
6541         WREG32_AND(reg, ~0x7FF);
6542         WREG32_OR(reg, asid);
6543 }
6544
6545 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6546 {
6547         struct gaudi_device *gaudi = hdev->asic_specific;
6548
6549         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6550                 return;
6551
6552         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6553                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6554                 return;
6555         }
6556
6557         mutex_lock(&gaudi->clk_gate_mutex);
6558
6559         hdev->asic_funcs->disable_clock_gating(hdev);
6560
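        /*
         * Program the context's ASID into every engine's NON_SECURE_PROPS,
         * ARUSER/AWUSER and related registers so that transactions issued by
         * these engines are translated by the MMU under this context.
         */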
6561         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6562         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6563         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6564         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6565         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6566
6567         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6568         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6569         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6570         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6571         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6572
6573         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6574         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6575         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6576         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6577         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6578
6579         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6580         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6581         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6582         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6583         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6584
6585         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6586         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6587         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6588         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6589         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6590
6591         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6592         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6593         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6594         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6595         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6596
6597         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6598         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6599         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6600         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6601         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6602
6603         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6604         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6605         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6606         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6607         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6608
6609         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6610         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6611         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6612         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6613         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6614         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6615         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6616         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6617
6618         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6619         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6620         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6621         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6622         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6623         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6624         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6625
6626         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6627         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6628         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6629         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6630         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6631         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6632         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6633
6634         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6635         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6636         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6637         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6638         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6639         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6640         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6641
6642         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6643         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6644         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6645         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6646         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6647         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6648         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6649
6650         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6651         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6652         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6653         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6654         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6655         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6656         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6657
6658         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6659         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6660         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6661         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6662         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6663         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6664         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6665
6666         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6667         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6668         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6669         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6670         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6671         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6672         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6673
6674         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6675         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6676         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6677         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6678         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6679         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6680         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6681
6682         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6683         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6684         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6685         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6686         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6687         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6688         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6689         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6690         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6691         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6692
6693         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6694         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6695         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6696         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6697         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6698         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6699         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6700         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6701         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6702         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6703         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6704         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6705
6706         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6707                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6708                                 asid);
6709                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6710                                 asid);
6711                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6712                                 asid);
6713                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6714                                 asid);
6715                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6716                                 asid);
6717         }
6718
6719         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6720                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6721                                 asid);
6722                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6723                                 asid);
6724                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6725                                 asid);
6726                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6727                                 asid);
6728                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6729                                 asid);
6730         }
6731
6732         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6733                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6734                                 asid);
6735                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6736                                 asid);
6737                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6738                                 asid);
6739                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6740                                 asid);
6741                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6742                                 asid);
6743         }
6744
6745         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6746                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6747                                 asid);
6748                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6749                                 asid);
6750                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6751                                 asid);
6752                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6753                                 asid);
6754                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6755                                 asid);
6756         }
6757
6758         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6759                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6760                                 asid);
6761                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6762                                 asid);
6763                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6764                                 asid);
6765                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6766                                 asid);
6767                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6768                                 asid);
6769         }
6770
6771         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6772                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6773                                 asid);
6774                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6775                                 asid);
6776                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6777                                 asid);
6778                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6779                                 asid);
6780                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6781                                 asid);
6782         }
6783
6784         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6785                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6786                                 asid);
6787                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6788                                 asid);
6789                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6790                                 asid);
6791                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6792                                 asid);
6793                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6794                                 asid);
6795         }
6796
6797         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6798                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6799                                 asid);
6800                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6801                                 asid);
6802                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6803                                 asid);
6804                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6805                                 asid);
6806                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6807                                 asid);
6808         }
6809
6810         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6811                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6812                                 asid);
6813                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6814                                 asid);
6815                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6816                                 asid);
6817                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6818                                 asid);
6819                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6820                                 asid);
6821         }
6822
6823         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6824                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6825                                 asid);
6826                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6827                                 asid);
6828                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6829                                 asid);
6830                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6831                                 asid);
6832                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6833                                 asid);
6834         }
6835
6836         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6837         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6838
6839         hdev->asic_funcs->set_clock_gating(hdev);
6840
6841         mutex_unlock(&gaudi->clk_gate_mutex);
6842 }
6843
6844 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6845                 struct hl_cs_job *job)
6846 {
6847         struct packet_msg_prot *fence_pkt;
6848         u32 *fence_ptr;
6849         dma_addr_t fence_dma_addr;
6850         struct hl_cb *cb;
6851         u32 tmp, timeout, dma_offset;
6852         int rc;
6853
6854         if (hdev->pldm)
6855                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6856         else
6857                 timeout = HL_DEVICE_TIMEOUT_USEC;
6858
6859         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6860                 dev_err_ratelimited(hdev->dev,
6861                         "Can't send driver job on QMAN0 because the device is not idle\n");
6862                 return -EBUSY;
6863         }
6864
6865         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6866                                                         &fence_dma_addr);
6867         if (!fence_ptr) {
6868                 dev_err(hdev->dev,
6869                         "Failed to allocate fence memory for QMAN0\n");
6870                 return -ENOMEM;
6871         }
6872
6873         cb = job->patched_cb;
6874
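        /*
         * The last packet in the patched CB is a MSG_PROT "fence" packet:
         * when QMAN0 executes it, GAUDI_QMAN0_FENCE_VAL is written to the
         * fence buffer allocated above, which the driver polls below to
         * detect job completion.
         */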
6875         fence_pkt = cb->kernel_address +
6876                         job->job_cb_size - sizeof(struct packet_msg_prot);
6877
6878         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6879         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6880         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6881
6882         fence_pkt->ctl = cpu_to_le32(tmp);
6883         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6884         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6885
6886         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6887
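        /*
         * Set the PROT VAL bit on PCI DMA channel 0 for the duration of the
         * driver job; it is cleared again (leaving only ERR_VAL set) at
         * free_fence_ptr below.
         */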
6888         WREG32(mmDMA0_CORE_PROT + dma_offset,
6889                         BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6890
6891         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6892                                         job->job_cb_size, cb->bus_address);
6893         if (rc) {
6894                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6895                 goto free_fence_ptr;
6896         }
6897
6898         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6899                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6900                                 timeout, true);
6901
6902         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6903
6904         if (rc == -ETIMEDOUT) {
6905                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6906                 goto free_fence_ptr;
6907         }
6908
6909 free_fence_ptr:
6910         WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6911
6912         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6913                                         fence_dma_addr);
6914         return rc;
6915 }
6916
6917 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6918 {
6919         if (event_type >= GAUDI_EVENT_SIZE)
6920                 goto event_not_supported;
6921
6922         if (!gaudi_irq_map_table[event_type].valid)
6923                 goto event_not_supported;
6924
6925         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6926
6927         return;
6928
6929 event_not_supported:
6930         snprintf(desc, size, "N/A");
6931 }
6932
6933 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6934                                                         u32 x_y, bool is_write)
6935 {
6936         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6937
6938         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6939                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6940
6941         switch (x_y) {
6942         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6943         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6944                 dma_id[0] = 0;
6945                 dma_id[1] = 2;
6946                 break;
6947         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6948         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6949                 dma_id[0] = 1;
6950                 dma_id[1] = 3;
6951                 break;
6952         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6953         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6954                 dma_id[0] = 4;
6955                 dma_id[1] = 6;
6956                 break;
6957         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6958         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6959                 dma_id[0] = 5;
6960                 dma_id[1] = 7;
6961                 break;
6962         default:
6963                 goto unknown_initiator;
6964         }
6965
6966         for (i = 0 ; i < 2 ; i++) {
6967                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6968                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6969         }
6970
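        /*
         * Each DMA_IF location is shared by two DMA engines; use the HBW
         * read/write error bit in each engine's CORE_ERR_CAUSE register to
         * identify which one triggered the RAZWI, or report both when the
         * indication is ambiguous.
         */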
6971         switch (x_y) {
6972         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6973         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6974                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6975                         return "DMA0";
6976                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6977                         return "DMA2";
6978                 else
6979                         return "DMA0 or DMA2";
6980         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6981         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6982                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6983                         return "DMA1";
6984                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6985                         return "DMA3";
6986                 else
6987                         return "DMA1 or DMA3";
6988         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6989         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6990                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6991                         return "DMA4";
6992                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6993                         return "DMA6";
6994                 else
6995                         return "DMA4 or DMA6";
6996         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6997         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6998                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6999                         return "DMA5";
7000                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
7001                         return "DMA7";
7002                 else
7003                         return "DMA5 or DMA7";
7004         }
7005
7006 unknown_initiator:
7007         return "unknown initiator";
7008 }
7009
7010 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
7011                                                         bool is_write)
7012 {
7013         u32 val, x_y, axi_id;
7014
7015         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
7016                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
7017         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
7018                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
7019         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
7020                         RAZWI_INITIATOR_AXI_ID_SHIFT);
7021
7022         switch (x_y) {
7023         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
7024                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7025                         return "TPC0";
7026                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7027                         return "NIC0";
7028                 break;
7029         case RAZWI_INITIATOR_ID_X_Y_TPC1:
7030                 return "TPC1";
7031         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
7032         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
7033                 return "MME0";
7034         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
7035         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
7036                 return "MME1";
7037         case RAZWI_INITIATOR_ID_X_Y_TPC2:
7038                 return "TPC2";
7039         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
7040                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7041                         return "TPC3";
7042                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
7043                         return "PCI";
7044                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
7045                         return "CPU";
7046                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
7047                         return "PSOC";
7048                 break;
7049         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7050         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7051         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7052         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7053         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7054         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7055         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7056         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7057                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
7058         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7059                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7060                         return "TPC4";
7061                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7062                         return "NIC1";
7063                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7064                         return "NIC2";
7065                 break;
7066         case RAZWI_INITIATOR_ID_X_Y_TPC5:
7067                 return "TPC5";
7068         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7069         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7070                 return "MME2";
7071         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7072         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7073                 return "MME3";
7074         case RAZWI_INITIATOR_ID_X_Y_TPC6:
7075                 return "TPC6";
7076         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7077                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7078                         return "TPC7";
7079                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7080                         return "NIC4";
7081                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7082                         return "NIC5";
7083                 break;
7084         default:
7085                 break;
7086         }
7087
7088         dev_err(hdev->dev,
7089                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7090                 val,
7091                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7092                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7093                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7094                         RAZWI_INITIATOR_AXI_ID_MASK);
7095
7096         return "unknown initiator";
7097 }
7098
7099 static void gaudi_print_razwi_info(struct hl_device *hdev)
7100 {
7101         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7102                 dev_err_ratelimited(hdev->dev,
7103                         "RAZWI event caused by illegal write of %s\n",
7104                         gaudi_get_razwi_initiator_name(hdev, true));
7105                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7106         }
7107
7108         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7109                 dev_err_ratelimited(hdev->dev,
7110                         "RAZWI event caused by illegal read of %s\n",
7111                         gaudi_get_razwi_initiator_name(hdev, false));
7112                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7113         }
7114 }
7115
7116 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7117 {
7118         struct gaudi_device *gaudi = hdev->asic_specific;
7119         u64 addr;
7120         u32 val;
7121
7122         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7123                 return;
7124
7125         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7126         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7127                 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7128                 addr <<= 32;
7129                 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7130
7131                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7132                                         addr);
7133
7134                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7135         }
7136
7137         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7138         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7139                 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7140                 addr <<= 32;
7141                 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7142
7143                 dev_err_ratelimited(hdev->dev,
7144                                 "MMU access error on va 0x%llx\n", addr);
7145
7146                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7147         }
7148 }
7149
7150 /*
7151  *  +-------------------+------------------------------------------------------+
7152  *  | Configuration Reg |                     Description                      |
7153  *  |      Address      |                                                      |
7154  *  +-------------------+------------------------------------------------------+
7155  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7156  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7157  *  |                   |0xF34 memory wrappers 63:32                           |
7158  *  |                   |0xF38 memory wrappers 95:64                           |
7159  *  |                   |0xF3C memory wrappers 127:96                          |
7160  *  +-------------------+------------------------------------------------------+
7161  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7162  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7163  *  |                   |0xF44 memory wrappers 63:32                           |
7164  *  |                   |0xF48 memory wrappers 95:64                           |
7165  *  |                   |0xF4C memory wrappers 127:96                          |
7166  *  +-------------------+------------------------------------------------------+
7167  */
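/*
 * Illustrative mapping (assumption derived from the table above): for memory
 * wrapper index w, the single-error indication is read from
 * <block> + 0xF30 + (w / 32) * 4, bit (w % 32); e.g. w = 70 falls in the
 * 0xF38 register (wrappers 95:64), bit 6. The double-error registers follow
 * the same layout starting at 0xF40.
 */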
7168 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7169                 struct ecc_info_extract_params *params, u64 *ecc_address,
7170                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7171 {
7172         struct gaudi_device *gaudi = hdev->asic_specific;
7173         u32 i, num_mem_regs, reg, err_bit;
7174         u64 err_addr, err_word = 0;
7175         int rc = 0;
7176
7177         num_mem_regs = params->num_memories / 32 +
7178                         ((params->num_memories % 32) ? 1 : 0);
7179
7180         if (params->block_address >= CFG_BASE)
7181                 params->block_address -= CFG_BASE;
7182
7183         if (params->derr)
7184                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7185         else
7186                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7187
7188         if (params->disable_clock_gating) {
7189                 mutex_lock(&gaudi->clk_gate_mutex);
7190                 hdev->asic_funcs->disable_clock_gating(hdev);
7191         }
7192
7193         /* Set invalid wrapper index */
7194         *memory_wrapper_idx = 0xFF;
7195
7196         /* Iterate through memory wrappers, a single bit must be set */
7197         for (i = 0 ; i < num_mem_regs ; i++) {
7198                 /* the indication registers are consecutive 32-bit words */
7199                 err_word = RREG32(err_addr + i * 4);
7200                 if (err_word) {
7201                         err_bit = __ffs(err_word);
7202                         *memory_wrapper_idx = err_bit + (32 * i);
7203                         break;
7204                 }
7205         }
7206
7207         if (*memory_wrapper_idx == 0xFF) {
7208                 dev_err(hdev->dev, "ECC error information cannot be found\n");
7209                 rc = -EINVAL;
7210                 goto enable_clk_gate;
7211         }
7212
7213         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7214                         *memory_wrapper_idx);
7215
7216         *ecc_address =
7217                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7218         *ecc_syndrom =
7219                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7220
7221         /* Clear error indication */
7222         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7223         if (params->derr)
7224                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7225         else
7226                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7227
7228         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7229
7230 enable_clk_gate:
7231         if (params->disable_clock_gating) {
7232                 hdev->asic_funcs->set_clock_gating(hdev);
7233
7234                 mutex_unlock(&gaudi->clk_gate_mutex);
7235         }
7236
7237         return rc;
7238 }
7239
7240 /*
7241  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7242  *
7243  * @idx: the current pi/ci value
7244  * @q_len: the queue length (power of 2)
7245  *
7246  * @return the cyclically decremented index
7247  */
7248 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7249 {
7250         u32 mask = q_len - 1;
7251
7252         /*
7253          * modular decrement is equivalent to adding (q_len - 1);
7254          * we then take the LSBs to make sure the value stays in the
7255          * range [0, q_len - 1]
7256          */
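        /* e.g. (illustrative): idx = 0, q_len = 8 -> (0 + 7) & 0x7 = 7 */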
7257         return (idx + q_len - 1) & mask;
7258 }
7259
7260 /**
7261  * gaudi_print_sw_config_stream_data - print SW config stream data
7262  *
7263  * @hdev: pointer to the habanalabs device structure
7264  * @stream: the QMAN's stream
7265  * @qman_base: base address of QMAN registers block
7266  */
7267 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7268                                                 u64 qman_base)
7269 {
7270         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7271         u32 cq_ptr_lo_off, size;
7272
7273         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7274
7275         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7276                                                 stream * cq_ptr_lo_off;
7277         cq_ptr_hi = cq_ptr_lo +
7278                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7279         cq_tsize = cq_ptr_lo +
7280                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7281
7282         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7283         size = RREG32(cq_tsize);
7284         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
7285                                                         stream, cq_ptr, size);
7286 }
7287
7288 /**
7289  * gaudi_print_last_pqes_on_err - print last PQEs on error
7290  *
7291  * @hdev: pointer to the habanalabs device structure
7292  * @qid_base: first QID of the QMAN (out of 4 streams)
7293  * @stream: the QMAN's stream
7294  * @qman_base: base address of QMAN registers block
7295  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7296  */
7297 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7298                                                 u32 stream, u64 qman_base,
7299                                                 bool pr_sw_conf)
7300 {
7301         u32 ci, qm_ci_stream_off, queue_len;
7302         struct hl_hw_queue *q;
7303         u64 pq_ci;
7304         int i;
7305
7306         q = &hdev->kernel_queues[qid_base + stream];
7307
7308         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7309         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7310                                                 stream * qm_ci_stream_off;
7311
7312         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7313                                         q->int_queue_len : HL_QUEUE_LENGTH;
7314
7315         hdev->asic_funcs->hw_queues_lock(hdev);
7316
7317         if (pr_sw_conf)
7318                 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7319
7320         ci = RREG32(pq_ci);
7321
7322         /* we should start printing from ci - 1 */
7323         ci = gaudi_queue_idx_dec(ci, queue_len);
7324
7325         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7326                 struct hl_bd *bd;
7327                 u64 addr;
7328                 u32 len;
7329
7330                 bd = q->kernel_address;
7331                 bd += ci;
7332
7333                 len = le32_to_cpu(bd->len);
7334                 /* len 0 means an uninitialized entry - break */
7335                 if (!len)
7336                         break;
7337
7338                 addr = le64_to_cpu(bd->ptr);
7339
7340                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
7341                                                         stream, ci, addr, len);
7342
7343                 /* get previous ci, wrap if needed */
7344                 ci = gaudi_queue_idx_dec(ci, queue_len);
7345         }
7346
7347         hdev->asic_funcs->hw_queues_unlock(hdev);
7348 }
7349
7350 /**
7351  * print_qman_data_on_err - extract QMAN data on error
7352  *
7353  * @hdev: pointer to the habanalabs device structure
7354  * @qid_base: first QID of the QMAN (out of 4 streams)
7355  * @stream: the QMAN's stream
7356  * @qman_base: base address of QMAN registers block
7357  *
7358  * This function attempts to extract as much data as possible on a QMAN error.
7359  * On upper CP print the SW config stream data and last 8 PQEs.
7360  * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
7361  */
7362 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7363                                                 u32 stream, u64 qman_base)
7364 {
7365         u32 i;
7366
7367         if (stream != QMAN_STREAMS) {
7368                 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7369                                                                         true);
7370                 return;
7371         }
7372
7373         gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7374
7375         for (i = 0; i < QMAN_STREAMS; i++)
7376                 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7377                                                                         false);
7378 }
7379
7380 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7381                                           const char *qm_name,
7382                                           u64 qman_base,
7383                                           u32 qid_base)
7384 {
7385         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7386         u64 glbl_sts_addr, arb_err_addr;
7387         char reg_desc[32];
7388
7389         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7390         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7391
7392         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7393         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7394                 glbl_sts_clr_val = 0;
7395                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7396
7397                 if (!glbl_sts_val)
7398                         continue;
7399
7400                 if (i == QMAN_STREAMS)
7401                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7402                 else
7403                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7404
7405                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7406                         if (glbl_sts_val & BIT(j)) {
7407                                 dev_err_ratelimited(hdev->dev,
7408                                                 "%s %s. err cause: %s\n",
7409                                                 qm_name, reg_desc,
7410                                                 gaudi_qman_error_cause[j]);
7411                                 glbl_sts_clr_val |= BIT(j);
7412                         }
7413                 }
7414
7415                 /* Write 1 to clear errors */
7416                 if (!hdev->stop_on_err)
7417                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7418                 else
7419                         print_qman_data_on_err(hdev, qid_base, i, qman_base);
7420         }
7421
7422         arb_err_val = RREG32(arb_err_addr);
7423
7424         if (!arb_err_val)
7425                 return;
7426
7427         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7428                 if (arb_err_val & BIT(j)) {
7429                         dev_err_ratelimited(hdev->dev,
7430                                         "%s ARB_ERR. err cause: %s\n",
7431                                         qm_name,
7432                                         gaudi_qman_arb_error_cause[j]);
7433                 }
7434         }
7435 }
7436
7437 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7438                 struct hl_eq_sm_sei_data *sei_data)
7439 {
7440         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7441
7442         /* Flip the bits as the enum is ordered in the opposite way */
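        /* i.e. indices 0, 1, 2, 3 map to 3, 2, 1, 0 respectively */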
7443         index = (index ^ 0x3) & 0x3;
7444
7445         switch (sei_data->sei_cause) {
7446         case SM_SEI_SO_OVERFLOW:
7447                 dev_err_ratelimited(hdev->dev,
7448                         "%s SEI Error: SOB Group %u overflow/underflow",
7449                         gaudi_sync_manager_names[index],
7450                         le32_to_cpu(sei_data->sei_log));
7451                 break;
7452         case SM_SEI_LBW_4B_UNALIGNED:
7453                 dev_err_ratelimited(hdev->dev,
7454                         "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7455                         gaudi_sync_manager_names[index],
7456                         le32_to_cpu(sei_data->sei_log));
7457                 break;
7458         case SM_SEI_AXI_RESPONSE_ERR:
7459                 dev_err_ratelimited(hdev->dev,
7460                         "%s SEI Error: AXI ID %u response error",
7461                         gaudi_sync_manager_names[index],
7462                         le32_to_cpu(sei_data->sei_log));
7463                 break;
7464         default:
7465                 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7466                                 le32_to_cpu(sei_data->sei_log));
7467                 break;
7468         }
7469 }
7470
7471 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7472                 struct hl_eq_ecc_data *ecc_data)
7473 {
7474         struct ecc_info_extract_params params;
7475         u64 ecc_address = 0, ecc_syndrom = 0;
7476         u8 index, memory_wrapper_idx = 0;
7477         bool extract_info_from_fw;
7478         int rc;
7479
7480         if (hdev->asic_prop.fw_security_enabled) {
7481                 extract_info_from_fw = true;
7482                 goto extract_ecc_info;
7483         }
7484
7485         switch (event_type) {
7486         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7487         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7488                 extract_info_from_fw = true;
7489                 break;
7490         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7491                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7492                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7493                 params.num_memories = 90;
7494                 params.derr = false;
7495                 params.disable_clock_gating = true;
7496                 extract_info_from_fw = false;
7497                 break;
7498         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7499                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7500                 params.block_address =
7501                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7502                 params.num_memories = 90;
7503                 params.derr = true;
7504                 params.disable_clock_gating = true;
7505                 extract_info_from_fw = false;
7506                 break;
7507         case GAUDI_EVENT_MME0_ACC_SERR:
7508         case GAUDI_EVENT_MME1_ACC_SERR:
7509         case GAUDI_EVENT_MME2_ACC_SERR:
7510         case GAUDI_EVENT_MME3_ACC_SERR:
7511                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7512                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7513                 params.num_memories = 128;
7514                 params.derr = false;
7515                 params.disable_clock_gating = true;
7516                 extract_info_from_fw = false;
7517                 break;
7518         case GAUDI_EVENT_MME0_ACC_DERR:
7519         case GAUDI_EVENT_MME1_ACC_DERR:
7520         case GAUDI_EVENT_MME2_ACC_DERR:
7521         case GAUDI_EVENT_MME3_ACC_DERR:
7522                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7523                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7524                 params.num_memories = 128;
7525                 params.derr = true;
7526                 params.disable_clock_gating = true;
7527                 extract_info_from_fw = false;
7528                 break;
7529         case GAUDI_EVENT_MME0_SBAB_SERR:
7530         case GAUDI_EVENT_MME1_SBAB_SERR:
7531         case GAUDI_EVENT_MME2_SBAB_SERR:
7532         case GAUDI_EVENT_MME3_SBAB_SERR:
7533                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7534                 params.block_address =
7535                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7536                 params.num_memories = 33;
7537                 params.derr = false;
7538                 params.disable_clock_gating = true;
7539                 extract_info_from_fw = false;
7540                 break;
7541         case GAUDI_EVENT_MME0_SBAB_DERR:
7542         case GAUDI_EVENT_MME1_SBAB_DERR:
7543         case GAUDI_EVENT_MME2_SBAB_DERR:
7544         case GAUDI_EVENT_MME3_SBAB_DERR:
7545                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7546                 params.block_address =
7547                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7548                 params.num_memories = 33;
7549                 params.derr = true;
7550                 params.disable_clock_gating = true;
7551                 extract_info_from_fw = false;
7552                 break;
7553         default:
7554                 return;
7555         }
7556
7557 extract_ecc_info:
7558         if (extract_info_from_fw) {
7559                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7560                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7561                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7562         } else {
7563                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7564                                 &ecc_syndrom, &memory_wrapper_idx);
7565                 if (rc)
7566                         return;
7567         }
7568
7569         dev_err(hdev->dev,
7570                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7571                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7572 }
7573
7574 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7575 {
7576         u64 qman_base;
7577         char desc[32];
7578         u32 qid_base;
7579         u8 index;
7580
7581         switch (event_type) {
7582         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7583                 index = event_type - GAUDI_EVENT_TPC0_QM;
7584                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7585                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7586                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7587                 break;
7588         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7589                 index = event_type - GAUDI_EVENT_MME0_QM;
7590                 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7591                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7592                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7593                 break;
7594         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7595                 index = event_type - GAUDI_EVENT_DMA0_QM;
7596                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7597                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7598                 if (index > 1)
7599                         qid_base++;
7600                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7601                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7602                 break;
7603         case GAUDI_EVENT_NIC0_QM0:
7604                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7605                 qman_base = mmNIC0_QM0_BASE;
7606                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7607                 break;
7608         case GAUDI_EVENT_NIC0_QM1:
7609                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7610                 qman_base = mmNIC0_QM1_BASE;
7611                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7612                 break;
7613         case GAUDI_EVENT_NIC1_QM0:
7614                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7615                 qman_base = mmNIC1_QM0_BASE;
7616                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7617                 break;
7618         case GAUDI_EVENT_NIC1_QM1:
7619                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7620                 qman_base = mmNIC1_QM1_BASE;
7621                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7622                 break;
7623         case GAUDI_EVENT_NIC2_QM0:
7624                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7625                 qman_base = mmNIC2_QM0_BASE;
7626                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7627                 break;
7628         case GAUDI_EVENT_NIC2_QM1:
7629                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7630                 qman_base = mmNIC2_QM1_BASE;
7631                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7632                 break;
7633         case GAUDI_EVENT_NIC3_QM0:
7634                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7635                 qman_base = mmNIC3_QM0_BASE;
7636                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7637                 break;
7638         case GAUDI_EVENT_NIC3_QM1:
7639                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7640                 qman_base = mmNIC3_QM1_BASE;
7641                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7642                 break;
7643         case GAUDI_EVENT_NIC4_QM0:
7644                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7645                 qman_base = mmNIC4_QM0_BASE;
7646                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7647                 break;
7648         case GAUDI_EVENT_NIC4_QM1:
7649                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7650                 qman_base = mmNIC4_QM1_BASE;
7651                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7652                 break;
7653         default:
7654                 return;
7655         }
7656
7657         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7658 }
7659
7660 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7661                                         bool razwi)
7662 {
7663         char desc[64] = "";
7664
7665         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7666         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7667                 event_type, desc);
7668
7669         if (razwi) {
7670                 gaudi_print_razwi_info(hdev);
7671                 gaudi_print_mmu_error_info(hdev);
7672         }
7673 }
7674
7675 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7676                                         struct cpucp_pkt_sync_err *sync_err)
7677 {
7678         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7679
7680         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7681                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7682 }
7683
7684 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7685                                         struct hl_eq_fw_alive *fw_alive)
7686 {
7687         dev_err(hdev->dev,
7688                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7689                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7690                 "Minor" : "Critical", fw_alive->process_id,
7691                 fw_alive->thread_id, fw_alive->uptime_seconds);
7692 }
7693
7694 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7695 {
7696         struct gaudi_device *gaudi = hdev->asic_specific;
7697
7698         /* Unmask all IRQs since some could have been received
7699          * during the soft reset
7700          */
7701         return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7702 }
7703
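/*
 * Read and clear the HBM interrupt and ECC status of a single HBM device.
 * When the firmware exposes ECC data (CPU_BOOT_DEV_STS0_HBM_ECC_EN is set),
 * the FW-supplied fields are decoded and printed. Otherwise the MC registers
 * are read directly, unless FW security blocks that access. Returns -EIO if
 * any error was latched in the MC registers.
 */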
7704 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7705                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7706 {
7707         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7708         int rc = 0;
7709
7710         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7711                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7712                 if (!hbm_ecc_data) {
7713                         dev_err(hdev->dev, "No FW ECC data\n");
7714                         return 0;
7715                 }
7716
7717                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7718                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7719                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7720                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7721                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7722                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7723                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7724                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7725                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7726                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7727                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7728                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7729                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7730                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7731
7732                 dev_err(hdev->dev,
7733                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7734                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7735                 dev_err(hdev->dev,
7736                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7737                         device, ch, hbm_ecc_data->first_addr, type,
7738                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7739                         hbm_ecc_data->dec_cnt);
7740                 return 0;
7741         }
7742
7743         if (hdev->asic_prop.fw_security_enabled) {
7744                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7745                 return 0;
7746         }
7747
7748         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7749         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7750                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7751                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7752                 if (val) {
7753                         rc = -EIO;
7754                         dev_err(hdev->dev,
7755                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7756                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7757                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7758                                 (val >> 4) & 0x1);
7759
7760                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7761                         dev_err(hdev->dev,
7762                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7763                                 device, ch * 2,
7764                                 RREG32(base + ch * 0x1000 + 0x064),
7765                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7766                                 (val2 & 0xFF0000) >> 16,
7767                                 (val2 & 0xFF000000) >> 24);
7768                 }
7769
7770                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7771                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7772                 if (val) {
7773                         rc = -EIO;
7774                         dev_err(hdev->dev,
7775                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7776                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7777                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7778                                 (val >> 4) & 0x1);
7779
7780                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7781                         dev_err(hdev->dev,
7782                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7783                                 device, ch * 2 + 1,
7784                                 RREG32(base + ch * 0x1000 + 0x074),
7785                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7786                                 (val2 & 0xFF0000) >> 16,
7787                                 (val2 & 0xFF000000) >> 24);
7788                 }
7789
7790                 /* Clear interrupts */
7791                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7792                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7793                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7794                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7795                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7796                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7797         }
7798
7799         val  = RREG32(base + 0x8F30);
7800         val2 = RREG32(base + 0x8F34);
7801         if (val | val2) {
7802                 rc = -EIO;
7803                 dev_err(hdev->dev,
7804                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7805                         device, val, val2);
7806         }
7807         val  = RREG32(base + 0x8F40);
7808         val2 = RREG32(base + 0x8F44);
7809         if (val | val2) {
7810                 rc = -EIO;
7811                 dev_err(hdev->dev,
7812                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7813                         device, val, val2);
7814         }
7815
7816         return rc;
7817 }
7818
7819 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7820 {
7821         switch (hbm_event_type) {
7822         case GAUDI_EVENT_HBM0_SPI_0:
7823         case GAUDI_EVENT_HBM0_SPI_1:
7824                 return 0;
7825         case GAUDI_EVENT_HBM1_SPI_0:
7826         case GAUDI_EVENT_HBM1_SPI_1:
7827                 return 1;
7828         case GAUDI_EVENT_HBM2_SPI_0:
7829         case GAUDI_EVENT_HBM2_SPI_1:
7830                 return 2;
7831         case GAUDI_EVENT_HBM3_SPI_0:
7832         case GAUDI_EVENT_HBM3_SPI_1:
7833                 return 3;
7834         default:
7835                 break;
7836         }
7837
7838         /* Should never happen */
7839         return 0;
7840 }
7841
7842 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7843                                         char *interrupt_name)
7844 {
7845         struct gaudi_device *gaudi = hdev->asic_specific;
7846         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7847         bool soft_reset_required = false;
7848
7849         /* Accessing the TPC_INTR_CAUSE registers requires disabling clock
7850          * gating, so it cannot be done by the CPU-CP firmware and is done
7851          * here by the driver instead.
7852          */
7853
7854         mutex_lock(&gaudi->clk_gate_mutex);
7855
7856         hdev->asic_funcs->disable_clock_gating(hdev);
7857
7858         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7859                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7860
7861         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7862                 if (tpc_interrupts_cause & BIT(i)) {
7863                         dev_err_ratelimited(hdev->dev,
7864                                         "TPC%d_%s interrupt cause: %s\n",
7865                                         tpc_id, interrupt_name,
7866                                         gaudi_tpc_interrupts_cause[i]);
7867                         /* If this is a QM error, we need to soft-reset */
7868                         if (i == 15)
7869                                 soft_reset_required = true;
7870                 }
7871
7872         /* Clear interrupts */
7873         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7874
7875         hdev->asic_funcs->set_clock_gating(hdev);
7876
7877         mutex_unlock(&gaudi->clk_gate_mutex);
7878
7879         return soft_reset_required;
7880 }
7881
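/*
 * The translations below rely on the fixed spacing of the TPC entries in the
 * event map: DEC events are two event IDs apart per TPC, while KRN_ERR
 * events are six apart.
 */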
7882 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7883 {
7884         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7885 }
7886
7887 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7888 {
7889         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7890 }
7891
7892 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7893                                         u16 event_type)
7894 {
7895         switch (event_type) {
7896         case GAUDI_EVENT_FIX_POWER_ENV_S:
7897                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7898                 dev_info_ratelimited(hdev->dev,
7899                         "Clock throttling due to power consumption\n");
7900                 break;
7901
7902         case GAUDI_EVENT_FIX_POWER_ENV_E:
7903                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7904                 dev_info_ratelimited(hdev->dev,
7905                         "Power envelope is safe, back to optimal clock\n");
7906                 break;
7907
7908         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7909                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7910                 dev_info_ratelimited(hdev->dev,
7911                         "Clock throttling due to overheating\n");
7912                 break;
7913
7914         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7915                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7916                 dev_info_ratelimited(hdev->dev,
7917                         "Thermal envelope is safe, back to optimal clock\n");
7918                 break;
7919
7920         default:
7921                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7922                         event_type);
7923                 break;
7924         }
7925 }
7926
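/*
 * Main event-queue handler: decode the event type from the EQ entry header,
 * update the per-event statistics and dispatch by event class. Recoverable
 * events are logged and have their IRQ unmasked in the firmware, while fatal
 * ones jump to the reset_device label, which escalates to a hard reset when
 * the policy allows it.
 */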
7927 static void gaudi_handle_eqe(struct hl_device *hdev,
7928                                 struct hl_eq_entry *eq_entry)
7929 {
7930         struct gaudi_device *gaudi = hdev->asic_specific;
7931         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7932         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7933                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7934         bool reset_required;
7935         u8 cause;
7936         int rc;
7937
7938         if (event_type >= GAUDI_EVENT_SIZE) {
7939                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
7940                                 event_type, GAUDI_EVENT_SIZE - 1);
7941                 return;
7942         }
7943
7944         gaudi->events_stat[event_type]++;
7945         gaudi->events_stat_aggregate[event_type]++;
7946
7947         switch (event_type) {
7948         case GAUDI_EVENT_PCIE_CORE_DERR:
7949         case GAUDI_EVENT_PCIE_IF_DERR:
7950         case GAUDI_EVENT_PCIE_PHY_DERR:
7951         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7952         case GAUDI_EVENT_MME0_ACC_DERR:
7953         case GAUDI_EVENT_MME0_SBAB_DERR:
7954         case GAUDI_EVENT_MME1_ACC_DERR:
7955         case GAUDI_EVENT_MME1_SBAB_DERR:
7956         case GAUDI_EVENT_MME2_ACC_DERR:
7957         case GAUDI_EVENT_MME2_SBAB_DERR:
7958         case GAUDI_EVENT_MME3_ACC_DERR:
7959         case GAUDI_EVENT_MME3_SBAB_DERR:
7960         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7961                 fallthrough;
7962         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7963         case GAUDI_EVENT_PSOC_MEM_DERR:
7964         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7965         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7966         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7967         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7968         case GAUDI_EVENT_MMU_DERR:
7969         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7970                 gaudi_print_irq_info(hdev, event_type, true);
7971                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7972                 goto reset_device;
7973
7974         case GAUDI_EVENT_GIC500:
7975         case GAUDI_EVENT_AXI_ECC:
7976         case GAUDI_EVENT_L2_RAM_ECC:
7977         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7978                 gaudi_print_irq_info(hdev, event_type, false);
7979                 goto reset_device;
7980
7981         case GAUDI_EVENT_HBM0_SPI_0:
7982         case GAUDI_EVENT_HBM1_SPI_0:
7983         case GAUDI_EVENT_HBM2_SPI_0:
7984         case GAUDI_EVENT_HBM3_SPI_0:
7985                 gaudi_print_irq_info(hdev, event_type, false);
7986                 gaudi_hbm_read_interrupts(hdev,
7987                                 gaudi_hbm_event_to_dev(event_type),
7988                                 &eq_entry->hbm_ecc_data);
7989                 goto reset_device;
7990
7991         case GAUDI_EVENT_HBM0_SPI_1:
7992         case GAUDI_EVENT_HBM1_SPI_1:
7993         case GAUDI_EVENT_HBM2_SPI_1:
7994         case GAUDI_EVENT_HBM3_SPI_1:
7995                 gaudi_print_irq_info(hdev, event_type, false);
7996                 gaudi_hbm_read_interrupts(hdev,
7997                                 gaudi_hbm_event_to_dev(event_type),
7998                                 &eq_entry->hbm_ecc_data);
7999                 hl_fw_unmask_irq(hdev, event_type);
8000                 break;
8001
8002         case GAUDI_EVENT_TPC0_DEC:
8003         case GAUDI_EVENT_TPC1_DEC:
8004         case GAUDI_EVENT_TPC2_DEC:
8005         case GAUDI_EVENT_TPC3_DEC:
8006         case GAUDI_EVENT_TPC4_DEC:
8007         case GAUDI_EVENT_TPC5_DEC:
8008         case GAUDI_EVENT_TPC6_DEC:
8009         case GAUDI_EVENT_TPC7_DEC:
8010                 gaudi_print_irq_info(hdev, event_type, true);
8011                 reset_required = gaudi_tpc_read_interrupts(hdev,
8012                                         tpc_dec_event_to_tpc_id(event_type),
8013                                         "AXI_SLV_DEC_Error");
8014                 if (reset_required) {
8015                         dev_err(hdev->dev, "reset required due to %s\n",
8016                                 gaudi_irq_map_table[event_type].name);
8017
8018                         hl_device_reset(hdev, 0);
8019                 } else {
8020                         hl_fw_unmask_irq(hdev, event_type);
8021                 }
8022                 break;
8023
8024         case GAUDI_EVENT_TPC0_KRN_ERR:
8025         case GAUDI_EVENT_TPC1_KRN_ERR:
8026         case GAUDI_EVENT_TPC2_KRN_ERR:
8027         case GAUDI_EVENT_TPC3_KRN_ERR:
8028         case GAUDI_EVENT_TPC4_KRN_ERR:
8029         case GAUDI_EVENT_TPC5_KRN_ERR:
8030         case GAUDI_EVENT_TPC6_KRN_ERR:
8031         case GAUDI_EVENT_TPC7_KRN_ERR:
8032                 gaudi_print_irq_info(hdev, event_type, true);
8033                 reset_required = gaudi_tpc_read_interrupts(hdev,
8034                                         tpc_krn_event_to_tpc_id(event_type),
8035                                         "KRN_ERR");
8036                 if (reset_required) {
8037                         dev_err(hdev->dev, "reset required due to %s\n",
8038                                 gaudi_irq_map_table[event_type].name);
8039
8040                         hl_device_reset(hdev, 0);
8041                 } else {
8042                         hl_fw_unmask_irq(hdev, event_type);
8043                 }
8044                 break;
8045
8046         case GAUDI_EVENT_PCIE_CORE_SERR:
8047         case GAUDI_EVENT_PCIE_IF_SERR:
8048         case GAUDI_EVENT_PCIE_PHY_SERR:
8049         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8050         case GAUDI_EVENT_MME0_ACC_SERR:
8051         case GAUDI_EVENT_MME0_SBAB_SERR:
8052         case GAUDI_EVENT_MME1_ACC_SERR:
8053         case GAUDI_EVENT_MME1_SBAB_SERR:
8054         case GAUDI_EVENT_MME2_ACC_SERR:
8055         case GAUDI_EVENT_MME2_SBAB_SERR:
8056         case GAUDI_EVENT_MME3_ACC_SERR:
8057         case GAUDI_EVENT_MME3_SBAB_SERR:
8058         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8059         case GAUDI_EVENT_CPU_IF_ECC_SERR:
8060         case GAUDI_EVENT_PSOC_MEM_SERR:
8061         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8062         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8063         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8064         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8065                 fallthrough;
8066         case GAUDI_EVENT_MMU_SERR:
8067                 gaudi_print_irq_info(hdev, event_type, true);
8068                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8069                 hl_fw_unmask_irq(hdev, event_type);
8070                 break;
8071
8072         case GAUDI_EVENT_PCIE_DEC:
8073         case GAUDI_EVENT_MME0_WBC_RSP:
8074         case GAUDI_EVENT_MME0_SBAB0_RSP:
8075         case GAUDI_EVENT_MME1_WBC_RSP:
8076         case GAUDI_EVENT_MME1_SBAB0_RSP:
8077         case GAUDI_EVENT_MME2_WBC_RSP:
8078         case GAUDI_EVENT_MME2_SBAB0_RSP:
8079         case GAUDI_EVENT_MME3_WBC_RSP:
8080         case GAUDI_EVENT_MME3_SBAB0_RSP:
8081         case GAUDI_EVENT_CPU_AXI_SPLITTER:
8082         case GAUDI_EVENT_PSOC_AXI_DEC:
8083         case GAUDI_EVENT_PSOC_PRSTN_FALL:
8084         case GAUDI_EVENT_MMU_PAGE_FAULT:
8085         case GAUDI_EVENT_MMU_WR_PERM:
8086         case GAUDI_EVENT_RAZWI_OR_ADC:
8087         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8088         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8089         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8090                 fallthrough;
8091         case GAUDI_EVENT_NIC0_QM0:
8092         case GAUDI_EVENT_NIC0_QM1:
8093         case GAUDI_EVENT_NIC1_QM0:
8094         case GAUDI_EVENT_NIC1_QM1:
8095         case GAUDI_EVENT_NIC2_QM0:
8096         case GAUDI_EVENT_NIC2_QM1:
8097         case GAUDI_EVENT_NIC3_QM0:
8098         case GAUDI_EVENT_NIC3_QM1:
8099         case GAUDI_EVENT_NIC4_QM0:
8100         case GAUDI_EVENT_NIC4_QM1:
8101         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8102                 gaudi_print_irq_info(hdev, event_type, true);
8103                 gaudi_handle_qman_err(hdev, event_type);
8104                 hl_fw_unmask_irq(hdev, event_type);
8105                 break;
8106
8107         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8108                 gaudi_print_irq_info(hdev, event_type, true);
8109                 goto reset_device;
8110
8111         case GAUDI_EVENT_TPC0_BMON_SPMU:
8112         case GAUDI_EVENT_TPC1_BMON_SPMU:
8113         case GAUDI_EVENT_TPC2_BMON_SPMU:
8114         case GAUDI_EVENT_TPC3_BMON_SPMU:
8115         case GAUDI_EVENT_TPC4_BMON_SPMU:
8116         case GAUDI_EVENT_TPC5_BMON_SPMU:
8117         case GAUDI_EVENT_TPC6_BMON_SPMU:
8118         case GAUDI_EVENT_TPC7_BMON_SPMU:
8119         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8120                 gaudi_print_irq_info(hdev, event_type, false);
8121                 hl_fw_unmask_irq(hdev, event_type);
8122                 break;
8123
8124         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8125                 gaudi_print_irq_info(hdev, event_type, false);
8126                 gaudi_print_sm_sei_info(hdev, event_type,
8127                                         &eq_entry->sm_sei_data);
8128                 rc = hl_state_dump(hdev);
8129                 if (rc)
8130                         dev_err(hdev->dev,
8131                                 "Error during system state dump %d\n", rc);
8132                 hl_fw_unmask_irq(hdev, event_type);
8133                 break;
8134
8135         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8136                 gaudi_print_clk_change_info(hdev, event_type);
8137                 hl_fw_unmask_irq(hdev, event_type);
8138                 break;
8139
8140         case GAUDI_EVENT_PSOC_GPIO_U16_0:
8141                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8142                 dev_err(hdev->dev,
8143                         "Received high temp H/W interrupt %d (cause %d)\n",
8144                         event_type, cause);
8145                 break;
8146
8147         case GAUDI_EVENT_DEV_RESET_REQ:
8148                 gaudi_print_irq_info(hdev, event_type, false);
8149                 goto reset_device;
8150
8151         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8152                 gaudi_print_irq_info(hdev, event_type, false);
8153                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8154                 goto reset_device;
8155
8156         case GAUDI_EVENT_FW_ALIVE_S:
8157                 gaudi_print_irq_info(hdev, event_type, false);
8158                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8159                 goto reset_device;
8160
8161         default:
8162                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8163                                 event_type);
8164                 break;
8165         }
8166
8167         return;
8168
8169 reset_device:
8170         if (hdev->asic_prop.fw_security_enabled)
8171                 hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW);
8172         else if (hdev->hard_reset_on_fw_events)
8173                 hl_device_reset(hdev, HL_RESET_HARD);
8174         else
8175                 hl_fw_unmask_irq(hdev, event_type);
8176 }
8177
8178 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8179                                         u32 *size)
8180 {
8181         struct gaudi_device *gaudi = hdev->asic_specific;
8182
8183         if (aggregate) {
8184                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8185                 return gaudi->events_stat_aggregate;
8186         }
8187
8188         *size = (u32) sizeof(gaudi->events_stat);
8189         return gaudi->events_stat;
8190 }
8191
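/*
 * Invalidate the MMU STLB cache (L0 and L1 levels). The invalidation is
 * kicked through the STLB registers and completion is polled; a timeout is
 * treated as fatal and triggers a hard reset. The call is skipped when the
 * MMU is not initialized or a hard reset is already pending.
 */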
8192 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8193                                         u32 flags)
8194 {
8195         struct gaudi_device *gaudi = hdev->asic_specific;
8196         u32 status, timeout_usec;
8197         int rc;
8198
8199         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8200                 hdev->hard_reset_pending)
8201                 return 0;
8202
8203         if (hdev->pldm)
8204                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8205         else
8206                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8207
8208         /* L0 & L1 invalidation */
8209         WREG32(mmSTLB_INV_PS, 3);
8210         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8211         WREG32(mmSTLB_INV_PS, 2);
8212
8213         rc = hl_poll_timeout(
8214                 hdev,
8215                 mmSTLB_INV_PS,
8216                 status,
8217                 !status,
8218                 1000,
8219                 timeout_usec);
8220
8221         WREG32(mmSTLB_INV_SET, 0);
8222
8223         if (rc) {
8224                 dev_err_ratelimited(hdev->dev,
8225                                         "MMU cache invalidation timeout\n");
8226                 hl_device_reset(hdev, HL_RESET_HARD);
8227         }
8228
8229         return rc;
8230 }
8231
8232 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8233                                                 bool is_hard, u32 flags,
8234                                                 u32 asid, u64 va, u64 size)
8235 {
8236         /* Treat this as a full-cache invalidation because Gaudi has no
8237          * range invalidation support
8238          */
8239         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8240 }
8241
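/*
 * Program the hop0 page-table physical address of the given ASID into the
 * MMU and poll the busy bit until the configuration is accepted.
 */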
8242 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8243                                         u32 asid, u64 phys_addr)
8244 {
8245         u32 status, timeout_usec;
8246         int rc;
8247
8248         if (hdev->pldm)
8249                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8250         else
8251                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8252
8253         WREG32(MMU_ASID, asid);
8254         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8255         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8256         WREG32(MMU_BUSY, 0x80000000);
8257
8258         rc = hl_poll_timeout(
8259                 hdev,
8260                 MMU_BUSY,
8261                 status,
8262                 !(status & 0x80000000),
8263                 1000,
8264                 timeout_usec);
8265
8266         if (rc) {
8267                 dev_err(hdev->dev,
8268                         "Timeout during MMU hop0 config of asid %d\n", asid);
8269                 return rc;
8270         }
8271
8272         return 0;
8273 }
8274
8275 static int gaudi_send_heartbeat(struct hl_device *hdev)
8276 {
8277         struct gaudi_device *gaudi = hdev->asic_specific;
8278
8279         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8280                 return 0;
8281
8282         return hl_fw_send_heartbeat(hdev);
8283 }
8284
8285 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8286 {
8287         struct gaudi_device *gaudi = hdev->asic_specific;
8288         struct asic_fixed_properties *prop = &hdev->asic_prop;
8289         int rc;
8290
8291         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8292                 return 0;
8293
8294         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8295                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8296                                         mmCPU_BOOT_ERR1);
8297         if (rc)
8298                 return rc;
8299
8300         if (!strlen(prop->cpucp_info.card_name))
8301                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8302                                 CARD_NAME_MAX_LEN);
8303
8304         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8305
8306         set_default_power_values(hdev);
8307
8308         hdev->max_power = prop->max_power_default;
8309
8310         return 0;
8311 }
8312
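/*
 * Check whether all DMA, TPC, MME and NIC engines are idle by sampling their
 * QMAN and engine status registers. Busy engines are marked in mask_arr and,
 * when a seq_file is provided, a human-readable status table is emitted.
 */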
8313 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8314                                         u8 mask_len, struct seq_file *s)
8315 {
8316         struct gaudi_device *gaudi = hdev->asic_specific;
8317         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8318         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8319         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8320         unsigned long *mask = (unsigned long *)mask_arr;
8321         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8322         bool is_idle = true, is_eng_idle, is_slave;
8323         u64 offset;
8324         int i, dma_id, port;
8325
8326         mutex_lock(&gaudi->clk_gate_mutex);
8327
8328         hdev->asic_funcs->disable_clock_gating(hdev);
8329
8330         if (s)
8331                 seq_puts(s,
8332                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8333                         "---  -------  ------------  ----------  -------------\n");
8334
8335         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8336                 dma_id = gaudi_dma_assignment[i];
8337                 offset = dma_id * DMA_QMAN_OFFSET;
8338
8339                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8340                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8341                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8342                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8343                                 IS_DMA_IDLE(dma_core_sts0);
8344                 is_idle &= is_eng_idle;
8345
8346                 if (mask && !is_eng_idle)
8347                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8348                 if (s)
8349                         seq_printf(s, fmt, dma_id,
8350                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8351                                 qm_cgm_sts, dma_core_sts0);
8352         }
8353
8354         if (s)
8355                 seq_puts(s,
8356                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8357                         "---  -------  ------------  ----------  ----------\n");
8358
8359         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8360                 offset = i * TPC_QMAN_OFFSET;
8361                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8362                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8363                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8364                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8365                                 IS_TPC_IDLE(tpc_cfg_sts);
8366                 is_idle &= is_eng_idle;
8367
8368                 if (mask && !is_eng_idle)
8369                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8370                 if (s)
8371                         seq_printf(s, fmt, i,
8372                                 is_eng_idle ? "Y" : "N",
8373                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8374         }
8375
8376         if (s)
8377                 seq_puts(s,
8378                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8379                         "---  -------  ------------  ----------  -----------\n");
8380
8381         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8382                 offset = i * MME_QMAN_OFFSET;
8383                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8384                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8385
8386                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8387                 is_slave = i % 2;
8388                 if (!is_slave) {
8389                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8390                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8391                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8392                 }
8393
8394                 is_idle &= is_eng_idle;
8395
8396                 if (mask && !is_eng_idle)
8397                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8398                 if (s) {
8399                         if (!is_slave)
8400                                 seq_printf(s, fmt, i,
8401                                         is_eng_idle ? "Y" : "N",
8402                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8403                         else
8404                                 seq_printf(s, mme_slave_fmt, i,
8405                                         is_eng_idle ? "Y" : "N", "-",
8406                                         "-", mme_arch_sts);
8407                 }
8408         }
8409
8410         if (s)
8411                 seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8412                                 "---  -------  ------------  ----------\n");
8413
8414         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8415                 offset = i * NIC_MACRO_QMAN_OFFSET;
8416                 port = 2 * i;
8417                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8418                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8419                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8420                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8421                         is_idle &= is_eng_idle;
8422
8423                         if (mask && !is_eng_idle)
8424                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8425                         if (s)
8426                                 seq_printf(s, nic_fmt, port,
8427                                                 is_eng_idle ? "Y" : "N",
8428                                                 qm_glbl_sts0, qm_cgm_sts);
8429                 }
8430
8431                 port = 2 * i + 1;
8432                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8433                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8434                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8435                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8436                         is_idle &= is_eng_idle;
8437
8438                         if (mask && !is_eng_idle)
8439                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8440                         if (s)
8441                                 seq_printf(s, nic_fmt, port,
8442                                                 is_eng_idle ? "Y" : "N",
8443                                                 qm_glbl_sts0, qm_cgm_sts);
8444                 }
8445         }
8446
8447         if (s)
8448                 seq_puts(s, "\n");
8449
8450         hdev->asic_funcs->set_clock_gating(hdev);
8451
8452         mutex_unlock(&gaudi->clk_gate_mutex);
8453
8454         return is_idle;
8455 }
8456
8457 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8458         __acquires(&gaudi->hw_queues_lock)
8459 {
8460         struct gaudi_device *gaudi = hdev->asic_specific;
8461
8462         spin_lock(&gaudi->hw_queues_lock);
8463 }
8464
8465 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8466         __releases(&gaudi->hw_queues_lock)
8467 {
8468         struct gaudi_device *gaudi = hdev->asic_specific;
8469
8470         spin_unlock(&gaudi->hw_queues_lock);
8471 }
8472
8473 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8474 {
8475         return hdev->pdev->device;
8476 }
8477
8478 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8479                                 size_t max_size)
8480 {
8481         struct gaudi_device *gaudi = hdev->asic_specific;
8482
8483         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8484                 return 0;
8485
8486         return hl_fw_get_eeprom_data(hdev, data, max_size);
8487 }
8488
8489 /*
8490  * This function should be used only during initialization and/or after reset,
8491  * when there are no active users.
8492  */
8493 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8494                                 u32 tpc_id)
8495 {
8496         struct gaudi_device *gaudi = hdev->asic_specific;
8497         u64 kernel_timeout;
8498         u32 status, offset;
8499         int rc;
8500
8501         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8502
8503         if (hdev->pldm)
8504                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8505         else
8506                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8507
8508         mutex_lock(&gaudi->clk_gate_mutex);
8509
8510         hdev->asic_funcs->disable_clock_gating(hdev);
8511
8512         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8513                         lower_32_bits(tpc_kernel));
8514         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8515                         upper_32_bits(tpc_kernel));
8516
8517         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8518                         lower_32_bits(tpc_kernel));
8519         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8520                         upper_32_bits(tpc_kernel));
8521         /* set a valid LUT pointer, content is of no significance */
8522         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8523                         lower_32_bits(tpc_kernel));
8524         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8525                         upper_32_bits(tpc_kernel));
8526
8527         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8528                         lower_32_bits(CFG_BASE +
8529                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8530
8531         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8532                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8533                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8534         /* wait a bit for the engine to start executing */
8535         usleep_range(1000, 1500);
8536
8537         /* wait until engine has finished executing */
8538         rc = hl_poll_timeout(
8539                 hdev,
8540                 mmTPC0_CFG_STATUS + offset,
8541                 status,
8542                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8543                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8544                 1000,
8545                 kernel_timeout);
8546
8547         if (rc) {
8548                 dev_err(hdev->dev,
8549                         "Timeout while waiting for TPC%d icache prefetch\n",
8550                         tpc_id);
8551                 hdev->asic_funcs->set_clock_gating(hdev);
8552                 mutex_unlock(&gaudi->clk_gate_mutex);
8553                 return -EIO;
8554         }
8555
8556         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8557                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8558
8559         /* wait a bit for the engine to start executing */
8560         usleep_range(1000, 1500);
8561
8562         /* wait until engine has finished executing */
8563         rc = hl_poll_timeout(
8564                 hdev,
8565                 mmTPC0_CFG_STATUS + offset,
8566                 status,
8567                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8568                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8569                 1000,
8570                 kernel_timeout);
8571
8572         if (rc) {
8573                 dev_err(hdev->dev,
8574                         "Timeout while waiting for TPC%d vector pipe\n",
8575                         tpc_id);
8576                 hdev->asic_funcs->set_clock_gating(hdev);
8577                 mutex_unlock(&gaudi->clk_gate_mutex);
8578                 return -EIO;
8579         }
8580
8581         rc = hl_poll_timeout(
8582                 hdev,
8583                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8584                 status,
8585                 (status == 0),
8586                 1000,
8587                 kernel_timeout);
8588
8589         hdev->asic_funcs->set_clock_gating(hdev);
8590         mutex_unlock(&gaudi->clk_gate_mutex);
8591
8592         if (rc) {
8593                 dev_err(hdev->dev,
8594                         "Timeout while waiting for TPC%d kernel to execute\n",
8595                         tpc_id);
8596                 return -EIO;
8597         }
8598
8599         return 0;
8600 }
8601
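/*
 * Set up the per-context internal command-buffer pool: allocate a coherent
 * host buffer, wrap it in a gen_pool whose minimum allocation fits a
 * collective CB, reserve a host VA block and map it through the device MMU.
 */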
8602 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8603                 struct hl_ctx *ctx)
8604 {
8605         struct gaudi_device *gaudi = hdev->asic_specific;
8606         int min_alloc_order, rc, collective_cb_size;
8607
8608         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8609                 return 0;
8610
8611         hdev->internal_cb_pool_virt_addr =
8612                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8613                                         HOST_SPACE_INTERNAL_CB_SZ,
8614                                         &hdev->internal_cb_pool_dma_addr,
8615                                         GFP_KERNEL | __GFP_ZERO);
8616
8617         if (!hdev->internal_cb_pool_virt_addr)
8618                 return -ENOMEM;
8619
8620         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8621                         sizeof(struct packet_fence);
8622         min_alloc_order = ilog2(collective_cb_size);
8623
8624         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8625         if (!hdev->internal_cb_pool) {
8626                 dev_err(hdev->dev,
8627                         "Failed to create internal CB pool\n");
8628                 rc = -ENOMEM;
8629                 goto free_internal_cb_pool;
8630         }
8631
8632         rc = gen_pool_add(hdev->internal_cb_pool,
8633                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8634                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8635         if (rc) {
8636                 dev_err(hdev->dev,
8637                         "Failed to add memory to internal CB pool\n");
8638                 rc = -EFAULT;
8639                 goto destroy_internal_cb_pool;
8640         }
8641
8642         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8643                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8644                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8645
8646         if (!hdev->internal_cb_va_base) {
8647                 rc = -ENOMEM;
8648                 goto destroy_internal_cb_pool;
8649         }
8650
8651         mutex_lock(&ctx->mmu_lock);
8652         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8653                         hdev->internal_cb_pool_dma_addr,
8654                         HOST_SPACE_INTERNAL_CB_SZ);
8655
8656         hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
8657         mutex_unlock(&ctx->mmu_lock);
8658
8659         if (rc)
8660                 goto unreserve_internal_cb_pool;
8661
8662         return 0;
8663
8664 unreserve_internal_cb_pool:
8665         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8666                         HOST_SPACE_INTERNAL_CB_SZ);
8667 destroy_internal_cb_pool:
8668         gen_pool_destroy(hdev->internal_cb_pool);
8669 free_internal_cb_pool:
8670         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8671                         HOST_SPACE_INTERNAL_CB_SZ,
8672                         hdev->internal_cb_pool_virt_addr,
8673                         hdev->internal_cb_pool_dma_addr);
8674
8675         return rc;
8676 }
8677
8678 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8679                 struct hl_ctx *ctx)
8680 {
8681         struct gaudi_device *gaudi = hdev->asic_specific;
8682
8683         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8684                 return;
8685
8686         mutex_lock(&ctx->mmu_lock);
8687         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8688                         HOST_SPACE_INTERNAL_CB_SZ);
8689         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8690                         HOST_SPACE_INTERNAL_CB_SZ);
8691         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
8692         mutex_unlock(&ctx->mmu_lock);
8693
8694         gen_pool_destroy(hdev->internal_cb_pool);
8695
8696         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8697                         HOST_SPACE_INTERNAL_CB_SZ,
8698                         hdev->internal_cb_pool_virt_addr,
8699                         hdev->internal_cb_pool_dma_addr);
8700 }
8701
8702 static int gaudi_ctx_init(struct hl_ctx *ctx)
8703 {
8704         int rc;
8705
8706         if (ctx->asid == HL_KERNEL_ASID_ID)
8707                 return 0;
8708
8709         rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8710         if (rc)
8711                 return rc;
8712
8713         rc = gaudi_restore_user_registers(ctx->hdev);
8714         if (rc)
8715                 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8716
8717         return rc;
8718 }
8719
8720 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8721 {
8722         if (ctx->asid == HL_KERNEL_ASID_ID)
8723                 return;
8724
8725         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8726 }
8727
8728 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8729 {
8730         return gaudi_cq_assignment[cq_idx];
8731 }
8732
8733 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8734 {
8735         return sizeof(struct packet_msg_short) +
8736                         sizeof(struct packet_msg_prot) * 2;
8737 }
8738
8739 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8740 {
8741         return sizeof(struct packet_msg_short) * 4 +
8742                         sizeof(struct packet_fence) +
8743                         sizeof(struct packet_msg_prot) * 2;
8744 }
8745
8746 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8747 {
8748         return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8749 }
8750
8751 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8752                                 u32 size, bool eb)
8753 {
8754         struct hl_cb *cb = (struct hl_cb *) data;
8755         struct packet_msg_short *pkt;
8756         u32 value, ctl, pkt_size = sizeof(*pkt);
8757
8758         pkt = cb->kernel_address + size;
8759         memset(pkt, 0, pkt_size);
8760
8761         /* Increment the sync object value by 1, SOB mode ADD */
8762         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8763         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8764
8765         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8766         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8767         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8768         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8769         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8770         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8771         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8772
8773         pkt->value = cpu_to_le32(value);
8774         pkt->ctl = cpu_to_le32(ctl);
8775
8776         return size + pkt_size;
8777 }
8778
8779 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8780                                         u16 addr)
8781 {
8782         u32 ctl, pkt_size = sizeof(*pkt);
8783
8784         memset(pkt, 0, pkt_size);
8785
8786         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8787         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8788         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8789         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8790         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8791         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8792
8793         pkt->value = cpu_to_le32(value);
8794         pkt->ctl = cpu_to_le32(ctl);
8795
8796         return pkt_size;
8797 }
8798
8799 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8800                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8801                 u16 sob_val, u16 mon_id)
8802 {
8803         u64 monitor_base;
8804         u32 ctl, value, pkt_size = sizeof(*pkt);
8805         u16 msg_addr_offset;
8806         u8 mask;
8807
8808         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8809                 dev_err(hdev->dev,
8810                         "sob_base %u (mask %#x) is not valid\n",
8811                         sob_base, sob_mask);
8812                 return 0;
8813         }
8814
8815         /*
8816          * monitor_base should be the content of the base0 address registers,
8817          * so it will be added to the msg short offsets
8818          */
8819         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8820
8821         msg_addr_offset =
8822                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8823                                 monitor_base;
8824
8825         memset(pkt, 0, pkt_size);
8826
8827         /* Monitor config packet: bind the monitor to a sync object */
8828         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8829         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8830         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8831                         0); /* GREATER OR EQUAL */
8832         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8833
8834         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8835         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8836         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8837         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8838         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8839         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8840         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8841
8842         pkt->value = cpu_to_le32(value);
8843         pkt->ctl = cpu_to_le32(ctl);
8844
8845         return pkt_size;
8846 }
8847
8848 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8849 {
8850         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8851
8852         memset(pkt, 0, pkt_size);
8853
8854         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8855         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8856         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8857
8858         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8859         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8860         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8861         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8862
8863         pkt->cfg = cpu_to_le32(cfg);
8864         pkt->ctl = cpu_to_le32(ctl);
8865
8866         return pkt_size;
8867 }
8868
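/*
 * The fence packet above is configured with fence ID 2, which is why the
 * lookup below returns the CP_FENCE2_RDATA register of the target queue.
 */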
8869 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8870 {
8871         u32 offset, nic_index;
8872
8873         switch (queue_id) {
8874         case GAUDI_QUEUE_ID_DMA_0_0:
8875                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8876                 break;
8877         case GAUDI_QUEUE_ID_DMA_0_1:
8878                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8879                 break;
8880         case GAUDI_QUEUE_ID_DMA_0_2:
8881                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8882                 break;
8883         case GAUDI_QUEUE_ID_DMA_0_3:
8884                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8885                 break;
8886         case GAUDI_QUEUE_ID_DMA_1_0:
8887                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8888                 break;
8889         case GAUDI_QUEUE_ID_DMA_1_1:
8890                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8891                 break;
8892         case GAUDI_QUEUE_ID_DMA_1_2:
8893                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8894                 break;
8895         case GAUDI_QUEUE_ID_DMA_1_3:
8896                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8897                 break;
8898         case GAUDI_QUEUE_ID_DMA_5_0:
8899                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8900                 break;
8901         case GAUDI_QUEUE_ID_DMA_5_1:
8902                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8903                 break;
8904         case GAUDI_QUEUE_ID_DMA_5_2:
8905                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8906                 break;
8907         case GAUDI_QUEUE_ID_DMA_5_3:
8908                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8909                 break;
8910         case GAUDI_QUEUE_ID_TPC_7_0:
8911                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8912                 break;
8913         case GAUDI_QUEUE_ID_TPC_7_1:
8914                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8915                 break;
8916         case GAUDI_QUEUE_ID_TPC_7_2:
8917                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8918                 break;
8919         case GAUDI_QUEUE_ID_TPC_7_3:
8920                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8921                 break;
8922         case GAUDI_QUEUE_ID_NIC_0_0:
8923         case GAUDI_QUEUE_ID_NIC_1_0:
8924         case GAUDI_QUEUE_ID_NIC_2_0:
8925         case GAUDI_QUEUE_ID_NIC_3_0:
8926         case GAUDI_QUEUE_ID_NIC_4_0:
8927         case GAUDI_QUEUE_ID_NIC_5_0:
8928         case GAUDI_QUEUE_ID_NIC_6_0:
8929         case GAUDI_QUEUE_ID_NIC_7_0:
8930         case GAUDI_QUEUE_ID_NIC_8_0:
8931         case GAUDI_QUEUE_ID_NIC_9_0:
8932                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8933                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8934                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8935                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8936                 break;
8937         case GAUDI_QUEUE_ID_NIC_0_1:
8938         case GAUDI_QUEUE_ID_NIC_1_1:
8939         case GAUDI_QUEUE_ID_NIC_2_1:
8940         case GAUDI_QUEUE_ID_NIC_3_1:
8941         case GAUDI_QUEUE_ID_NIC_4_1:
8942         case GAUDI_QUEUE_ID_NIC_5_1:
8943         case GAUDI_QUEUE_ID_NIC_6_1:
8944         case GAUDI_QUEUE_ID_NIC_7_1:
8945         case GAUDI_QUEUE_ID_NIC_8_1:
8946         case GAUDI_QUEUE_ID_NIC_9_1:
8947                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8948                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8949                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8950                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8951                 break;
8952         case GAUDI_QUEUE_ID_NIC_0_2:
8953         case GAUDI_QUEUE_ID_NIC_1_2:
8954         case GAUDI_QUEUE_ID_NIC_2_2:
8955         case GAUDI_QUEUE_ID_NIC_3_2:
8956         case GAUDI_QUEUE_ID_NIC_4_2:
8957         case GAUDI_QUEUE_ID_NIC_5_2:
8958         case GAUDI_QUEUE_ID_NIC_6_2:
8959         case GAUDI_QUEUE_ID_NIC_7_2:
8960         case GAUDI_QUEUE_ID_NIC_8_2:
8961         case GAUDI_QUEUE_ID_NIC_9_2:
8962                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8963                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8964                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8965                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8966                 break;
8967         case GAUDI_QUEUE_ID_NIC_0_3:
8968         case GAUDI_QUEUE_ID_NIC_1_3:
8969         case GAUDI_QUEUE_ID_NIC_2_3:
8970         case GAUDI_QUEUE_ID_NIC_3_3:
8971         case GAUDI_QUEUE_ID_NIC_4_3:
8972         case GAUDI_QUEUE_ID_NIC_5_3:
8973         case GAUDI_QUEUE_ID_NIC_6_3:
8974         case GAUDI_QUEUE_ID_NIC_7_3:
8975         case GAUDI_QUEUE_ID_NIC_8_3:
8976         case GAUDI_QUEUE_ID_NIC_9_3:
8977                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8978                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8979                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8980                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8981                 break;
8982         default:
8983                 return -EINVAL;
8984         }
8985
8986         *addr = CFG_BASE + offset;
8987
8988         return 0;
8989 }
8990
8991 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8992 {
8993         u64 monitor_base;
8994         u32 size = 0;
8995         u16 msg_addr_offset;
8996
8997         /*
8998          * monitor_base should be the content of the base0 address registers,
8999          * so it will be added to the msg short offsets
9000          */
9001         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
9002
9003         /* First monitor config packet: low address of the sync */
9004         msg_addr_offset =
9005                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
9006                                 monitor_base;
9007
9008         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
9009                                         msg_addr_offset);
9010
9011         /* Second monitor config packet: high address of the sync */
9012         msg_addr_offset =
9013                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
9014                                 monitor_base;
9015
9016         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
9017                                         msg_addr_offset);
9018
9019         /*
9020          * Third monitor config packet: the payload, i.e. what to write when the
9021          * sync triggers
9022          */
9023         msg_addr_offset =
9024                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
9025                                 monitor_base;
9026
9027         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
9028
9029         return size;
9030 }
9031
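/*
 * Build a wait CB in the supplied buffer: monitor payload configuration,
 * an arm-monitor packet and a fence packet. Returns the updated CB size,
 * or 0 if no fence address exists for the given queue id.
 */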
9032 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
9033                                 struct hl_gen_wait_properties *prop)
9034 {
9035         struct hl_cb *cb = (struct hl_cb *) prop->data;
9036         void *buf = cb->kernel_address;
9037         u64 fence_addr = 0;
9038         u32 size = prop->size;
9039
9040         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
9041                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
9042                                 prop->q_idx);
9043                 return 0;
9044         }
9045
9046         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
9047         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
9048                         prop->sob_mask, prop->sob_val, prop->mon_id);
9049         size += gaudi_add_fence_pkt(buf + size);
9050
9051         return size;
9052 }
9053
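/* Clear the SOB value in the sync manager and re-initialize its refcount */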
9054 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
9055 {
9056         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
9057
9058         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
9059                 hw_sob->sob_id);
9060
9061         WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9062                         hw_sob->sob_id * 4, 0);
9063
9064         kref_init(&hw_sob->kref);
9065 }
9066
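/*
 * If the firmware has stored the POWER9 magic in the scratchpad register,
 * the host supports full 64-bit DMA addressing; otherwise fall back to the
 * default 48-bit DMA mask.
 */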
9067 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9068 {
9069         if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
9070                                                         HL_POWER9_HOST_MAGIC) {
9071                 hdev->power9_64bit_dma_enable = 1;
9072                 hdev->dma_mask = 64;
9073         } else {
9074                 hdev->power9_64bit_dma_enable = 0;
9075                 hdev->dma_mask = 48;
9076         }
9077 }
9078
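/* Read the 64-bit PSOC timestamp counter, upper 32 bits first */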
9079 static u64 gaudi_get_device_time(struct hl_device *hdev)
9080 {
9081         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9082
9083         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9084 }
9085
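/* HW block ID lookup and mapping are not exposed on Gaudi */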
9086 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9087                                 u32 *block_size, u32 *block_id)
9088 {
9089         return -EPERM;
9090 }
9091
9092 static int gaudi_block_mmap(struct hl_device *hdev,
9093                                 struct vm_area_struct *vma,
9094                                 u32 block_id, u32 block_size)
9095 {
9096         return -EPERM;
9097 }
9098
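/*
 * Signal the device CPU that the driver is ready to receive events, using
 * either the legacy GIC register or the interrupt register advertised by
 * the dynamic firmware loader.
 */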
9099 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9100 {
9101         struct cpu_dyn_regs *dyn_regs =
9102                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9103         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9104                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
9105                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
9106
9107         WREG32(irq_handler_offset,
9108                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
9109 }
9110
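/* Translate a common HL_GAUDI_* PLL index into the firmware PLL enumeration */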
9111 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9112 {
9113         switch (pll_idx) {
9114         case HL_GAUDI_CPU_PLL: return CPU_PLL;
9115         case HL_GAUDI_PCI_PLL: return PCI_PLL;
9116         case HL_GAUDI_NIC_PLL: return NIC_PLL;
9117         case HL_GAUDI_DMA_PLL: return DMA_PLL;
9118         case HL_GAUDI_MESH_PLL: return MESH_PLL;
9119         case HL_GAUDI_MME_PLL: return MME_PLL;
9120         case HL_GAUDI_TPC_PLL: return TPC_PLL;
9121         case HL_GAUDI_IF_PLL: return IF_PLL;
9122         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9123         case HL_GAUDI_HBM_PLL: return HBM_PLL;
9124         default: return -EINVAL;
9125         }
9126 }
9127
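/*
 * Record a single sync-object-to-engine mapping in the state dump hash
 * table. Values of 0 and 0xffffffff mean no sync object is configured and
 * are silently skipped.
 */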
9128 static int gaudi_add_sync_to_engine_map_entry(
9129         struct hl_sync_to_engine_map *map, u32 reg_value,
9130         enum hl_sync_engine_type engine_type, u32 engine_id)
9131 {
9132         struct hl_sync_to_engine_map_entry *entry;
9133
9134         /* The register value represents a partial address of the sync
9135          * object and is used as a unique identifier. For that purpose we
9136          * need to strip the CFG base from the value.
9137          */
9138         if (reg_value == 0 || reg_value == 0xffffffff)
9139                 return 0;
9140         reg_value -= (u32)CFG_BASE;
9141
9142         /* create a new hash entry */
9143         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9144         if (!entry)
9145                 return -ENOMEM;
9146         entry->engine_type = engine_type;
9147         entry->engine_id = engine_id;
9148         entry->sync_id = reg_value;
9149         hash_add(map->tb, &entry->node, reg_value);
9150
9151         return 0;
9152 }
9153
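/*
 * Walk all TPC, MME and DMA engines and record which sync object each one
 * is configured to signal, for later use by the state dump.
 */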
9154 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9155                                 struct hl_sync_to_engine_map *map)
9156 {
9157         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9158         struct gaudi_device *gaudi = hdev->asic_specific;
9159         int i, j, rc;
9160         u32 reg_value;
9161
9162         /* Iterate over TPC engines */
9163         for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9164                 /* TPC registers must be accessed with clock gating disabled */
9165                 mutex_lock(&gaudi->clk_gate_mutex);
9166                 hdev->asic_funcs->disable_clock_gating(hdev);
9167
9168                 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9169                                         sds->props[SP_NEXT_TPC] * i);
9170
9171                 /* We can reenable clock_gating */
9172                 hdev->asic_funcs->set_clock_gating(hdev);
9173                 mutex_unlock(&gaudi->clk_gate_mutex);
9174
9175                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9176                                                         ENGINE_TPC, i);
9177                 if (rc)
9178                         goto free_sync_to_engine_map;
9179         }
9180
9181         /* Iterate over MME engines */
9182         for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9183                 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9184                         /* MME registers must be accessed with clock gating
9185                          * disabled
9186                          */
9187                         mutex_lock(&gaudi->clk_gate_mutex);
9188                         hdev->asic_funcs->disable_clock_gating(hdev);
9189
9190                         reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9191                                                 sds->props[SP_NEXT_MME] * i +
9192                                                 j * sizeof(u32));
9193
9194                         /* We can reenable clock_gating */
9195                         hdev->asic_funcs->set_clock_gating(hdev);
9196                         mutex_unlock(&gaudi->clk_gate_mutex);
9197
9198                         rc = gaudi_add_sync_to_engine_map_entry(
9199                                 map, reg_value, ENGINE_MME,
9200                                 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9201                         if (rc)
9202                                 goto free_sync_to_engine_map;
9203                 }
9204         }
9205
9206         /* Iterate over DMA engines */
9207         for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9208                 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9209                                         sds->props[SP_DMA_QUEUES_OFFSET] * i);
9210                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9211                                                         ENGINE_DMA, i);
9212                 if (rc)
9213                         goto free_sync_to_engine_map;
9214         }
9215
9216         return 0;
9217
9218 free_sync_to_engine_map:
9219         hl_state_dump_free_sync_to_engine_map(map);
9220
9221         return rc;
9222 }
9223
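/* A monitor is dumped only if the VALID bit is set in its status register */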
9224 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9225 {
9226         return FIELD_GET(
9227                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9228                 mon->status);
9229 }
9230
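/*
 * Build a comma-separated list of the sync object IDs the monitor is armed
 * on, derived from the group id and mask in its ARM register.
 */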
9231 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
9232 {
9233         const size_t max_write = 10;
9234         u32 gid, mask, sob;
9235         int i, offset;
9236
9237         /* Sync object ID is calculated as follows:
9238          * (group_id * MONITOR_MAX_SOBS) + index of each cleared bit in the mask
9239          */
9240         gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9241                         mon->arm_data);
9242         mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9243                         mon->arm_data);
9244
9245         for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
9246                 max_write; mask >>= 1, i++) {
9247                 if (!(mask & 1)) {
9248                         sob = gid * MONITOR_MAX_SOBS + i;
9249
9250                         if (offset > 0)
9251                                 offset += snprintf(sobs + offset, max_write,
9252                                                         ", ");
9253
9254                         offset += snprintf(sobs + offset, max_write, "%u", sob);
9255                 }
9256         }
9257 }
9258
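/* Format a human-readable description of a single armed monitor */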
9259 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9260                                 struct hl_device *hdev,
9261                                 struct hl_mon_state_dump *mon)
9262 {
9263         const char *name;
9264         char scratch_buf1[BIN_REG_STRING_SIZE],
9265                 scratch_buf2[BIN_REG_STRING_SIZE];
9266         char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9267
9268         name = hl_state_dump_get_monitor_name(hdev, mon);
9269         if (!name)
9270                 name = "";
9271
9272         gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9273
9274         return hl_snprintf_resize(
9275                 buf, size, offset,
9276                 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9277                 mon->id, name,
9278                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9279                                 mon->arm_data),
9280                 hl_format_as_binary(
9281                         scratch_buf1, sizeof(scratch_buf1),
9282                         FIELD_GET(
9283                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9284                                 mon->arm_data)),
9285                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9286                                 mon->arm_data),
9287                 mon->wr_data,
9288                 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9289                 hl_format_as_binary(
9290                         scratch_buf2, sizeof(scratch_buf2),
9291                         FIELD_GET(
9292                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9293                                 mon->status)),
9294                 monitored_sobs);
9295 }
9296
9297
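/*
 * For one engine's QMAN, print every stream whose CP is currently blocked
 * on a fence: the fence id, the corresponding counter and RDATA register
 * addresses, the fence value and the raw CP status.
 */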
9298 static int gaudi_print_fences_single_engine(
9299         struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9300         enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9301         size_t *size, size_t *offset)
9302 {
9303         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9304         int rc = -ENOMEM, i;
9305         u32 *statuses, *fences;
9306
9307         statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9308                         sizeof(*statuses), GFP_KERNEL);
9309         if (!statuses)
9310                 goto out;
9311
9312         fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9313                                 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9314                          sizeof(*fences), GFP_KERNEL);
9315         if (!fences)
9316                 goto free_status;
9317
9318         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9319                 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9320
9321         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9322                                 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9323                 fences[i] = RREG32(base_offset + i * sizeof(u32));
9324
9325         /* The actual print */
9326         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9327                 u32 fence_id;
9328                 u64 fence_cnt, fence_rdata;
9329                 const char *engine_name;
9330
9331                 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9332                         statuses[i]))
9333                         continue;
9334
9335                 fence_id =
9336                         FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9337                 fence_cnt = base_offset + CFG_BASE +
9338                         sizeof(u32) *
9339                         (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9340                 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9341                                 sds->props[SP_FENCE0_RDATA_OFFSET];
9342                 engine_name = hl_sync_engine_to_string(engine_type);
9343
9344                 rc = hl_snprintf_resize(
9345                         buf, size, offset,
9346                         "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9347                         engine_name, engine_id,
9348                         i, fence_id,
9349                         fence_cnt, engine_name, engine_id, fence_id, i,
9350                         fence_rdata, engine_name, engine_id, fence_id, i,
9351                         fences[fence_id],
9352                         statuses[i]);
9353                 if (rc)
9354                         goto free_fences;
9355         }
9356
9357         rc = 0;
9358
9359 free_fences:
9360         kfree(fences);
9361 free_status:
9362         kfree(statuses);
9363 out:
9364         return rc;
9365 }
9366
9367
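/* Callbacks plugged into the common state dump infrastructure */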
9368 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9369         .monitor_valid = gaudi_monitor_valid,
9370         .print_single_monitor = gaudi_print_single_monitor,
9371         .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9372         .print_fences_single_engine = gaudi_print_fences_single_engine,
9373 };
9374
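/*
 * Register Gaudi's sync object and monitor name tables, state dump
 * properties and callbacks with the common state dump code.
 */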
9375 static void gaudi_state_dump_init(struct hl_device *hdev)
9376 {
9377         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9378         int i;
9379
9380         for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9381                 hash_add(sds->so_id_to_str_tb,
9382                         &gaudi_so_id_to_str[i].node,
9383                         gaudi_so_id_to_str[i].id);
9384
9385         for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9386                 hash_add(sds->monitor_id_to_str_tb,
9387                         &gaudi_monitor_id_to_str[i].node,
9388                         gaudi_monitor_id_to_str[i].id);
9389
9390         sds->props = gaudi_state_dump_specs_props;
9391
9392         sds->sync_namager_names = gaudi_sync_manager_names;
9393
9394         sds->funcs = gaudi_state_dump_funcs;
9395 }
9396
9397 static u32 *gaudi_get_stream_master_qid_arr(void)
9398 {
9399         return gaudi_stream_master;
9400 }
9401
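/* Gaudi implementation of the common habanalabs ASIC interface */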
9402 static const struct hl_asic_funcs gaudi_funcs = {
9403         .early_init = gaudi_early_init,
9404         .early_fini = gaudi_early_fini,
9405         .late_init = gaudi_late_init,
9406         .late_fini = gaudi_late_fini,
9407         .sw_init = gaudi_sw_init,
9408         .sw_fini = gaudi_sw_fini,
9409         .hw_init = gaudi_hw_init,
9410         .hw_fini = gaudi_hw_fini,
9411         .halt_engines = gaudi_halt_engines,
9412         .suspend = gaudi_suspend,
9413         .resume = gaudi_resume,
9414         .mmap = gaudi_mmap,
9415         .ring_doorbell = gaudi_ring_doorbell,
9416         .pqe_write = gaudi_pqe_write,
9417         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9418         .asic_dma_free_coherent = gaudi_dma_free_coherent,
9419         .scrub_device_mem = gaudi_scrub_device_mem,
9420         .get_int_queue_base = gaudi_get_int_queue_base,
9421         .test_queues = gaudi_test_queues,
9422         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9423         .asic_dma_pool_free = gaudi_dma_pool_free,
9424         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9425         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9426         .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9427         .cs_parser = gaudi_cs_parser,
9428         .asic_dma_map_sg = gaudi_dma_map_sg,
9429         .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9430         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9431         .update_eq_ci = gaudi_update_eq_ci,
9432         .context_switch = gaudi_context_switch,
9433         .restore_phase_topology = gaudi_restore_phase_topology,
9434         .debugfs_read32 = gaudi_debugfs_read32,
9435         .debugfs_write32 = gaudi_debugfs_write32,
9436         .debugfs_read64 = gaudi_debugfs_read64,
9437         .debugfs_write64 = gaudi_debugfs_write64,
9438         .debugfs_read_dma = gaudi_debugfs_read_dma,
9439         .add_device_attr = gaudi_add_device_attr,
9440         .handle_eqe = gaudi_handle_eqe,
9441         .set_pll_profile = gaudi_set_pll_profile,
9442         .get_events_stat = gaudi_get_events_stat,
9443         .read_pte = gaudi_read_pte,
9444         .write_pte = gaudi_write_pte,
9445         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9446         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9447         .send_heartbeat = gaudi_send_heartbeat,
9448         .set_clock_gating = gaudi_set_clock_gating,
9449         .disable_clock_gating = gaudi_disable_clock_gating,
9450         .debug_coresight = gaudi_debug_coresight,
9451         .is_device_idle = gaudi_is_device_idle,
9452         .soft_reset_late_init = gaudi_soft_reset_late_init,
9453         .hw_queues_lock = gaudi_hw_queues_lock,
9454         .hw_queues_unlock = gaudi_hw_queues_unlock,
9455         .get_pci_id = gaudi_get_pci_id,
9456         .get_eeprom_data = gaudi_get_eeprom_data,
9457         .send_cpu_message = gaudi_send_cpu_message,
9458         .pci_bars_map = gaudi_pci_bars_map,
9459         .init_iatu = gaudi_init_iatu,
9460         .rreg = hl_rreg,
9461         .wreg = hl_wreg,
9462         .halt_coresight = gaudi_halt_coresight,
9463         .ctx_init = gaudi_ctx_init,
9464         .ctx_fini = gaudi_ctx_fini,
9465         .get_clk_rate = gaudi_get_clk_rate,
9466         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9467         .load_firmware_to_device = gaudi_load_firmware_to_device,
9468         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9469         .get_signal_cb_size = gaudi_get_signal_cb_size,
9470         .get_wait_cb_size = gaudi_get_wait_cb_size,
9471         .gen_signal_cb = gaudi_gen_signal_cb,
9472         .gen_wait_cb = gaudi_gen_wait_cb,
9473         .reset_sob = gaudi_reset_sob,
9474         .reset_sob_group = gaudi_reset_sob_group,
9475         .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9476         .get_device_time = gaudi_get_device_time,
9477         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9478         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9479         .scramble_addr = hl_mmu_scramble_addr,
9480         .descramble_addr = hl_mmu_descramble_addr,
9481         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9482         .get_hw_block_id = gaudi_get_hw_block_id,
9483         .hw_block_mmap = gaudi_block_mmap,
9484         .enable_events_from_fw = gaudi_enable_events_from_fw,
9485         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9486         .init_firmware_loader = gaudi_init_firmware_loader,
9487         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9488         .state_dump_init = gaudi_state_dump_init,
9489         .get_sob_addr = gaudi_get_sob_addr,
9490         .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9491         .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
9492 };
9493
9494 /**
9495  * gaudi_set_asic_funcs - set GAUDI function pointers
9496  *
9497  * @hdev: pointer to hl_device structure
9498  *
9499  */
9500 void gaudi_set_asic_funcs(struct hl_device *hdev)
9501 {
9502         hdev->asic_funcs = &gaudi_funcs;
9503 }