1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2020 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  * CP is secured: The driver needs to parse the CB, but WREG should be
43  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
44  *                      never secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000         /* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN            20
86
87 #define GAUDI_CB_POOL_CB_CNT            512
88 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
97
98 #define GAUDI_ARB_WDT_TIMEOUT           0x1000000
99
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
101                 BIT(GAUDI_ENGINE_ID_MME_0) |\
102                 BIT(GAUDI_ENGINE_ID_MME_2) |\
103                 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
104
105 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
106
107 #define GAUDI_PLL_MAX 10
108
109 #define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")
110
111 #define MONITOR_SOB_STRING_SIZE         256
112
113 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
114         GAUDI_QUEUE_ID_DMA_0_0,
115         GAUDI_QUEUE_ID_DMA_0_1,
116         GAUDI_QUEUE_ID_DMA_0_2,
117         GAUDI_QUEUE_ID_DMA_0_3,
118         GAUDI_QUEUE_ID_DMA_1_0,
119         GAUDI_QUEUE_ID_DMA_1_1,
120         GAUDI_QUEUE_ID_DMA_1_2,
121         GAUDI_QUEUE_ID_DMA_1_3
122 };
123
124 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
125                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
126                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
127                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
128                 "gaudi cpu eq"
129 };
130
131 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
132         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
133         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
134         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
135         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
136         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
137         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
138         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
139         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
140 };
141
142 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
143         [0] = GAUDI_QUEUE_ID_DMA_0_0,
144         [1] = GAUDI_QUEUE_ID_DMA_0_1,
145         [2] = GAUDI_QUEUE_ID_DMA_0_2,
146         [3] = GAUDI_QUEUE_ID_DMA_0_3,
147         [4] = GAUDI_QUEUE_ID_DMA_1_0,
148         [5] = GAUDI_QUEUE_ID_DMA_1_1,
149         [6] = GAUDI_QUEUE_ID_DMA_1_2,
150         [7] = GAUDI_QUEUE_ID_DMA_1_3,
151 };
152
153 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
154         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
155         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
156         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
157         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
158         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
159         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
160         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
161         [PACKET_FENCE]          = sizeof(struct packet_fence),
162         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
163         [PACKET_NOP]            = sizeof(struct packet_nop),
164         [PACKET_STOP]           = sizeof(struct packet_stop),
165         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
166         [PACKET_WAIT]           = sizeof(struct packet_wait),
167         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
168 };
169
170 static inline bool validate_packet_id(enum packet_id id)
171 {
172         switch (id) {
173         case PACKET_WREG_32:
174         case PACKET_WREG_BULK:
175         case PACKET_MSG_LONG:
176         case PACKET_MSG_SHORT:
177         case PACKET_CP_DMA:
178         case PACKET_REPEAT:
179         case PACKET_MSG_PROT:
180         case PACKET_FENCE:
181         case PACKET_LIN_DMA:
182         case PACKET_NOP:
183         case PACKET_STOP:
184         case PACKET_ARB_POINT:
185         case PACKET_WAIT:
186         case PACKET_LOAD_AND_EXE:
187                 return true;
188         default:
189                 return false;
190         }
191 }
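
/*
 * Illustrative sketch only, not part of the driver: validate_packet_id() and
 * gaudi_packet_sizes[] are typically used together when walking the packets
 * of a command buffer. The helper below is a hypothetical example that
 * assumes the first dword of every packet is its control word, from which
 * the opcode is extracted with GAUDI_PKT_CTL_OPCODE_MASK.
 */
#if 0
static bool example_cb_layout_is_valid(const void *cb_addr, u32 cb_size)
{
        u32 offset = 0;

        while (offset + sizeof(u32) <= cb_size) {
                u32 ctl = le32_to_cpu(*(const __le32 *) (cb_addr + offset));
                enum packet_id id = FIELD_GET(GAUDI_PKT_CTL_OPCODE_MASK, ctl);

                /* Reject unknown opcodes and zero-sized (unsupported) packets */
                if (!validate_packet_id(id) || !gaudi_packet_sizes[id])
                        return false;

                offset += gaudi_packet_sizes[id];
        }

        /* The packets must cover the buffer exactly */
        return offset == cb_size;
}
#endif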
192
193 static const char * const
194 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
195         "tpc_address_exceed_slm",
196         "tpc_div_by_0",
197         "tpc_spu_mac_overflow",
198         "tpc_spu_addsub_overflow",
199         "tpc_spu_abs_overflow",
200         "tpc_spu_fp_dst_nan_inf",
201         "tpc_spu_fp_dst_denorm",
202         "tpc_vpu_mac_overflow",
203         "tpc_vpu_addsub_overflow",
204         "tpc_vpu_abs_overflow",
205         "tpc_vpu_fp_dst_nan_inf",
206         "tpc_vpu_fp_dst_denorm",
207         "tpc_assertions",
208         "tpc_illegal_instruction",
209         "tpc_pc_wrap_around",
210         "tpc_qm_sw_err",
211         "tpc_hbw_rresp_err",
212         "tpc_hbw_bresp_err",
213         "tpc_lbw_rresp_err",
214         "tpc_lbw_bresp_err"
215 };
216
217 static const char * const
218 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
219         "PQ AXI HBW error",
220         "CQ AXI HBW error",
221         "CP AXI HBW error",
222         "CP error due to undefined OPCODE",
223         "CP encountered STOP OPCODE",
224         "CP AXI LBW error",
225         "CP WRREG32 or WRBULK returned error",
226         "N/A",
227         "FENCE 0 inc over max value and clipped",
228         "FENCE 1 inc over max value and clipped",
229         "FENCE 2 inc over max value and clipped",
230         "FENCE 3 inc over max value and clipped",
231         "FENCE 0 dec under min value and clipped",
232         "FENCE 1 dec under min value and clipped",
233         "FENCE 2 dec under min value and clipped",
234         "FENCE 3 dec under min value and clipped"
235 };
236
237 static const char * const
238 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
239         "Choice push while full error",
240         "Choice Q watchdog error",
241         "MSG AXI LBW returned with error"
242 };
243
244 enum gaudi_sm_sei_cause {
245         GAUDI_SM_SEI_SO_OVERFLOW,
246         GAUDI_SM_SEI_LBW_4B_UNALIGNED,
247         GAUDI_SM_SEI_AXI_RESPONSE_ERR
248 };
249
250 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
251         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
252         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
253         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
254         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
255         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
256         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
257         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
258         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
259         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
349         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
350         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
351         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
352         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
353         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
354         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
355         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
356         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
357         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
358         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
359         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
360         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
361         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
362         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
363         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
364 };
365
366 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
367         { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
368         { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
369         { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
370         { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
371         { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
372         { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
373         { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
374         { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
375         { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
376         { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
377         { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
378         { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
379         { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
380         { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
381         { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
382         { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
383         { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
384         { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
385         { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
386         { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
387         { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
388         { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
389         { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
390         { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
391         { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
392         { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
393         { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
394 };
395
396 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
397         { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
398         { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
399         { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
400         { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
401         { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
402         { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
403         { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
404         { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
405         { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
406         { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
407         { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
408 };
409
410 static s64 gaudi_state_dump_specs_props[] = {
411         [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
412         [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
413         [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
414         [SP_MON_OBJ_WR_ADDR_LOW] =
415                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
416         [SP_MON_OBJ_WR_ADDR_HIGH] =
417                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
418         [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
419         [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
420         [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
421         [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
422         [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
423         [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
424         [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
425         [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
426         [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
427         [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
428         [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
429         [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
430         [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
431         [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
432         [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
433         [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
434         [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
435         [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
436         [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
437         [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
438         [SP_FENCE0_CNT_OFFSET] =
439                 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
440         [SP_FENCE0_RDATA_OFFSET] =
441                 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
442         [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
443         [SP_NUM_CORES] = 1,
444 };
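
/*
 * A minimal usage sketch (an assumption for illustration, not driver code):
 * the *_CMDQ entries above are per-engine base addresses and the SP_NEXT_* /
 * SP_DMA_QUEUES_OFFSET entries are the strides between consecutive engines,
 * so a state-dump consumer can locate engine N as base + N * stride.
 */
#if 0
static u64 example_tpc_qm_base(u32 tpc_id)
{
        return gaudi_state_dump_specs_props[SP_TPC0_CMDQ] +
                tpc_id * gaudi_state_dump_specs_props[SP_NEXT_TPC];
}
#endif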
445
446 /* The order here is opposite to the order of the indexing in the h/w.
447  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
448  */
449 static const char * const gaudi_sync_manager_names[] = {
450         "SYNC_MGR_E_N",
451         "SYNC_MGR_W_N",
452         "SYNC_MGR_E_S",
453         "SYNC_MGR_W_S",
454         NULL
455 };
456
457 struct ecc_info_extract_params {
458         u64 block_address;
459         u32 num_memories;
460         bool derr;
461         bool disable_clock_gating;
462 };
463
464 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
465                                                                 u64 phys_addr);
466 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
467                                         struct hl_cs_job *job);
468 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
469                                         u32 size, u64 val);
470 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
471                                         u32 num_regs, u32 val);
472 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
473                                 u32 tpc_id);
474 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
475 static int gaudi_cpucp_info_get(struct hl_device *hdev);
476 static void gaudi_disable_clock_gating(struct hl_device *hdev);
477 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
478 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
479                                 u32 size, bool eb);
480 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
481                                 struct hl_gen_wait_properties *prop);
482 static inline enum hl_collective_mode
483 get_collective_mode(struct hl_device *hdev, u32 queue_id)
484 {
485         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
486                 return HL_COLLECTIVE_MASTER;
487
488         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
489                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
490                 return HL_COLLECTIVE_SLAVE;
491
492         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
493                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
494                 return HL_COLLECTIVE_SLAVE;
495
496         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
497                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
498                 return HL_COLLECTIVE_SLAVE;
499
500         return HL_COLLECTIVE_NOT_SUPPORTED;
501 }
502
503 static inline void set_default_power_values(struct hl_device *hdev)
504 {
505         struct asic_fixed_properties *prop = &hdev->asic_prop;
506
507         if (hdev->card_type == cpucp_card_type_pmc) {
508                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
509
510                 if (prop->fw_security_enabled)
511                         prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
512                 else
513                         prop->dc_power_default = DC_POWER_DEFAULT_PMC;
514         } else {
515                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
516                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
517         }
518 }
519
520 static int gaudi_set_fixed_properties(struct hl_device *hdev)
521 {
522         struct asic_fixed_properties *prop = &hdev->asic_prop;
523         u32 num_sync_stream_queues = 0;
524         int i;
525
526         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
527         prop->hw_queues_props = kcalloc(prop->max_queues,
528                         sizeof(struct hw_queue_properties),
529                         GFP_KERNEL);
530
531         if (!prop->hw_queues_props)
532                 return -ENOMEM;
533
534         for (i = 0 ; i < prop->max_queues ; i++) {
535                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
536                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
537                         prop->hw_queues_props[i].driver_only = 0;
538                         prop->hw_queues_props[i].supports_sync_stream = 1;
539                         prop->hw_queues_props[i].cb_alloc_flags =
540                                 CB_ALLOC_KERNEL;
541                         num_sync_stream_queues++;
542                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
543                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
544                         prop->hw_queues_props[i].driver_only = 1;
545                         prop->hw_queues_props[i].supports_sync_stream = 0;
546                         prop->hw_queues_props[i].cb_alloc_flags =
547                                 CB_ALLOC_KERNEL;
548                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
549                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
550                         prop->hw_queues_props[i].driver_only = 0;
551                         prop->hw_queues_props[i].supports_sync_stream = 0;
552                         prop->hw_queues_props[i].cb_alloc_flags =
553                                 CB_ALLOC_USER;
554
555                 }
556                 prop->hw_queues_props[i].collective_mode =
557                                                 get_collective_mode(hdev, i);
558         }
559
560         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
561         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
562         prop->collective_first_sob = 0;
563         prop->collective_first_mon = 0;
564
565         /* 2 SOBs per internal queue stream are reserved for collective */
566         prop->sync_stream_first_sob =
567                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
568                         * QMAN_STREAMS * HL_RSVD_SOBS;
569
570         /* 1 monitor per internal queue stream is reserved for collective
571          * 2 monitors per external queue stream are reserved for collective
572          */
573         prop->sync_stream_first_mon =
574                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
575                         (NUMBER_OF_EXT_HW_QUEUES * 2);
576
577         prop->dram_base_address = DRAM_PHYS_BASE;
578         prop->dram_size = GAUDI_HBM_SIZE_32GB;
579         prop->dram_end_address = prop->dram_base_address +
580                                         prop->dram_size;
581         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
582
583         prop->sram_base_address = SRAM_BASE_ADDR;
584         prop->sram_size = SRAM_SIZE;
585         prop->sram_end_address = prop->sram_base_address +
586                                         prop->sram_size;
587         prop->sram_user_base_address = prop->sram_base_address +
588                                         SRAM_USER_BASE_OFFSET;
589
590         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
591         if (hdev->pldm)
592                 prop->mmu_pgt_size = 0x800000; /* 8MB */
593         else
594                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
595         prop->mmu_pte_size = HL_PTE_SIZE;
596         prop->mmu_hop_table_size = HOP_TABLE_SIZE;
597         prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
598         prop->dram_page_size = PAGE_SIZE_2MB;
599         prop->dram_supports_virtual_memory = false;
600
601         prop->pmmu.hop0_shift = HOP0_SHIFT;
602         prop->pmmu.hop1_shift = HOP1_SHIFT;
603         prop->pmmu.hop2_shift = HOP2_SHIFT;
604         prop->pmmu.hop3_shift = HOP3_SHIFT;
605         prop->pmmu.hop4_shift = HOP4_SHIFT;
606         prop->pmmu.hop0_mask = HOP0_MASK;
607         prop->pmmu.hop1_mask = HOP1_MASK;
608         prop->pmmu.hop2_mask = HOP2_MASK;
609         prop->pmmu.hop3_mask = HOP3_MASK;
610         prop->pmmu.hop4_mask = HOP4_MASK;
611         prop->pmmu.start_addr = VA_HOST_SPACE_START;
612         prop->pmmu.end_addr =
613                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
614         prop->pmmu.page_size = PAGE_SIZE_4KB;
615         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
616
617         /* PMMU and HPMMU are the same except for the page size */
618         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
619         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
620
621         /* shifts and masks are the same in PMMU and DMMU */
622         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
623         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
624         prop->dmmu.end_addr = VA_HOST_SPACE_END;
625         prop->dmmu.page_size = PAGE_SIZE_2MB;
626
627         prop->cfg_size = CFG_SIZE;
628         prop->max_asid = MAX_ASID;
629         prop->num_of_events = GAUDI_EVENT_SIZE;
630         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
631
632         set_default_power_values(hdev);
633
634         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
635         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
636
637         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
638         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
639
640         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
641                                         CARD_NAME_MAX_LEN);
642
643         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
644
645         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
646                         prop->sync_stream_first_sob +
647                         (num_sync_stream_queues * HL_RSVD_SOBS);
648         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
649                         prop->sync_stream_first_mon +
650                         (num_sync_stream_queues * HL_RSVD_MONS);
651
652         prop->first_available_user_msix_interrupt = USHRT_MAX;
653
654         for (i = 0 ; i < HL_MAX_DCORES ; i++)
655                 prop->first_available_cq[i] = USHRT_MAX;
656
657         prop->fw_cpu_boot_dev_sts0_valid = false;
658         prop->fw_cpu_boot_dev_sts1_valid = false;
659         prop->hard_reset_done_by_fw = false;
660         prop->gic_interrupts_enable = true;
661
662         prop->server_type = HL_SERVER_TYPE_UNKNOWN;
663
664         prop->clk_pll_index = HL_GAUDI_MME_PLL;
665         prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
666
667         return 0;
668 }
669
670 static int gaudi_pci_bars_map(struct hl_device *hdev)
671 {
672         static const char * const name[] = {"SRAM", "CFG", "HBM"};
673         bool is_wc[3] = {false, false, true};
674         int rc;
675
676         rc = hl_pci_bars_map(hdev, name, is_wc);
677         if (rc)
678                 return rc;
679
680         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
681                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
682
683         return 0;
684 }
685
686 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
687 {
688         struct gaudi_device *gaudi = hdev->asic_specific;
689         struct hl_inbound_pci_region pci_region;
690         u64 old_addr = addr;
691         int rc;
692
693         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
694                 return old_addr;
695
696         if (hdev->asic_prop.iatu_done_by_fw)
697                 return U64_MAX;
698
699         /* Inbound Region 2 - Bar 4 - Point to HBM */
700         pci_region.mode = PCI_BAR_MATCH_MODE;
701         pci_region.bar = HBM_BAR_ID;
702         pci_region.addr = addr;
703         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
704         if (rc)
705                 return U64_MAX;
706
707         if (gaudi) {
708                 old_addr = gaudi->hbm_bar_cur_addr;
709                 gaudi->hbm_bar_cur_addr = addr;
710         }
711
712         return old_addr;
713 }
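
/*
 * Hypothetical usage sketch, not part of the driver: to read a single dword
 * of HBM through the BAR, the window is moved to a base that covers the
 * target address and restored afterwards. The example assumes the window
 * base must be aligned to dram_pci_bar_size.
 */
#if 0
static int example_read_hbm_dword(struct hl_device *hdev, u64 addr, u32 *val)
{
        u64 bar_size = hdev->asic_prop.dram_pci_bar_size;
        u64 bar_base = addr & ~(bar_size - 1);
        u64 old_base;

        old_base = gaudi_set_hbm_bar_base(hdev, bar_base);
        if (old_base == U64_MAX)
                return -EIO;

        *val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base));

        /* Restore the previous window so other users are not affected */
        gaudi_set_hbm_bar_base(hdev, old_base);

        return 0;
}
#endif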
714
715 static int gaudi_init_iatu(struct hl_device *hdev)
716 {
717         struct hl_inbound_pci_region inbound_region;
718         struct hl_outbound_pci_region outbound_region;
719         int rc;
720
721         if (hdev->asic_prop.iatu_done_by_fw)
722                 return 0;
723
724         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
725         inbound_region.mode = PCI_BAR_MATCH_MODE;
726         inbound_region.bar = SRAM_BAR_ID;
727         inbound_region.addr = SRAM_BASE_ADDR;
728         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
729         if (rc)
730                 goto done;
731
732         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
733         inbound_region.mode = PCI_BAR_MATCH_MODE;
734         inbound_region.bar = CFG_BAR_ID;
735         inbound_region.addr = SPI_FLASH_BASE_ADDR;
736         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
737         if (rc)
738                 goto done;
739
740         /* Inbound Region 2 - Bar 4 - Point to HBM */
741         inbound_region.mode = PCI_BAR_MATCH_MODE;
742         inbound_region.bar = HBM_BAR_ID;
743         inbound_region.addr = DRAM_PHYS_BASE;
744         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
745         if (rc)
746                 goto done;
747
748         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
749
750         /* Outbound Region 0 - Point to Host */
751         outbound_region.addr = HOST_PHYS_BASE;
752         outbound_region.size = HOST_PHYS_SIZE;
753         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
754
755 done:
756         return rc;
757 }
758
759 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
760 {
761         return RREG32(mmHW_STATE);
762 }
763
764 static int gaudi_early_init(struct hl_device *hdev)
765 {
766         struct asic_fixed_properties *prop = &hdev->asic_prop;
767         struct pci_dev *pdev = hdev->pdev;
768         u32 fw_boot_status;
769         int rc;
770
771         rc = gaudi_set_fixed_properties(hdev);
772         if (rc) {
773                 dev_err(hdev->dev, "Failed setting fixed properties\n");
774                 return rc;
775         }
776
777         /* Check BAR sizes */
778         if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
779                 dev_err(hdev->dev,
780                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
781                         SRAM_BAR_ID,
782                         (unsigned long long) pci_resource_len(pdev,
783                                                         SRAM_BAR_ID),
784                         SRAM_BAR_SIZE);
785                 rc = -ENODEV;
786                 goto free_queue_props;
787         }
788
789         if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
790                 dev_err(hdev->dev,
791                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
792                         CFG_BAR_ID,
793                         (unsigned long long) pci_resource_len(pdev,
794                                                                 CFG_BAR_ID),
795                         CFG_BAR_SIZE);
796                 rc = -ENODEV;
797                 goto free_queue_props;
798         }
799
800         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
801         hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
802
803         /* If FW security is enabled at this point it means no access to ELBI */
804         if (hdev->asic_prop.fw_security_enabled) {
805                 hdev->asic_prop.iatu_done_by_fw = true;
806
807                 /*
808                  * The GIC security bit can ONLY be set by CPUCP, so at this
809                  * stage the decision can only be taken based on PCI ID security.
810                  */
811                 hdev->asic_prop.gic_interrupts_enable = false;
812                 goto pci_init;
813         }
814
815         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
816                                 &fw_boot_status);
817         if (rc)
818                 goto free_queue_props;
819
820         /* Check whether FW is configuring iATU */
821         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
822                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
823                 hdev->asic_prop.iatu_done_by_fw = true;
824
825 pci_init:
826         rc = hl_pci_init(hdev);
827         if (rc)
828                 goto free_queue_props;
829
830         /* Before continuing with the initialization, we need to read the preboot
831          * version to determine whether we are running with security-enabled firmware
832          */
833         rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
834                                         mmCPU_BOOT_DEV_STS0,
835                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
836                                         mmCPU_BOOT_ERR1,
837                                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
838         if (rc) {
839                 if (hdev->reset_on_preboot_fail)
840                         hdev->asic_funcs->hw_fini(hdev, true, false);
841                 goto pci_fini;
842         }
843
844         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
845                 dev_info(hdev->dev,
846                         "H/W state is dirty, must reset before initializing\n");
847                 hdev->asic_funcs->hw_fini(hdev, true, false);
848         }
849
850         return 0;
851
852 pci_fini:
853         hl_pci_fini(hdev);
854 free_queue_props:
855         kfree(hdev->asic_prop.hw_queues_props);
856         return rc;
857 }
858
859 static int gaudi_early_fini(struct hl_device *hdev)
860 {
861         kfree(hdev->asic_prop.hw_queues_props);
862         hl_pci_fini(hdev);
863
864         return 0;
865 }
866
867 /**
868  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
869  *
870  * @hdev: pointer to hl_device structure
871  * Return: 0 on success, negative value on failure.
872  */
873 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
874 {
875         struct asic_fixed_properties *prop = &hdev->asic_prop;
876         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
877         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
878         int rc;
879
880         if (hdev->asic_prop.fw_security_enabled) {
881                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
882
883                 if (rc)
884                         return rc;
885
886                 freq = pll_freq_arr[2];
887         } else {
888                 /* Backward compatibility */
889                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
890                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
891                 nr = RREG32(mmPSOC_CPU_PLL_NR);
892                 nf = RREG32(mmPSOC_CPU_PLL_NF);
893                 od = RREG32(mmPSOC_CPU_PLL_OD);
894
895                 if (div_sel == DIV_SEL_REF_CLK ||
896                                 div_sel == DIV_SEL_DIVIDED_REF) {
897                         if (div_sel == DIV_SEL_REF_CLK)
898                                 freq = PLL_REF_CLK;
899                         else
900                                 freq = PLL_REF_CLK / (div_fctr + 1);
901                 } else if (div_sel == DIV_SEL_PLL_CLK ||
902                         div_sel == DIV_SEL_DIVIDED_PLL) {
903                         pll_clk = PLL_REF_CLK * (nf + 1) /
904                                         ((nr + 1) * (od + 1));
905                         if (div_sel == DIV_SEL_PLL_CLK)
906                                 freq = pll_clk;
907                         else
908                                 freq = pll_clk / (div_fctr + 1);
909                 } else {
910                         dev_warn(hdev->dev,
911                                 "Received invalid div select value: %d",
912                                 div_sel);
913                         freq = 0;
914                 }
915         }
916
917         prop->psoc_timestamp_frequency = freq;
918         prop->psoc_pci_pll_nr = nr;
919         prop->psoc_pci_pll_nf = nf;
920         prop->psoc_pci_pll_od = od;
921         prop->psoc_pci_pll_div_factor = div_fctr;
922
923         return 0;
924 }
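
/*
 * Worked example of the PLL math above (illustrative numbers only, assuming a
 * 50 MHz reference clock): with nr = 0, nf = 31 and od = 1 the PLL output is
 * 50 * (31 + 1) / ((0 + 1) * (1 + 1)) = 800 MHz. With DIV_SEL_DIVIDED_PLL and
 * div_fctr = 3, the reported frequency becomes 800 / (3 + 1) = 200 MHz.
 */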
925
926 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
927                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
928 {
929         struct asic_fixed_properties *prop = &hdev->asic_prop;
930         struct packet_lin_dma *init_tpc_mem_pkt;
931         struct hl_cs_job *job;
932         struct hl_cb *cb;
933         u64 dst_addr;
934         u32 cb_size, ctl;
935         u8 tpc_id;
936         int rc;
937
938         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
939         if (!cb)
940                 return -EFAULT;
941
942         init_tpc_mem_pkt = cb->kernel_address;
943         cb_size = sizeof(*init_tpc_mem_pkt);
944         memset(init_tpc_mem_pkt, 0, cb_size);
945
946         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
947
948         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
949         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
950         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
951         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
952
953         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
954
955         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
956         dst_addr = (prop->sram_user_base_address &
957                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
958                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
959         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
960
961         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
962         if (!job) {
963                 dev_err(hdev->dev, "Failed to allocate a new job\n");
964                 rc = -ENOMEM;
965                 goto release_cb;
966         }
967
968         job->id = 0;
969         job->user_cb = cb;
970         atomic_inc(&job->user_cb->cs_cnt);
971         job->user_cb_size = cb_size;
972         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
973         job->patched_cb = job->user_cb;
974         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
975
976         hl_debugfs_add_job(hdev, job);
977
978         rc = gaudi_send_job_on_qman0(hdev, job);
979
980         if (rc)
981                 goto free_job;
982
983         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
984                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
985                 if (rc)
986                         break;
987         }
988
989 free_job:
990         hl_userptr_delete_list(hdev, &job->userptr_list);
991         hl_debugfs_remove_job(hdev, job);
992         kfree(job);
993         atomic_dec(&cb->cs_cnt);
994
995 release_cb:
996         hl_cb_put(cb);
997         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
998
999         return rc;
1000 }
1001
1002 /*
1003  * gaudi_init_tpc_mem() - Initialize TPC memories.
1004  * @hdev: Pointer to hl_device structure.
1005  *
1006  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1007  *
1008  * Return: 0 for success, negative value for error.
1009  */
1010 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1011 {
1012         const struct firmware *fw;
1013         size_t fw_size;
1014         void *cpu_addr;
1015         dma_addr_t dma_handle;
1016         int rc, count = 5;
1017
1018 again:
1019         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1020         if (rc == -EINTR && count-- > 0) {
1021                 msleep(50);
1022                 goto again;
1023         }
1024
1025         if (rc) {
1026                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1027                                 GAUDI_TPC_FW_FILE);
1028                 goto out;
1029         }
1030
1031         fw_size = fw->size;
1032         cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1033                         &dma_handle, GFP_KERNEL | __GFP_ZERO);
1034         if (!cpu_addr) {
1035                 dev_err(hdev->dev,
1036                         "Failed to allocate %zu of dma memory for TPC kernel\n",
1037                         fw_size);
1038                 rc = -ENOMEM;
1039                 goto out;
1040         }
1041
1042         memcpy(cpu_addr, fw->data, fw_size);
1043
1044         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1045
1046         hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1047                         dma_handle);
1048
1049 out:
1050         release_firmware(fw);
1051         return rc;
1052 }
1053
1054 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1055 {
1056         struct gaudi_device *gaudi = hdev->asic_specific;
1057         struct gaudi_collective_properties *prop = &gaudi->collective_props;
1058         struct hl_hw_queue *q;
1059         u32 i, sob_id, sob_group_id, queue_id;
1060
1061         /* Iterate through SOB groups and assign a SOB for each slave queue */
1062         sob_group_id =
1063                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1064         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1065
1066         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1067         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1068                 q = &hdev->kernel_queues[queue_id + (4 * i)];
1069                 q->sync_stream_prop.collective_sob_id = sob_id + i;
1070         }
1071
1072         /* Both DMA5 and TPC7 use the same resources since only a single
1073          * engine needs to participate in the reduction process
1074          */
1075         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1076         q = &hdev->kernel_queues[queue_id];
1077         q->sync_stream_prop.collective_sob_id =
1078                         sob_id + NIC_NUMBER_OF_ENGINES;
1079
1080         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1081         q = &hdev->kernel_queues[queue_id];
1082         q->sync_stream_prop.collective_sob_id =
1083                         sob_id + NIC_NUMBER_OF_ENGINES;
1084 }
1085
1086 static void gaudi_sob_group_hw_reset(struct kref *ref)
1087 {
1088         struct gaudi_hw_sob_group *hw_sob_group =
1089                 container_of(ref, struct gaudi_hw_sob_group, kref);
1090         struct hl_device *hdev = hw_sob_group->hdev;
1091         int i;
1092
1093         for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1094                 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1095                         (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1096
1097         kref_init(&hw_sob_group->kref);
1098 }
1099
1100 static void gaudi_sob_group_reset_error(struct kref *ref)
1101 {
1102         struct gaudi_hw_sob_group *hw_sob_group =
1103                 container_of(ref, struct gaudi_hw_sob_group, kref);
1104         struct hl_device *hdev = hw_sob_group->hdev;
1105
1106         dev_crit(hdev->dev,
1107                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1108                 hw_sob_group->base_sob_id);
1109 }
1110
1111 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1112 {
1113         struct gaudi_collective_properties *prop;
1114         int i;
1115
1116         prop = &gaudi->collective_props;
1117
1118         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1119
1120         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1121                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1122                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1123                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1124         /* Set collective engine bit */
1125         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1126                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1127 }
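
/*
 * Layout example for the mask computed above (assuming, for illustration,
 * NIC_NUMBER_OF_ENGINES == 10 and HL_MAX_SOBS_PER_MONITOR == 8): each enabled
 * NIC engine i contributes bit (i % 8) of mstr_sob_mask[i / 8], so engines
 * 0-7 map to bits 0-7 of word 0 and engines 8-9 to bits 0-1 of word 1. The
 * collective engine bit added after the loop (i == 10) lands on bit 2 of
 * word 1.
 */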
1128
1129 static int gaudi_collective_init(struct hl_device *hdev)
1130 {
1131         u32 i, sob_id, reserved_sobs_per_group;
1132         struct gaudi_collective_properties *prop;
1133         struct gaudi_device *gaudi;
1134
1135         gaudi = hdev->asic_specific;
1136         prop = &gaudi->collective_props;
1137         sob_id = hdev->asic_prop.collective_first_sob;
1138
1139         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1140         reserved_sobs_per_group =
1141                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1142
1143         /* Init SOB groups */
1144         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1145                 prop->hw_sob_group[i].hdev = hdev;
1146                 prop->hw_sob_group[i].base_sob_id = sob_id;
1147                 sob_id += reserved_sobs_per_group;
1148                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1149         }
1150
1151         for (i = 0 ; i < QMAN_STREAMS; i++) {
1152                 prop->next_sob_group_val[i] = 1;
1153                 prop->curr_sob_group_idx[i] = 0;
1154                 gaudi_collective_map_sobs(hdev, i);
1155         }
1156
1157         gaudi_collective_mstr_sob_mask_set(gaudi);
1158
1159         return 0;
1160 }
1161
1162 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1163 {
1164         struct gaudi_device *gaudi = hdev->asic_specific;
1165         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1166
1167         kref_put(&cprop->hw_sob_group[sob_group].kref,
1168                                         gaudi_sob_group_hw_reset);
1169 }
1170
1171 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1172                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1173 {
1174         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1175         struct gaudi_collective_properties *cprop;
1176         struct hl_gen_wait_properties wait_prop;
1177         struct hl_sync_stream_properties *prop;
1178         struct gaudi_device *gaudi;
1179
1180         gaudi = hdev->asic_specific;
1181         cprop = &gaudi->collective_props;
1182         queue_id = job->hw_queue_id;
1183         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1184
1185         master_sob_base =
1186                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1187         master_monitor = prop->collective_mstr_mon_id[0];
1188
1189         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1190
1191         dev_dbg(hdev->dev,
1192                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1193                 master_sob_base, cprop->mstr_sob_mask[0],
1194                 cprop->next_sob_group_val[stream],
1195                 master_monitor, queue_id);
1196
1197         wait_prop.data = (void *) job->patched_cb;
1198         wait_prop.sob_base = master_sob_base;
1199         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1200         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1201         wait_prop.mon_id = master_monitor;
1202         wait_prop.q_idx = queue_id;
1203         wait_prop.size = cb_size;
1204         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1205
1206         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1207         master_monitor = prop->collective_mstr_mon_id[1];
1208
1209         dev_dbg(hdev->dev,
1210                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1211                 master_sob_base, cprop->mstr_sob_mask[1],
1212                 cprop->next_sob_group_val[stream],
1213                 master_monitor, queue_id);
1214
1215         wait_prop.sob_base = master_sob_base;
1216         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1217         wait_prop.mon_id = master_monitor;
1218         wait_prop.size = cb_size;
1219         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1220 }
1221
1222 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1223                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1224 {
1225         struct hl_gen_wait_properties wait_prop;
1226         struct hl_sync_stream_properties *prop;
1227         u32 queue_id, cb_size = 0;
1228
1229         queue_id = job->hw_queue_id;
1230         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1231
1232         if (job->cs->encaps_signals) {
1233                 /* use the encaps signal handle stored earlier in the flow
1234                  * and set the SOB information from the encaps
1235                  * signals handle
1236                  */
1237                 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1238                                                 cs_cmpl);
1239
1240                 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1241                                 job->cs->sequence,
1242                                 cs_cmpl->hw_sob->sob_id,
1243                                 cs_cmpl->sob_val);
1244         }
1245
1246         /* Add to wait CBs using slave monitor */
1247         wait_prop.data = (void *) job->user_cb;
1248         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1249         wait_prop.sob_mask = 0x1;
1250         wait_prop.sob_val = cs_cmpl->sob_val;
1251         wait_prop.mon_id = prop->collective_slave_mon_id;
1252         wait_prop.q_idx = queue_id;
1253         wait_prop.size = cb_size;
1254
1255         dev_dbg(hdev->dev,
1256                 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1257                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1258                 prop->collective_slave_mon_id, queue_id);
1259
1260         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1261
1262         dev_dbg(hdev->dev,
1263                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1264                 prop->collective_sob_id, queue_id);
1265
1266         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1267                         prop->collective_sob_id, cb_size, false);
1268 }
1269
1270 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1271 {
1272         struct hl_cs_compl *signal_cs_cmpl =
1273                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1274         struct hl_cs_compl *cs_cmpl =
1275                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1276         struct gaudi_collective_properties *cprop;
1277         u32 stream, queue_id, sob_group_offset;
1278         struct gaudi_device *gaudi;
1279         struct hl_device *hdev;
1280         struct hl_cs_job *job;
1281         struct hl_ctx *ctx;
1282
1283         ctx = cs->ctx;
1284         hdev = ctx->hdev;
1285         gaudi = hdev->asic_specific;
1286         cprop = &gaudi->collective_props;
1287
1288         /* In the encaps signals case, the SOB info will be retrieved from
1289          * the handle in gaudi_collective_slave_init_job.
1290          */
1291         if (!cs->encaps_signals) {
1292                 /* copy the SOB id and value of the signal CS */
1293                 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1294                 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1295         }
1296
1297         /* check again if the signal cs already completed.
1298          * if yes then don't send any wait cs since the hw_sob
1299          * could be in reset already. if signal is not completed
1300          * then get refcount to hw_sob to prevent resetting the sob
1301          * while wait cs is not submitted.
1302          * note that this check is protected by two locks,
1303          * hw queue lock and completion object lock,
1304          * and the same completion object lock also protects
1305          * the hw_sob reset handler function.
1306          * The hw_queue lock prevents the hw_sob refcount value, which is
1307          * changed by the signal/wait flows, from going out of sync.
1308          */
1309         spin_lock(&signal_cs_cmpl->lock);
1310
1311         if (completion_done(&cs->signal_fence->completion)) {
1312                 spin_unlock(&signal_cs_cmpl->lock);
1313                 return -EINVAL;
1314         }
1315         /* Increment kref since all slave queues are now waiting on it */
1316         kref_get(&cs_cmpl->hw_sob->kref);
1317
1318         spin_unlock(&signal_cs_cmpl->lock);
1319
1320         /* Calculate the stream from collective master queue (1st job) */
1321         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1322         stream = job->hw_queue_id % 4;
1323         sob_group_offset =
1324                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1325
1326         list_for_each_entry(job, &cs->job_list, cs_node) {
1327                 queue_id = job->hw_queue_id;
1328
1329                 if (hdev->kernel_queues[queue_id].collective_mode ==
1330                                 HL_COLLECTIVE_MASTER)
1331                         gaudi_collective_master_init_job(hdev, job, stream,
1332                                                 sob_group_offset);
1333                 else
1334                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1335         }
1336
1337         cs_cmpl->sob_group = sob_group_offset;
1338
1339         /* Handle sob group kref and wraparound */
1340         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1341         cprop->next_sob_group_val[stream]++;
1342
1343         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1344                 /*
1345                  * Decrement as we reached the max value.
1346                  * The release function won't be called here as we've
1347                  * just incremented the refcount.
1348                  */
1349                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1350                                 gaudi_sob_group_reset_error);
1351                 cprop->next_sob_group_val[stream] = 1;
1352                 /* only two SOBs are currently in use */
1353                 cprop->curr_sob_group_idx[stream] =
1354                         (cprop->curr_sob_group_idx[stream] + 1) &
1355                                                         (HL_RSVD_SOBS - 1);
1356
1357                 gaudi_collective_map_sobs(hdev, stream);
1358
1359                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1360                                 cprop->curr_sob_group_idx[stream], stream);
1361         }
1362
1363         mb();
1364         hl_fence_put(cs->signal_fence);
1365         cs->signal_fence = NULL;
1366
1367         return 0;
1368 }
1369
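/*
 * Allocate a single collective job (master or slave), create its kernel CB
 * sized exactly for the packets it will hold, and add it to the CS job list.
 */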
1370 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1371                 struct hl_ctx *ctx, struct hl_cs *cs,
1372                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1373                 u32 encaps_signal_offset)
1374 {
1375         struct hw_queue_properties *hw_queue_prop;
1376         struct hl_cs_counters_atomic *cntr;
1377         struct hl_cs_job *job;
1378         struct hl_cb *cb;
1379         u32 cb_size;
1380         bool patched_cb;
1381
1382         cntr = &hdev->aggregated_cs_counters;
1383
1384         if (mode == HL_COLLECTIVE_MASTER) {
1385                 /* The collective master queue CB contains:
1386                  * 4 msg short packets for monitor 1 configuration
1387                  * 1 fence packet
1388                  * 4 msg short packets for monitor 2 configuration
1389                  * 1 fence packet
1390                  * 2 msg prot packets for completion and MSI-X
1391                  */
1392                 cb_size = sizeof(struct packet_msg_short) * 8 +
1393                                 sizeof(struct packet_fence) * 2 +
1394                                 sizeof(struct packet_msg_prot) * 2;
1395                 patched_cb = true;
1396         } else {
1397                 /* The collective slave queue CB contains:
1398                  * 4 msg short packets for monitor configuration
1399                  * 1 fence packet
1400                  * 1 additional msg short packet for sob signal
1401                  */
1402                 cb_size = sizeof(struct packet_msg_short) * 5 +
1403                                 sizeof(struct packet_fence);
1404                 patched_cb = false;
1405         }
1406
1407         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1408         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1409         if (!job) {
1410                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1411                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1412                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1413                 return -ENOMEM;
1414         }
1415
1416         /* Allocate an internal mapped CB for non-patched CBs */
1417         cb = hl_cb_kernel_create(hdev, cb_size,
1418                         hdev->mmu_enable && !patched_cb);
1419         if (!cb) {
1420                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1421                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1422                 kfree(job);
1423                 return -EFAULT;
1424         }
1425
1426         job->id = 0;
1427         job->cs = cs;
1428         job->user_cb = cb;
1429         atomic_inc(&job->user_cb->cs_cnt);
1430         job->user_cb_size = cb_size;
1431         job->hw_queue_id = queue_id;
1432
1433         /* Since the collective wait CS is guaranteed to have only one
1434          * chunk, we can use this chunk to set the encapsulated signal
1435          * offset in the jobs.
1436          */
1437         if (cs->encaps_signals)
1438                 job->encaps_sig_wait_offset = encaps_signal_offset;
1439
1440         /*
1441          * No need for parsing, the user CB is the patched CB.
1442          * We call hl_cb_destroy() for two reasons: we don't need the CB
1443          * in the CB IDR anymore, and we need to decrement its refcount,
1444          * which was incremented inside hl_cb_kernel_create().
1445          */
1446         if (patched_cb)
1447                 job->patched_cb = job->user_cb;
1448         else
1449                 job->patched_cb = NULL;
1450
1451         job->job_cb_size = job->user_cb_size;
1452         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1453
1454         /* Increment the CS refcount since we get a completion for external queues */
1455         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1456                 cs_get(cs);
1457
1458         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1459
1460         list_add_tail(&job->cs_node, &cs->job_list);
1461
1462         hl_debugfs_add_job(hdev, job);
1463
1464         return 0;
1465 }
1466
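/*
 * Create the full set of collective wait jobs: one master job on the wait
 * queue and NUMBER_OF_SOBS_IN_GRP slave jobs spread across the NIC queues
 * and the reduction engine queue (DMA5 or TPC7), skipping NICs that were
 * not initialized.
 */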
1467 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1468                 struct hl_ctx *ctx, struct hl_cs *cs,
1469                 u32 wait_queue_id, u32 collective_engine_id,
1470                 u32 encaps_signal_offset)
1471 {
1472         struct gaudi_device *gaudi = hdev->asic_specific;
1473         struct hw_queue_properties *hw_queue_prop;
1474         u32 queue_id, collective_queue, num_jobs;
1475         u32 stream, nic_queue, nic_idx = 0;
1476         bool skip;
1477         int i, rc = 0;
1478
1479         /* Verify wait queue id is configured as master */
1480         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1481         if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1482                 dev_err(hdev->dev,
1483                         "Queue %d is not configured as collective master\n",
1484                         wait_queue_id);
1485                 return -EINVAL;
1486         }
1487
1488         /* Verify engine id is supported */
1489         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1490                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1491                 dev_err(hdev->dev,
1492                         "Collective wait does not support engine %u\n",
1493                         collective_engine_id);
1494                 return -EINVAL;
1495         }
1496
1497         stream = wait_queue_id % 4;
1498
1499         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1500                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1501         else
1502                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1503
1504         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1505         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1506
1507         /* The first job goes to the collective master queue, which waits
1508          * for the collective slave queues to finish execution.
1509          * The synchronization is done using two monitors:
1510          * the first monitor for NICs 0-7, the second monitor for NICs 8-9
1511          * and the reduction engine (DMA5/TPC7).
1512          *
1513          * The rest of the jobs go to the collective slave queues, which
1514          * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1515          */
1516         for (i = 0 ; i < num_jobs ; i++) {
1517                 if (i == 0) {
1518                         queue_id = wait_queue_id;
1519                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1520                                 HL_COLLECTIVE_MASTER, queue_id,
1521                                 wait_queue_id, encaps_signal_offset);
1522                 } else {
1523                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1524                                 if (gaudi->hw_cap_initialized &
1525                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1526                                         skip = false;
1527                                 else
1528                                         skip = true;
1529
1530                                 queue_id = nic_queue;
1531                                 nic_queue += 4;
1532                                 nic_idx++;
1533
1534                                 if (skip)
1535                                         continue;
1536                         } else {
1537                                 queue_id = collective_queue;
1538                         }
1539
1540                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1541                                 HL_COLLECTIVE_SLAVE, queue_id,
1542                                 wait_queue_id, encaps_signal_offset);
1543                 }
1544
1545                 if (rc)
1546                         return rc;
1547         }
1548
1549         return rc;
1550 }
1551
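/*
 * Late initialization: runs after the device CPU is up. Fetches the cpucp
 * info, stops the unused NIC QMANs on PCI cards, enables PCI access from
 * the device CPU, scrubs SRAM/DRAM, clears the MMU page tables range and
 * initializes the TPC memories, the collective support and the MMU for the
 * single user ASID.
 */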
1552 static int gaudi_late_init(struct hl_device *hdev)
1553 {
1554         struct gaudi_device *gaudi = hdev->asic_specific;
1555         int rc;
1556
1557         rc = gaudi->cpucp_info_get(hdev);
1558         if (rc) {
1559                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1560                 return rc;
1561         }
1562
1563         if ((hdev->card_type == cpucp_card_type_pci) &&
1564                         (hdev->nic_ports_mask & 0x3)) {
1565                 dev_info(hdev->dev,
1566                         "PCI card detected, only 8 ports are enabled\n");
1567                 hdev->nic_ports_mask &= ~0x3;
1568
1569                 /* Stop and disable unused NIC QMANs */
1570                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1571                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1572                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1573
1574                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1575                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1576                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1577
1578                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1579                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1580
1581                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1582         }
1583
1584         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1585         if (rc) {
1586                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1587                 return rc;
1588         }
1589
1590         /* Scrub both SRAM and DRAM */
1591         rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
1592         if (rc)
1593                 goto disable_pci_access;
1594
1595         rc = gaudi_fetch_psoc_frequency(hdev);
1596         if (rc) {
1597                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1598                 goto disable_pci_access;
1599         }
1600
1601         rc = gaudi_mmu_clear_pgt_range(hdev);
1602         if (rc) {
1603                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1604                 goto disable_pci_access;
1605         }
1606
1607         rc = gaudi_init_tpc_mem(hdev);
1608         if (rc) {
1609                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1610                 goto disable_pci_access;
1611         }
1612
1613         rc = gaudi_collective_init(hdev);
1614         if (rc) {
1615                 dev_err(hdev->dev, "Failed to init collective\n");
1616                 goto disable_pci_access;
1617         }
1618
1619         /* We only support a single ASID for the user, so for the sake of optimization, just
1620          * initialize the ASID one time during device initialization with the fixed value of 1
1621          */
1622         gaudi_mmu_prepare(hdev, 1);
1623
1624         return 0;
1625
1626 disable_pci_access:
1627         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1628
1629         return rc;
1630 }
1631
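/* Free the hwmon channel info array that was allocated for this device */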
1632 static void gaudi_late_fini(struct hl_device *hdev)
1633 {
1634         const struct hwmon_channel_info **channel_info_arr;
1635         int i = 0;
1636
1637         if (!hdev->hl_chip_info->info)
1638                 return;
1639
1640         channel_info_arr = hdev->hl_chip_info->info;
1641
1642         while (channel_info_arr[i]) {
1643                 kfree(channel_info_arr[i]->config);
1644                 kfree(channel_info_arr[i]);
1645                 i++;
1646         }
1647
1648         kfree(channel_info_arr);
1649
1650         hdev->hl_chip_info->info = NULL;
1651 }
1652
1653 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1654 {
1655         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1656         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1657         int i, j, rc = 0;
1658
1659         /*
1660          * The device CPU works with 40-bit addresses, and bit 39 must be set
1661          * to '1' when accessing the host.
1662          * Bits 49:39 of the full host address are saved for a later
1663          * configuration of the HW to perform the extension to 50 bits.
1664          * Because a single HW register holds the extension bits, these bits
1665          * must be identical across the entire allocated range.
1666          */
1667
1668         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1669                 virt_addr_arr[i] =
1670                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1671                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1672                                                 &dma_addr_arr[i],
1673                                                 GFP_KERNEL | __GFP_ZERO);
1674                 if (!virt_addr_arr[i]) {
1675                         rc = -ENOMEM;
1676                         goto free_dma_mem_arr;
1677                 }
1678
1679                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1680                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1681                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1682                         break;
1683         }
1684
1685         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1686                 dev_err(hdev->dev,
1687                         "MSB of CPU accessible DMA memory are not identical in all range\n");
1688                 rc = -EFAULT;
1689                 goto free_dma_mem_arr;
1690         }
1691
1692         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1693         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1694         hdev->cpu_pci_msb_addr =
1695                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1696
1697         if (!hdev->asic_prop.fw_security_enabled)
1698                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1699
1700 free_dma_mem_arr:
1701         for (j = 0 ; j < i ; j++)
1702                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1703                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1704                                                 virt_addr_arr[j],
1705                                                 dma_addr_arr[j]);
1706
1707         return rc;
1708 }
1709
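/* Free the PQ buffers that were allocated for the internal (HW) queues */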
1710 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1711 {
1712         struct gaudi_device *gaudi = hdev->asic_specific;
1713         struct gaudi_internal_qman_info *q;
1714         u32 i;
1715
1716         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1717                 q = &gaudi->internal_qmans[i];
1718                 if (!q->pq_kernel_addr)
1719                         continue;
1720                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1721                                                         q->pq_kernel_addr,
1722                                                         q->pq_dma_addr);
1723         }
1724 }
1725
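/*
 * Allocate a PQ buffer for every internal queue (HBM DMA, MME, TPC and NIC
 * QMANs). The PQ size depends on the engine type that owns the queue.
 */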
1726 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1727 {
1728         struct gaudi_device *gaudi = hdev->asic_specific;
1729         struct gaudi_internal_qman_info *q;
1730         int rc, i;
1731
1732         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1733                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1734                         continue;
1735
1736                 q = &gaudi->internal_qmans[i];
1737
1738                 switch (i) {
1739                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1740                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1741                         break;
1742                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1743                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1744                         break;
1745                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1746                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1747                         break;
1748                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1749                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1750                         break;
1751                 default:
1752                         dev_err(hdev->dev, "Bad internal queue index %d", i);
1753                         rc = -EINVAL;
1754                         goto free_internal_qmans_pq_mem;
1755                 }
1756
1757                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1758                                                 hdev, q->pq_size,
1759                                                 &q->pq_dma_addr,
1760                                                 GFP_KERNEL | __GFP_ZERO);
1761                 if (!q->pq_kernel_addr) {
1762                         rc = -ENOMEM;
1763                         goto free_internal_qmans_pq_mem;
1764                 }
1765         }
1766
1767         return 0;
1768
1769 free_internal_qmans_pq_mem:
1770         gaudi_free_internal_qmans_pq_mem(hdev);
1771         return rc;
1772 }
1773
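/*
 * Describe the PCI BAR layout: which BAR exposes the CFG space, the SRAM,
 * the DRAM (HBM) and the PSOC scratchpad, and at which offset inside it.
 */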
1774 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1775 {
1776         struct asic_fixed_properties *prop = &hdev->asic_prop;
1777         struct pci_mem_region *region;
1778
1779         /* CFG */
1780         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1781         region->region_base = CFG_BASE;
1782         region->region_size = CFG_SIZE;
1783         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1784         region->bar_size = CFG_BAR_SIZE;
1785         region->bar_id = CFG_BAR_ID;
1786         region->used = 1;
1787
1788         /* SRAM */
1789         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1790         region->region_base = SRAM_BASE_ADDR;
1791         region->region_size = SRAM_SIZE;
1792         region->offset_in_bar = 0;
1793         region->bar_size = SRAM_BAR_SIZE;
1794         region->bar_id = SRAM_BAR_ID;
1795         region->used = 1;
1796
1797         /* DRAM */
1798         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1799         region->region_base = DRAM_PHYS_BASE;
1800         region->region_size = hdev->asic_prop.dram_size;
1801         region->offset_in_bar = 0;
1802         region->bar_size = prop->dram_pci_bar_size;
1803         region->bar_id = HBM_BAR_ID;
1804         region->used = 1;
1805
1806         /* SP SRAM */
1807         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1808         region->region_base = PSOC_SCRATCHPAD_ADDR;
1809         region->region_size = PSOC_SCRATCHPAD_SIZE;
1810         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1811         region->bar_size = CFG_BAR_SIZE;
1812         region->bar_id = CFG_BAR_ID;
1813         region->used = 1;
1814 }
1815
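/*
 * Early SW initialization: build the event ID map, create the DMA pool and
 * the CPU-accessible memory pool, allocate the internal queues PQ buffers
 * and expose the ASIC capabilities to the common code.
 */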
1816 static int gaudi_sw_init(struct hl_device *hdev)
1817 {
1818         struct gaudi_device *gaudi;
1819         u32 i, event_id = 0;
1820         int rc;
1821
1822         /* Allocate device structure */
1823         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1824         if (!gaudi)
1825                 return -ENOMEM;
1826
1827         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1828                 if (gaudi_irq_map_table[i].valid) {
1829                         if (event_id == GAUDI_EVENT_SIZE) {
1830                                 dev_err(hdev->dev,
1831                                         "Event array exceeds the limit of %u events\n",
1832                                         GAUDI_EVENT_SIZE);
1833                                 rc = -EINVAL;
1834                                 goto free_gaudi_device;
1835                         }
1836
1837                         gaudi->events[event_id++] =
1838                                         gaudi_irq_map_table[i].fc_id;
1839                 }
1840         }
1841
1842         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1843
1844         hdev->asic_specific = gaudi;
1845
1846         /* Create DMA pool for small allocations */
1847         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1848                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1849         if (!hdev->dma_pool) {
1850                 dev_err(hdev->dev, "failed to create DMA pool\n");
1851                 rc = -ENOMEM;
1852                 goto free_gaudi_device;
1853         }
1854
1855         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1856         if (rc)
1857                 goto free_dma_pool;
1858
1859         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1860         if (!hdev->cpu_accessible_dma_pool) {
1861                 dev_err(hdev->dev,
1862                         "Failed to create CPU accessible DMA pool\n");
1863                 rc = -ENOMEM;
1864                 goto free_cpu_dma_mem;
1865         }
1866
1867         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1868                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1869                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1870         if (rc) {
1871                 dev_err(hdev->dev,
1872                         "Failed to add memory to CPU accessible DMA pool\n");
1873                 rc = -EFAULT;
1874                 goto free_cpu_accessible_dma_pool;
1875         }
1876
1877         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1878         if (rc)
1879                 goto free_cpu_accessible_dma_pool;
1880
1881         spin_lock_init(&gaudi->hw_queues_lock);
1882         mutex_init(&gaudi->clk_gate_mutex);
1883
1884         hdev->supports_sync_stream = true;
1885         hdev->supports_coresight = true;
1886         hdev->supports_staged_submission = true;
1887         hdev->supports_wait_for_multi_cs = true;
1888
1889         hdev->asic_funcs->set_pci_memory_regions(hdev);
1890         hdev->stream_master_qid_arr =
1891                                 hdev->asic_funcs->get_stream_master_qid_arr();
1892         hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1893
1894         return 0;
1895
1896 free_cpu_accessible_dma_pool:
1897         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1898 free_cpu_dma_mem:
1899         if (!hdev->asic_prop.fw_security_enabled)
1900                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1901                                         hdev->cpu_pci_msb_addr);
1902         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1903                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1904                         hdev->cpu_accessible_dma_mem,
1905                         hdev->cpu_accessible_dma_address);
1906 free_dma_pool:
1907         dma_pool_destroy(hdev->dma_pool);
1908 free_gaudi_device:
1909         kfree(gaudi);
1910         return rc;
1911 }
1912
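/* Release everything that was allocated in gaudi_sw_init(), in reverse order */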
1913 static int gaudi_sw_fini(struct hl_device *hdev)
1914 {
1915         struct gaudi_device *gaudi = hdev->asic_specific;
1916
1917         gaudi_free_internal_qmans_pq_mem(hdev);
1918
1919         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1920
1921         if (!hdev->asic_prop.fw_security_enabled)
1922                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1923                                         hdev->cpu_pci_msb_addr);
1924
1925         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1926                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1927                         hdev->cpu_accessible_dma_mem,
1928                         hdev->cpu_accessible_dma_address);
1929
1930         dma_pool_destroy(hdev->dma_pool);
1931
1932         mutex_destroy(&gaudi->clk_gate_mutex);
1933
1934         kfree(gaudi);
1935
1936         return 0;
1937 }
1938
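/*
 * Single MSI mode interrupt handler: one vector serves all completion
 * queues and the event queue, so poll all of them on every interrupt.
 */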
1939 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1940 {
1941         struct hl_device *hdev = arg;
1942         int i;
1943
1944         if (hdev->disabled)
1945                 return IRQ_HANDLED;
1946
1947         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1948                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1949
1950         hl_irq_handler_eq(irq, &hdev->event_queue);
1951
1952         return IRQ_HANDLED;
1953 }
1954
1955 /*
1956  * For backward compatibility, new MSI interrupts should be set after the
1957  * existing CPU and NIC interrupts.
1958  */
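/*
 * CQ interrupts (nr < GAUDI_EVENT_QUEUE_MSI_IDX) and the CPU EQ map
 * directly to their vector index; any other interrupt is placed after
 * the CPU and NIC vectors (nr + NIC_NUMBER_OF_ENGINES + 1).
 */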
1959 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1960                                 bool cpu_eq)
1961 {
1962         int msi_vec;
1963
1964         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1965                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1966                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1967
1968         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1969                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1970
1971         return pci_irq_vector(hdev->pdev, msi_vec);
1972 }
1973
1974 static int gaudi_enable_msi_single(struct hl_device *hdev)
1975 {
1976         int rc, irq;
1977
1978         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1979
1980         irq = gaudi_pci_irq_vector(hdev, 0, false);
1981         rc = request_irq(irq, gaudi_irq_handler_single, 0,
1982                         "gaudi single msi", hdev);
1983         if (rc)
1984                 dev_err(hdev->dev,
1985                         "Failed to request single MSI IRQ\n");
1986
1987         return rc;
1988 }
1989
1990 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1991 {
1992         int cq_cnt = hdev->asic_prop.completion_queues_count;
1993         int rc, i, irq_cnt_init, irq;
1994
1995         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1996                 irq = gaudi_pci_irq_vector(hdev, i, false);
1997                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1998                                 &hdev->completion_queue[i]);
1999                 if (rc) {
2000                         dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2001                         goto free_irqs;
2002                 }
2003         }
2004
2005         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2006         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2007                                 &hdev->event_queue);
2008         if (rc) {
2009                 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2010                 goto free_irqs;
2011         }
2012
2013         return 0;
2014
2015 free_irqs:
2016         for (i = 0 ; i < irq_cnt_init ; i++)
2017                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
2018                                 &hdev->completion_queue[i]);
2019         return rc;
2020 }
2021
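/*
 * Enable MSI interrupts. If fewer than NUMBER_OF_INTERRUPTS vectors are
 * available, work in single MSI mode with one shared vector; otherwise
 * request one vector per completion queue plus one for the event queue.
 */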
2022 static int gaudi_enable_msi(struct hl_device *hdev)
2023 {
2024         struct gaudi_device *gaudi = hdev->asic_specific;
2025         int rc;
2026
2027         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2028                 return 0;
2029
2030         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2031         if (rc < 0) {
2032                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2033                 return rc;
2034         }
2035
2036         if (rc < NUMBER_OF_INTERRUPTS) {
2037                 gaudi->multi_msi_mode = false;
2038                 rc = gaudi_enable_msi_single(hdev);
2039         } else {
2040                 gaudi->multi_msi_mode = true;
2041                 rc = gaudi_enable_msi_multi(hdev);
2042         }
2043
2044         if (rc)
2045                 goto free_pci_irq_vectors;
2046
2047         gaudi->hw_cap_initialized |= HW_CAP_MSI;
2048
2049         return 0;
2050
2051 free_pci_irq_vectors:
2052         pci_free_irq_vectors(hdev->pdev);
2053         return rc;
2054 }
2055
2056 static void gaudi_sync_irqs(struct hl_device *hdev)
2057 {
2058         struct gaudi_device *gaudi = hdev->asic_specific;
2059         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2060
2061         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2062                 return;
2063
2064         /* Wait for all pending IRQ handlers to finish */
2065         if (gaudi->multi_msi_mode) {
2066                 for (i = 0 ; i < cq_cnt ; i++)
2067                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2068
2069                 synchronize_irq(gaudi_pci_irq_vector(hdev,
2070                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
2071                                                 true));
2072         } else {
2073                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2074         }
2075 }
2076
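/*
 * Tear down the MSI setup done in gaudi_enable_msi(): sync the IRQs, free
 * the registered handlers and release the PCI IRQ vectors.
 */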
2077 static void gaudi_disable_msi(struct hl_device *hdev)
2078 {
2079         struct gaudi_device *gaudi = hdev->asic_specific;
2080         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2081
2082         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2083                 return;
2084
2085         gaudi_sync_irqs(hdev);
2086
2087         if (gaudi->multi_msi_mode) {
2088                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2089                                                 true);
2090                 free_irq(irq, &hdev->event_queue);
2091
2092                 for (i = 0 ; i < cq_cnt ; i++) {
2093                         irq = gaudi_pci_irq_vector(hdev, i, false);
2094                         free_irq(irq, &hdev->completion_queue[i]);
2095                 }
2096         } else {
2097                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2098         }
2099
2100         pci_free_irq_vectors(hdev->pdev);
2101
2102         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2103 }
2104
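/*
 * Enable the SRAM scrambler in all NIF/SIF routers and DMA interfaces,
 * unless the FW owns security or has already configured it.
 */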
2105 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2106 {
2107         struct gaudi_device *gaudi = hdev->asic_specific;
2108
2109         if (hdev->asic_prop.fw_security_enabled)
2110                 return;
2111
2112         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2113                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2114                 return;
2115
2116         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2117                 return;
2118
2119         if (!hdev->sram_scrambler_enable)
2120                 return;
2121
2122         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2123                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2124         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2125                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2126         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2127                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2128         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2129                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2130         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2131                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2132         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2133                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2134         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2135                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2136         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2137                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2138
2139         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2140                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2141         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2142                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2143         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2144                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2145         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2146                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2147         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2148                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2149         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2150                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2151         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2152                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2153         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2154                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2155
2156         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2157                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2158         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2159                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2160         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2161                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2162         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2163                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2164         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2165                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2166         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2167                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2168         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2169                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2170         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2171                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2172
2173         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2174 }
2175
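/*
 * Enable the HBM scrambler in all NIF/SIF routers and DMA interfaces,
 * unless the FW owns security or has already configured it.
 */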
2176 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2177 {
2178         struct gaudi_device *gaudi = hdev->asic_specific;
2179
2180         if (hdev->asic_prop.fw_security_enabled)
2181                 return;
2182
2183         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2184                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2185                 return;
2186
2187         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2188                 return;
2189
2190         if (!hdev->dram_scrambler_enable)
2191                 return;
2192
2193         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2194                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2195         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2196                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2197         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2198                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2199         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2200                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2201         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2202                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2203         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2204                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2205         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2206                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2207         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2208                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2209
2210         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2211                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2212         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2213                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2214         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2215                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2216         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2217                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2218         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2219                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2220         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2221                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2222         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2223                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2224         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2225                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2226
2227         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2228                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2229         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2230                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2231         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2232                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2233         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2234                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2235         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2236                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2237         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2238                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2239         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2240                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2241         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2242                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2243
2244         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2245 }
2246
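/*
 * Configure and enable the end-to-end (E2E) credits of all routers and DMA
 * interfaces, unless the FW owns security or has already configured them.
 */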
2247 static void gaudi_init_e2e(struct hl_device *hdev)
2248 {
2249         if (hdev->asic_prop.fw_security_enabled)
2250                 return;
2251
2252         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2253                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2254                 return;
2255
2256         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2257         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2258         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2259         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2260
2261         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2262         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2263         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2264         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2265
2266         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2267         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2268         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2269         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2270
2271         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2272         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2273         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2274         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2275
2276         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2277         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2278         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2279         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2280
2281         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2282         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2283         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2284         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2285
2286         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2287         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2288         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2289         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2290
2291         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2292         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2293         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2294         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2295
2296         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2297         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2298         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2299         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2300
2301         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2302         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2303         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2304         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2305
2306         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2307         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2308         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2309         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2310
2311         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2312         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2313         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2314         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2315
2316         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2317         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2318         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2319         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2320
2321         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2322         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2323         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2324         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2325
2326         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2327         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2328         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2329         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2330
2331         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2332         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2333         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2334         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2335
2336         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2337         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2338         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2339         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2340
2341         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2342         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2343         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2344         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2345
2346         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2347         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2348         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2349         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2350
2351         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2352         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2353         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2354         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2355
2356         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2357         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2358         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2359         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2360
2361         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2362         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2363         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2364         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2365
2366         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2367         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2368         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2369         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2370
2371         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2372         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2373         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2374         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2375
2376         if (!hdev->dram_scrambler_enable) {
2377                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2378                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2379                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2380                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2381
2382                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2383                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2384                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2385                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2386
2387                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2388                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2389                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2390                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2391
2392                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2393                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2394                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2395                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2396
2397                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2398                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2399                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2400                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2401
2402                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2403                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2404                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2405                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2406
2407                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2408                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2409                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2410                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2411
2412                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2413                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2414                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2415                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2416
2417                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2418                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2419                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2420                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2421
2422                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2423                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2424                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2425                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2426
2427                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2428                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2429                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2430                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2431
2432                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2433                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2434                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2435                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2436
2437                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2438                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2439                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2440                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2441
2442                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2443                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2444                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2445                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2446
2447                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2448                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2449                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2450                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2451
2452                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2453                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2454                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2455                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2456
2457                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2458                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2459                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2460                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2461
2462                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2463                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2464                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2465                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2466
2467                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2468                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2469                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2470                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2471
2472                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2473                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2474                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2475                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2476
2477                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2478                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2479                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2480                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2481
2482                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2483                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2484                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2485                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2486
2487                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2488                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2489                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2490                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2491
2492                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2493                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2494                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2495                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2496         }
2497
2498         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2499                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2500         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2501                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2502
2503         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2504                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2505         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2506                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2507
2508         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2509                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2510         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2511                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2512
2513         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2514                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2515         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2516                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2517
2518         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2519                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2520         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2521                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2522
2523         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2524                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2525         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2526                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2527
2528         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2529                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2530         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2531                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2532
2533         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2534                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2535         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2536                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2537
2538         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2539                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2540         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2541                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2542
2543         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2544                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2545         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2546                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2547
2548         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2549                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2550         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2551                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2552
2553         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2554                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2555         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2556                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2557
2558         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2559                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2560         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2561                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2562
2563         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2564                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2565         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2566                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2567
2568         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2569                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2570         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2571                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2572
2573         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2574                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2575         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2576                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2577
2578         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2579                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2580         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2581                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2582
2583         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2584                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2585         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2586                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2587
2588         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2589                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2590         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2591                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2592
2593         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2594                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2595         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2596                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2597
2598         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2599                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2600         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2601                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2602
2603         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2604                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2605         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2606                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2607
2608         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2609                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2610         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2611                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2612
2613         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2614                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2615         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2616                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2617 }
2618
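/*
 * Set the read/write credit counts of the HBM channels behind each DMA_IF
 * (E_N/E_S/W_N/W_S) and turn on the read/write credit mechanism. Skipped
 * when the firmware is secured or reports that it already configured the
 * HBM credits (CPU_BOOT_DEV_STS0_HBM_CRED_EN).
 */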
2619 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2620 {
2621         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2622
2623         if (hdev->asic_prop.fw_security_enabled)
2624                 return;
2625
2626         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2627                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2628                 return;
2629
2630         hbm0_wr = 0x33333333;
2631         hbm0_rd = 0x77777777;
2632         hbm1_wr = 0x55555555;
2633         hbm1_rd = 0xDDDDDDDD;
2634
2635         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2636         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2637         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2638         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2639
2640         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2641         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2642         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2643         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2644
2645         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2646         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2647         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2648         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2649
2650         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2651         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2652         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2653         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2654
2655         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2656                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2657                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2658         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2659                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2660                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2661         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2662                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2663                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2664         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2665                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2666                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2667
2668         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2669                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2670                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2671         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2672                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2673                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2674         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2675                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2676                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2677         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2678                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2679                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2680 }
2681
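/*
 * Golden (init-time) register configuration: E2E and HBM credits, TPC
 * arithmetic interrupt masking and I-cache fetch line count, zeroing of the
 * first 128 bytes of SRAM for Tensor DMA, and the MME EUs rollup count.
 */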
2682 static void gaudi_init_golden_registers(struct hl_device *hdev)
2683 {
2684         u32 tpc_offset;
2685         int tpc_id, i;
2686
2687         gaudi_init_e2e(hdev);
2688         gaudi_init_hbm_cred(hdev);
2689
2690         for (tpc_id = 0, tpc_offset = 0;
2691                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2692                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2693                 /* Mask all arithmetic interrupts from TPC */
2694                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2695                 /* Set 16 cache lines */
2696                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2697                                 ICACHE_FETCH_LINE_NUM, 2);
2698         }
2699
2700         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2701         for (i = 0 ; i < 128 ; i += 8)
2702                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2703
2704         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2705         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2706         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2707         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2708 }
2709
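/*
 * Configure one stream (qman_id) of a PCI DMA QMAN, whose PQ resides on the
 * host at qman_pq_addr. Per-stream registers are 4 bytes apart, hence
 * q_off = dma_qm_offset + qman_id * 4. The QMAN's global error and
 * arbitration registers are programmed only for stream 0.
 */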
2710 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2711                                         int qman_id, dma_addr_t qman_pq_addr)
2712 {
2713         struct cpu_dyn_regs *dyn_regs =
2714                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2715         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2716         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2717         u32 q_off, dma_qm_offset;
2718         u32 dma_qm_err_cfg, irq_handler_offset;
2719
2720         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2721
2722         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2723                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2724         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2725                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2726         so_base_en_lo = lower_32_bits(CFG_BASE +
2727                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2728         so_base_en_hi = upper_32_bits(CFG_BASE +
2729                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2730         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2731                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2732         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2733                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2734         so_base_ws_lo = lower_32_bits(CFG_BASE +
2735                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2736         so_base_ws_hi = upper_32_bits(CFG_BASE +
2737                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2738
2739         q_off = dma_qm_offset + qman_id * 4;
2740
2741         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2742         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2743
2744         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2745         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2746         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2747
2748         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2749         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2750                                                         QMAN_LDMA_SRC_OFFSET);
2751         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2752                                                         QMAN_LDMA_DST_OFFSET);
2753
2754         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2755         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2756         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2757         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2758         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2759         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2760         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2761         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2762
2763         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2764
2765         /* The following configuration is needed only once per QMAN */
2766         if (qman_id == 0) {
2767                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2768                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2769                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2770
2771                 /* Configure RAZWI IRQ */
2772                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2773                 if (hdev->stop_on_err)
2774                         dma_qm_err_cfg |=
2775                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2776
2777                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2778
2779                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2780                         lower_32_bits(CFG_BASE + irq_handler_offset));
2781                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2782                         upper_32_bits(CFG_BASE + irq_handler_offset));
2783
2784                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2785                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2786                                                                         dma_id);
2787
2788                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2789                                 QM_ARB_ERR_MSG_EN_MASK);
2790
2791                 /* Increase ARB WDT to support streams architecture */
2792                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2793                                 GAUDI_ARB_WDT_TIMEOUT);
2794
2795                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2796                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2797
2798                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2799         }
2800 }
2801
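/*
 * Configure the DMA core engine itself (as opposed to its QMAN): read
 * outstanding limits, error reporting towards the interrupt handler, MMU
 * bypass for the secured channel, and finally the engine enable bit.
 */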
2802 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2803 {
2804         struct cpu_dyn_regs *dyn_regs =
2805                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2806         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2807         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2808         u32 irq_handler_offset;
2809
2810         /* Set to maximum possible according to physical size */
2811         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2812         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2813
2814         /* WA for H/W bug H3-2116 */
2815         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2816
2817         /* STOP_ON bit: the operation is not completed in case of RAZWI */
2818         if (hdev->stop_on_err)
2819                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2820
2821         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2822
2823         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2824                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2825                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2826
2827         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2828                 lower_32_bits(CFG_BASE + irq_handler_offset));
2829         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2830                 upper_32_bits(CFG_BASE + irq_handler_offset));
2831
2832         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2833                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2834         WREG32(mmDMA0_CORE_PROT + dma_offset,
2835                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2836         /* If the channel is secured, it should be in MMU bypass mode */
2837         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2838                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2839         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2840 }
2841
2842 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2843                                 u32 enable_mask)
2844 {
2845         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2846
2847         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2848 }
2849
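/*
 * Initialize the QMANs of the PCI DMA channels (external queues). Each
 * stream is bound to a completion queue and an MSI vector, taking the CPU
 * queue and the NIC IRQs into account (see the comment inside the loop).
 */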
2850 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2851 {
2852         struct gaudi_device *gaudi = hdev->asic_specific;
2853         struct hl_hw_queue *q;
2854         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2855
2856         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2857                 return;
2858
2859         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2860                 dma_id = gaudi_dma_assignment[i];
2861                 /*
2862                  * For queues after the CPU Q, add 1 to get the correct queue
2863                  * index. In addition, the CPU EQ and NIC IRQs must be added
2864                  * in order to get the correct MSI register.
2865                  */
2866                 if (dma_id > 1) {
2867                         cpu_skip = 1;
2868                         nic_skip = NIC_NUMBER_OF_ENGINES;
2869                 } else {
2870                         cpu_skip = 0;
2871                         nic_skip = 0;
2872                 }
2873
2874                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2875                         q_idx = 4 * dma_id + j + cpu_skip;
2876                         q = &hdev->kernel_queues[q_idx];
2877                         q->cq_id = cq_id++;
2878                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2879                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2880                                                 q->bus_address);
2881                 }
2882
2883                 gaudi_init_dma_core(hdev, dma_id);
2884
2885                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2886         }
2887
2888         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2889 }
2890
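/*
 * Configure one stream of an HBM DMA QMAN (internal queue). Streams 0-3 set
 * up their PQ at qman_base_addr; stream 4 is the lower CP and instead
 * programs the QMAN's global error and arbitration registers.
 */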
2891 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2892                                         int qman_id, u64 qman_base_addr)
2893 {
2894         struct cpu_dyn_regs *dyn_regs =
2895                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2896         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2897         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2898         u32 dma_qm_err_cfg, irq_handler_offset;
2899         u32 q_off, dma_qm_offset;
2900
2901         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2902
2903         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2904                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2905         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2906                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2907         so_base_en_lo = lower_32_bits(CFG_BASE +
2908                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2909         so_base_en_hi = upper_32_bits(CFG_BASE +
2910                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2911         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2912                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2913         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2914                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2915         so_base_ws_lo = lower_32_bits(CFG_BASE +
2916                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2917         so_base_ws_hi = upper_32_bits(CFG_BASE +
2918                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2919
2920         q_off = dma_qm_offset + qman_id * 4;
2921
2922         if (qman_id < 4) {
2923                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2924                                         lower_32_bits(qman_base_addr));
2925                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2926                                         upper_32_bits(qman_base_addr));
2927
2928                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2929                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2930                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2931
2932                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2933                                                         QMAN_CPDMA_SIZE_OFFSET);
2934                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2935                                                         QMAN_CPDMA_SRC_OFFSET);
2936                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2937                                                         QMAN_CPDMA_DST_OFFSET);
2938         } else {
2939                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2940                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2941                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2942
2943                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2944                                                         QMAN_LDMA_SIZE_OFFSET);
2945                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2946                                                         QMAN_LDMA_SRC_OFFSET);
2947                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2948                                                         QMAN_LDMA_DST_OFFSET);
2949
2950                 /* Configure RAZWI IRQ */
2951                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2952                 if (hdev->stop_on_err)
2953                         dma_qm_err_cfg |=
2954                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2955
2956                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2957
2958                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2959                         lower_32_bits(CFG_BASE + irq_handler_offset));
2960                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2961                         upper_32_bits(CFG_BASE + irq_handler_offset));
2962
2963                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2964                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2965                                                                         dma_id);
2966
2967                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2968                                 QM_ARB_ERR_MSG_EN_MASK);
2969
2970                 /* Increase ARB WDT to support streams architecture */
2971                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2972                                 GAUDI_ARB_WDT_TIMEOUT);
2973
2974                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2975                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2976                                 QMAN_INTERNAL_MAKE_TRUSTED);
2977         }
2978
2979         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2980         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2981         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2982         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2983
2984         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2985         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2986                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2987                                 mtr_base_ws_lo);
2988                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2989                                 mtr_base_ws_hi);
2990                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2991                                 so_base_ws_lo);
2992                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2993                                 so_base_ws_hi);
2994         }
2995 }
2996
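/*
 * Initialize the QMANs of all HBM DMA channels: four upper-CP streams per
 * channel plus the lower CP (qman_id 4), then the DMA core itself.
 */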
2997 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2998 {
2999         struct gaudi_device *gaudi = hdev->asic_specific;
3000         struct gaudi_internal_qman_info *q;
3001         u64 qman_base_addr;
3002         int i, j, dma_id, internal_q_index;
3003
3004         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
3005                 return;
3006
3007         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
3008                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
3009
3010                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
3011                          /*
3012                           * Add the CPU queue in order to get the correct queue
3013                           * number, as all internal queues are placed after it
3014                           */
3015                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
3016
3017                         q = &gaudi->internal_qmans[internal_q_index];
3018                         qman_base_addr = (u64) q->pq_dma_addr;
3019                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
3020                                                 qman_base_addr);
3021                 }
3022
3023                 /* Initialize the lower CP for the HBM DMA QMAN */
3024                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
3025
3026                 gaudi_init_dma_core(hdev, dma_id);
3027
3028                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
3029         }
3030
3031         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
3032 }
3033
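/*
 * Configure one stream of an MME QMAN. As with the HBM DMA QMANs, streams
 * 0-3 set up a PQ while stream 4 (the lower CP) programs the global error
 * and arbitration registers; mme_id is derived from the register offset.
 */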
3034 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
3035                                         int qman_id, u64 qman_base_addr)
3036 {
3037         struct cpu_dyn_regs *dyn_regs =
3038                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3039         u32 mtr_base_lo, mtr_base_hi;
3040         u32 so_base_lo, so_base_hi;
3041         u32 irq_handler_offset;
3042         u32 q_off, mme_id;
3043         u32 mme_qm_err_cfg;
3044
3045         mtr_base_lo = lower_32_bits(CFG_BASE +
3046                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3047         mtr_base_hi = upper_32_bits(CFG_BASE +
3048                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3049         so_base_lo = lower_32_bits(CFG_BASE +
3050                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3051         so_base_hi = upper_32_bits(CFG_BASE +
3052                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3053
3054         q_off = mme_offset + qman_id * 4;
3055
3056         if (qman_id < 4) {
3057                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3058                                         lower_32_bits(qman_base_addr));
3059                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3060                                         upper_32_bits(qman_base_addr));
3061
3062                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3063                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3064                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3065
3066                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3067                                                         QMAN_CPDMA_SIZE_OFFSET);
3068                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3069                                                         QMAN_CPDMA_SRC_OFFSET);
3070                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3071                                                         QMAN_CPDMA_DST_OFFSET);
3072         } else {
3073                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3074                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3075                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3076
3077                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3078                                                         QMAN_LDMA_SIZE_OFFSET);
3079                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3080                                                         QMAN_LDMA_SRC_OFFSET);
3081                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3082                                                         QMAN_LDMA_DST_OFFSET);
3083
3084                 /* Configure RAZWI IRQ */
3085                 mme_id = mme_offset /
3086                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3087
3088                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3089                 if (hdev->stop_on_err)
3090                         mme_qm_err_cfg |=
3091                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3092
3093                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3094
3095                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3096                         lower_32_bits(CFG_BASE + irq_handler_offset));
3097                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3098                         upper_32_bits(CFG_BASE + irq_handler_offset));
3099
3100                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3101                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3102                                                                         mme_id);
3103
3104                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3105                                 QM_ARB_ERR_MSG_EN_MASK);
3106
3107                 /* Increase ARB WDT to support streams architecture */
3108                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3109                                 GAUDI_ARB_WDT_TIMEOUT);
3110
3111                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3112                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3113                                 QMAN_INTERNAL_MAKE_TRUSTED);
3114         }
3115
3116         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3117         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3118         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3119         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3120 }
3121
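/*
 * Initialize both MME QMANs. The first four internal MME queues are mapped
 * to the north-west MME (MME2 registers) and the next four to the
 * south-west MME (MME0 registers), as noted in the comment below.
 */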
3122 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3123 {
3124         struct gaudi_device *gaudi = hdev->asic_specific;
3125         struct gaudi_internal_qman_info *q;
3126         u64 qman_base_addr;
3127         u32 mme_offset;
3128         int i, internal_q_index;
3129
3130         if (gaudi->hw_cap_initialized & HW_CAP_MME)
3131                 return;
3132
3133         /*
3134          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3135          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3136          */
3137
3138         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3139
3140         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3141                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3142                 q = &gaudi->internal_qmans[internal_q_index];
3143                 qman_base_addr = (u64) q->pq_dma_addr;
3144                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3145                                         qman_base_addr);
3146                 if (i == 3)
3147                         mme_offset = 0;
3148         }
3149
3150         /* Initialize the lower CP for the MME QMANs */
3151         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3152         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3153         gaudi_init_mme_qman(hdev, 0, 4, 0);
3154
3155         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3156         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3157
3158         gaudi->hw_cap_initialized |= HW_CAP_MME;
3159 }
3160
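/*
 * Configure one stream of a TPC QMAN; stream 4 is the lower CP and programs
 * the global error and arbitration registers. One TPC additionally gets its
 * MSG_BASE 2/3 pointed at the west-south sync manager for the sync stream
 * collective (see the end of the function).
 */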
3161 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3162                                 int qman_id, u64 qman_base_addr)
3163 {
3164         struct cpu_dyn_regs *dyn_regs =
3165                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3166         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3167         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3168         u32 tpc_qm_err_cfg, irq_handler_offset;
3169         u32 q_off, tpc_id;
3170
3171         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3172                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3173         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3174                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3175         so_base_en_lo = lower_32_bits(CFG_BASE +
3176                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3177         so_base_en_hi = upper_32_bits(CFG_BASE +
3178                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3179         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3180                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3181         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3182                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3183         so_base_ws_lo = lower_32_bits(CFG_BASE +
3184                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3185         so_base_ws_hi = upper_32_bits(CFG_BASE +
3186                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3187
3188         q_off = tpc_offset + qman_id * 4;
3189
3190         tpc_id = tpc_offset /
3191                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3192
3193         if (qman_id < 4) {
3194                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3195                                         lower_32_bits(qman_base_addr));
3196                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3197                                         upper_32_bits(qman_base_addr));
3198
3199                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3200                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3201                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3202
3203                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3204                                                         QMAN_CPDMA_SIZE_OFFSET);
3205                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3206                                                         QMAN_CPDMA_SRC_OFFSET);
3207                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3208                                                         QMAN_CPDMA_DST_OFFSET);
3209         } else {
3210                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3211                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3212                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3213
3214                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3215                                                         QMAN_LDMA_SIZE_OFFSET);
3216                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3217                                                         QMAN_LDMA_SRC_OFFSET);
3218                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3219                                                         QMAN_LDMA_DST_OFFSET);
3220
3221                 /* Configure RAZWI IRQ */
3222                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3223                 if (hdev->stop_on_err)
3224                         tpc_qm_err_cfg |=
3225                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3226
3227                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3228
3229                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3230                         lower_32_bits(CFG_BASE + irq_handler_offset));
3231                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3232                         upper_32_bits(CFG_BASE + irq_handler_offset));
3233
3234                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3235                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3236                                                                         tpc_id);
3237
3238                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3239                                 QM_ARB_ERR_MSG_EN_MASK);
3240
3241                 /* Increase ARB WDT to support streams architecture */
3242                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3243                                 GAUDI_ARB_WDT_TIMEOUT);
3244
3245                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3246                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3247                                 QMAN_INTERNAL_MAKE_TRUSTED);
3248         }
3249
3250         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3251         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3252         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3253         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3254
3255         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3256         if (tpc_id == 6) {
3257                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3258                                 mtr_base_ws_lo);
3259                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3260                                 mtr_base_ws_hi);
3261                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3262                                 so_base_ws_lo);
3263                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3264                                 so_base_ws_hi);
3265         }
3266 }
3267
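/*
 * Initialize the QMANs of all TPC engines: four streams plus the lower CP
 * per TPC, enable each QMAN, and point the TPC sync-manager base address at
 * the east-north SOB block.
 */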
3268 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3269 {
3270         struct gaudi_device *gaudi = hdev->asic_specific;
3271         struct gaudi_internal_qman_info *q;
3272         u64 qman_base_addr;
3273         u32 so_base_hi, tpc_offset = 0;
3274         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3275                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3276         int i, tpc_id, internal_q_index;
3277
3278         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3279                 return;
3280
3281         so_base_hi = upper_32_bits(CFG_BASE +
3282                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3283
3284         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3285                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3286                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3287                                                 tpc_id * QMAN_STREAMS + i;
3288                         q = &gaudi->internal_qmans[internal_q_index];
3289                         qman_base_addr = (u64) q->pq_dma_addr;
3290                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3291                                                 qman_base_addr);
3292
3293                         if (i == 3) {
3294                                 /* Initialize the lower CP for the TPC QMAN */
3295                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3296
3297                                 /* Enable the QMAN and TPC channel */
3298                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3299                                                 QMAN_TPC_ENABLE);
3300                         }
3301                 }
3302
3303                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3304                                 so_base_hi);
3305
3306                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3307
3308                 gaudi->hw_cap_initialized |=
3309                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3310         }
3311 }
3312
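/*
 * Configure one stream of a NIC QMAN. Unlike the DMA/MME/TPC QMANs, every
 * stream sets up its own PQ here; the global error and arbitration
 * registers are programmed only for stream 0.
 */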
3313 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3314                                 int qman_id, u64 qman_base_addr, int nic_id)
3315 {
3316         struct cpu_dyn_regs *dyn_regs =
3317                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3318         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3319         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3320         u32 nic_qm_err_cfg, irq_handler_offset;
3321         u32 q_off;
3322
3323         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3324                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3325         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3326                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3327         so_base_en_lo = lower_32_bits(CFG_BASE +
3328                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3329         so_base_en_hi = upper_32_bits(CFG_BASE +
3330                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3331         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3332                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3333         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3334                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3335         so_base_ws_lo = lower_32_bits(CFG_BASE +
3336                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3337         so_base_ws_hi = upper_32_bits(CFG_BASE +
3338                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3339
3340         q_off = nic_offset + qman_id * 4;
3341
3342         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3343         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3344
3345         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3346         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3347         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3348
3349         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3350                                                         QMAN_LDMA_SIZE_OFFSET);
3351         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3352                                                         QMAN_LDMA_SRC_OFFSET);
3353         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3354                                                         QMAN_LDMA_DST_OFFSET);
3355
3356         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3357         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3358         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3359         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3360
3361         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3362         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3363         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3364         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3365         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3366
3367         if (qman_id == 0) {
3368                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3369                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3370                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3371
3372                 /* Configure RAZWI IRQ */
3373                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3374                 if (hdev->stop_on_err)
3375                         nic_qm_err_cfg |=
3376                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3377
3378                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3379
3380                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3381                         lower_32_bits(CFG_BASE + irq_handler_offset));
3382                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3383                         upper_32_bits(CFG_BASE + irq_handler_offset));
3384
3385                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3386                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3387                                                                         nic_id);
3388
3389                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3390                                 QM_ARB_ERR_MSG_EN_MASK);
3391
3392                 /* Increase ARB WDT to support streams architecture */
3393                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3394                                 GAUDI_ARB_WDT_TIMEOUT);
3395
3396                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3397                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3398                                 QMAN_INTERNAL_MAKE_TRUSTED);
3399         }
3400 }
3401
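/*
 * Initialize the QMANs of all enabled NIC ports. Each NIC macro holds two
 * QMANs, so the register offset advances by the inter-QMAN delta per port
 * and jumps to the next NIC macro after every odd port.
 */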
3402 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3403 {
3404         struct gaudi_device *gaudi = hdev->asic_specific;
3405         struct gaudi_internal_qman_info *q;
3406         u64 qman_base_addr;
3407         u32 nic_offset = 0;
3408         u32 nic_delta_between_qmans =
3409                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3410         u32 nic_delta_between_nics =
3411                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3412         int i, nic_id, internal_q_index;
3413
3414         if (!hdev->nic_ports_mask)
3415                 return;
3416
3417         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3418                 return;
3419
3420         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3421
3422         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3423                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3424                         nic_offset += nic_delta_between_qmans;
3425                         if (nic_id & 1) {
3426                                 nic_offset -= (nic_delta_between_qmans * 2);
3427                                 nic_offset += nic_delta_between_nics;
3428                         }
3429                         continue;
3430                 }
3431
3432                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3433                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3434                                                 nic_id * QMAN_STREAMS + i;
3435                         q = &gaudi->internal_qmans[internal_q_index];
3436                         qman_base_addr = (u64) q->pq_dma_addr;
3437                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3438                                                 qman_base_addr, nic_id);
3439                 }
3440
3441                 /* Enable the QMAN */
3442                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3443
3444                 nic_offset += nic_delta_between_qmans;
3445                 if (nic_id & 1) {
3446                         nic_offset -= (nic_delta_between_qmans * 2);
3447                         nic_offset += nic_delta_between_nics;
3448                 }
3449
3450                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3451         }
3452 }
3453
3454 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3455 {
3456         struct gaudi_device *gaudi = hdev->asic_specific;
3457
3458         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3459                 return;
3460
3461         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3462         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3463         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3464 }
3465
3466 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3467 {
3468         struct gaudi_device *gaudi = hdev->asic_specific;
3469
3470         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3471                 return;
3472
3473         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3474         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3475         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3476         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3477         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3478 }
3479
3480 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3481 {
3482         struct gaudi_device *gaudi = hdev->asic_specific;
3483
3484         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3485                 return;
3486
3487         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3488         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3489 }
3490
3491 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3492 {
3493         struct gaudi_device *gaudi = hdev->asic_specific;
3494         u32 tpc_offset = 0;
3495         int tpc_id;
3496
3497         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3498                 return;
3499
3500         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3501                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3502                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3503         }
3504 }
3505
3506 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3507 {
3508         struct gaudi_device *gaudi = hdev->asic_specific;
3509         u32 nic_mask, nic_offset = 0;
3510         u32 nic_delta_between_qmans =
3511                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3512         u32 nic_delta_between_nics =
3513                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3514         int nic_id;
3515
3516         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3517                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3518
3519                 if (gaudi->hw_cap_initialized & nic_mask)
3520                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3521
3522                 nic_offset += nic_delta_between_qmans;
3523                 if (nic_id & 1) {
3524                         nic_offset -= (nic_delta_between_qmans * 2);
3525                         nic_offset += nic_delta_between_nics;
3526                 }
3527         }
3528 }
3529
3530 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3531 {
3532         struct gaudi_device *gaudi = hdev->asic_specific;
3533
3534         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3535                 return;
3536
3537         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3538         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3539         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3540         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3541 }
3542
3543 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3544 {
3545         struct gaudi_device *gaudi = hdev->asic_specific;
3546
3547         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3548                 return;
3549
3550         /* Stop CPs of HBM DMA QMANs */
3551
3552         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3553         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3554         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3555         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3556         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3557 }
3558
3559 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3560 {
3561         struct gaudi_device *gaudi = hdev->asic_specific;
3562
3563         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3564                 return;
3565
3566         /* Stop CPs of MME QMANs */
3567         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3568         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3569 }
3570
3571 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3572 {
3573         struct gaudi_device *gaudi = hdev->asic_specific;
3574
3575         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3576                 return;
3577
3578         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3579         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3580         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3581         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3582         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3583         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3584         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3585         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3586 }
3587
3588 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3589 {
3590         struct gaudi_device *gaudi = hdev->asic_specific;
3591
3592         /* Stop upper CPs of QMANs */
3593
3594         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3595                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3596                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3597                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3598                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3599
3600         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3601                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3602                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3603                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3604                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3605
3606         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3607                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3608                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3609                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3610                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3611
3612         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3613                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3614                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3615                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3616                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3617
3618         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3619                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3620                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3621                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3622                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3623
3624         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3625                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3626                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3627                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3628                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3629
3630         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3631                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3632                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3633                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3634                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3635
3636         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3637                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3638                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3639                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3640                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3641
3642         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3643                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3644                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3645                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3646                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3647
3648         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3649                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3650                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3651                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3652                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3653 }
3654
3655 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3656 {
3657         struct gaudi_device *gaudi = hdev->asic_specific;
3658
3659         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3660                 return;
3661
3662         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3663         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3664         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3665 }
3666
3667 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3668 {
3669         struct gaudi_device *gaudi = hdev->asic_specific;
3670
3671         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3672                 return;
3673
3674         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3675         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3676         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3677         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3678         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3679 }
3680
3681 static void gaudi_mme_stall(struct hl_device *hdev)
3682 {
3683         struct gaudi_device *gaudi = hdev->asic_specific;
3684
3685         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3686                 return;
3687
3688         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3689         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3690         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3691         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3692         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3693         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3694         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3695         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3696         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3697         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3698         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3699         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3700         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3701         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3702         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3703         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3704         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3705 }
3706
3707 static void gaudi_tpc_stall(struct hl_device *hdev)
3708 {
3709         struct gaudi_device *gaudi = hdev->asic_specific;
3710
3711         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3712                 return;
3713
3714         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3715         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3716         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3717         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3718         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3719         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3720         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3721         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3722 }
3723
3724 static void gaudi_set_clock_gating(struct hl_device *hdev)
3725 {
3726         struct gaudi_device *gaudi = hdev->asic_specific;
3727         u32 qman_offset;
3728         bool enable;
3729         int i;
3730
3731         /* If a debug session is in progress, don't enable clock gating
3732          * as it may interfere with debugging
3733          */
3734         if (hdev->in_debug)
3735                 return;
3736
3737         if (hdev->asic_prop.fw_security_enabled)
3738                 return;
3739
3740         for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3741                 enable = !!(hdev->clock_gating_mask &
3742                                 (BIT_ULL(gaudi_dma_assignment[i])));
3743
3744                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3745                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3746                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3747                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3748                                 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3749         }
3750
3751         for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3752                 enable = !!(hdev->clock_gating_mask &
3753                                 (BIT_ULL(gaudi_dma_assignment[i])));
3754
3755                 /* GC sends work to the DMA engine through the Upper CP in
3756                  * DMA5, so clock gating must not be enabled for that DMA
3757                  */
3758                 if (i == GAUDI_HBM_DMA_4)
3759                         enable = 0;
3760
3761                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3762                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3763                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3764                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3765                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3766         }
3767
3768         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3769         WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3770         WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3771
3772         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3773         WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3774         WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3775
3776         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3777                 enable = !!(hdev->clock_gating_mask &
3778                                 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3779
3780                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3781                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3782                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3783                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3784
3785                 qman_offset += TPC_QMAN_OFFSET;
3786         }
3787
3788         gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3789 }
3790
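/*
 * Illustrative sketch (not part of the upstream driver): the clock gating
 * mask decoded by gaudi_set_clock_gating() above is a plain per-engine
 * bitmask, one BIT_ULL() per engine ID. The hypothetical helper below shows
 * how such a mask could be composed, e.g. gating all TPCs and both MME
 * masters while leaving the DMA engines ungated.
 */
static inline u64 gaudi_example_clock_gating_mask(void)
{
	u64 mask = 0;
	int i;

	/* enable clock gating for every TPC engine */
	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++)
		mask |= BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i);

	/* enable clock gating for the two MME masters */
	mask |= BIT_ULL(GAUDI_ENGINE_ID_MME_0);
	mask |= BIT_ULL(GAUDI_ENGINE_ID_MME_2);

	return mask;
}
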
3791 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3792 {
3793         struct gaudi_device *gaudi = hdev->asic_specific;
3794         u32 qman_offset;
3795         int i;
3796
3797         if (hdev->asic_prop.fw_security_enabled)
3798                 return;
3799
3800         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3801                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3802                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3803
3804                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3805         }
3806
3807         WREG32(mmMME0_QM_CGM_CFG, 0);
3808         WREG32(mmMME0_QM_CGM_CFG1, 0);
3809         WREG32(mmMME2_QM_CGM_CFG, 0);
3810         WREG32(mmMME2_QM_CGM_CFG1, 0);
3811
3812         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3813                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3814                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3815
3816                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3817         }
3818
3819         gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3820 }
3821
3822 static void gaudi_enable_timestamp(struct hl_device *hdev)
3823 {
3824         /* Disable the timestamp counter */
3825         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3826
3827         /* Zero the lower/upper parts of the 64-bit counter */
3828         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3829         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3830
3831         /* Enable the counter */
3832         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3833 }
3834
3835 static void gaudi_disable_timestamp(struct hl_device *hdev)
3836 {
3837         /* Disable the timestamp counter */
3838         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3839 }
3840
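/*
 * Illustrative sketch (not part of the upstream driver): the timestamp block
 * enabled above is a free-running 64-bit counter exposed through two 32-bit
 * registers. Assuming offset 0x8 holds the low word and offset 0xC the high
 * word (the same offsets zeroed in gaudi_enable_timestamp()), a coherent
 * 64-bit sample can be taken with a high-low-high read sequence that guards
 * against a carry between the two reads.
 */
static inline u64 gaudi_example_read_timestamp(struct hl_device *hdev)
{
	u32 hi, lo, tmp;

	do {
		hi = RREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC);
		lo = RREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8);
		tmp = RREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC);
	} while (hi != tmp);

	return ((u64) hi << 32) | lo;
}
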
3841 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3842 {
3843         u32 wait_timeout_ms;
3844
3845         dev_info(hdev->dev,
3846                 "Halting compute engines and disabling interrupts\n");
3847
3848         if (hdev->pldm)
3849                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3850         else
3851                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3852
3853         if (fw_reset)
3854                 goto skip_engines;
3855
3856         gaudi_stop_nic_qmans(hdev);
3857         gaudi_stop_mme_qmans(hdev);
3858         gaudi_stop_tpc_qmans(hdev);
3859         gaudi_stop_hbm_dma_qmans(hdev);
3860         gaudi_stop_pci_dma_qmans(hdev);
3861
3862         hdev->asic_funcs->disable_clock_gating(hdev);
3863
3864         msleep(wait_timeout_ms);
3865
3866         gaudi_pci_dma_stall(hdev);
3867         gaudi_hbm_dma_stall(hdev);
3868         gaudi_tpc_stall(hdev);
3869         gaudi_mme_stall(hdev);
3870
3871         msleep(wait_timeout_ms);
3872
3873         gaudi_disable_nic_qmans(hdev);
3874         gaudi_disable_mme_qmans(hdev);
3875         gaudi_disable_tpc_qmans(hdev);
3876         gaudi_disable_hbm_dma_qmans(hdev);
3877         gaudi_disable_pci_dma_qmans(hdev);
3878
3879         gaudi_disable_timestamp(hdev);
3880
3881 skip_engines:
3882         gaudi_disable_msi(hdev);
3883 }
3884
3885 static int gaudi_mmu_init(struct hl_device *hdev)
3886 {
3887         struct asic_fixed_properties *prop = &hdev->asic_prop;
3888         struct gaudi_device *gaudi = hdev->asic_specific;
3889         u64 hop0_addr;
3890         int rc, i;
3891
3892         if (!hdev->mmu_enable)
3893                 return 0;
3894
3895         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3896                 return 0;
3897
3898         for (i = 0 ; i < prop->max_asid ; i++) {
3899                 hop0_addr = prop->mmu_pgt_addr +
3900                                 (i * prop->mmu_hop_table_size);
3901
3902                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3903                 if (rc) {
3904                         dev_err(hdev->dev,
3905                                 "failed to set hop0 addr for asid %d\n", i);
3906                         goto err;
3907                 }
3908         }
3909
3910         /* init MMU cache management page */
3911         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3912         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3913
3914         /* mem cache invalidation */
3915         WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3916
3917         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3918
3919         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3920         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3921
3922         WREG32(mmSTLB_HOP_CONFIGURATION,
3923                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3924
3925         /*
3926          * The H/W expects the first PI after init to be 1. After wraparound
3927          * we'll write 0.
3928          */
3929         gaudi->mmu_cache_inv_pi = 1;
3930
3931         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3932
3933         return 0;
3934
3935 err:
3936         return rc;
3937 }
3938
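/*
 * Illustrative sketch (not part of the upstream driver): gaudi_mmu_init()
 * above lays out one hop0 page table per ASID, back to back, starting at
 * prop->mmu_pgt_addr. The hypothetical helper below restates that layout:
 * the hop0 table of a given ASID sits asid * mmu_hop_table_size bytes into
 * the page-table area.
 */
static inline u64 gaudi_example_asid_hop0_addr(struct hl_device *hdev, u32 asid)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	return prop->mmu_pgt_addr + ((u64) asid * prop->mmu_hop_table_size);
}
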
3939 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3940 {
3941         void __iomem *dst;
3942
3943         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3944
3945         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3946 }
3947
3948 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3949 {
3950         void __iomem *dst;
3951
3952         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3953
3954         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3955 }
3956
3957 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3958 {
3959         struct dynamic_fw_load_mgr *dynamic_loader;
3960         struct cpu_dyn_regs *dyn_regs;
3961
3962         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3963
3964         /*
3965          * Set initial values for a few specific dynamic regs. Before the
3966          * first descriptor is read from the FW, these values must be
3967          * hard-coded; in later stages of the protocol they are updated
3968          * automatically from the FW descriptor, so the data there is
3969          * always up-to-date.
3970          */
3971         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3972         dyn_regs->kmd_msg_to_cpu =
3973                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3974         dyn_regs->cpu_cmd_status_to_host =
3975                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3976
3977         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3978 }
3979
3980 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3981 {
3982         struct static_fw_load_mgr *static_loader;
3983
3984         static_loader = &hdev->fw_loader.static_loader;
3985
3986         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3987         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3988         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3989         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3990         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3991         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3992         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3993         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3994         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3995         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3996         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3997         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3998         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3999                         GAUDI_PLDM_RESET_WAIT_MSEC :
4000                         GAUDI_CPU_RESET_WAIT_MSEC;
4001 }
4002
4003 static void gaudi_init_firmware_loader(struct hl_device *hdev)
4004 {
4005         struct asic_fixed_properties *prop = &hdev->asic_prop;
4006         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4007
4008         /* fill common fields */
4009         fw_loader->linux_loaded = false;
4010         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
4011         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
4012         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
4013         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
4014         fw_loader->skip_bmc = !hdev->bmc_enable;
4015         fw_loader->sram_bar_id = SRAM_BAR_ID;
4016         fw_loader->dram_bar_id = HBM_BAR_ID;
4017
4018         if (prop->dynamic_fw_load)
4019                 gaudi_init_dynamic_firmware_loader(hdev);
4020         else
4021                 gaudi_init_static_firmware_loader(hdev);
4022 }
4023
4024 static int gaudi_init_cpu(struct hl_device *hdev)
4025 {
4026         struct gaudi_device *gaudi = hdev->asic_specific;
4027         int rc;
4028
4029         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4030                 return 0;
4031
4032         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
4033                 return 0;
4034
4035         /*
4036          * The device CPU works with 40-bit addresses.
4037          * This register extends the addressing to 50 bits.
4038          */
4039         if (!hdev->asic_prop.fw_security_enabled)
4040                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
4041
4042         rc = hl_fw_init_cpu(hdev);
4043
4044         if (rc)
4045                 return rc;
4046
4047         gaudi->hw_cap_initialized |= HW_CAP_CPU;
4048
4049         return 0;
4050 }
4051
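/*
 * Illustrative sketch (not part of the upstream driver): as noted in
 * gaudi_init_cpu() above, the device CPU natively drives 40-bit addresses
 * and mmCPU_IF_CPU_MSB_ADDR supplies the upper bits that extend this to
 * 50 bits. Under that assumption, a 50-bit host address splits as shown by
 * the hypothetical helper below: bits [39:0] travel with each CPU access,
 * bits [49:40] come from the MSB register programmed once at init.
 */
static inline void gaudi_example_split_cpu_addr(u64 host_addr, u32 *msb,
						u64 *lsb_40)
{
	*lsb_40 = host_addr & GENMASK_ULL(39, 0);	/* driven by the CPU */
	*msb = (u32) (host_addr >> 40);			/* MSB extension bits */
}
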
4052 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4053 {
4054         struct cpu_dyn_regs *dyn_regs =
4055                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4056         struct asic_fixed_properties *prop = &hdev->asic_prop;
4057         struct gaudi_device *gaudi = hdev->asic_specific;
4058         u32 status, irq_handler_offset;
4059         struct hl_eq *eq;
4060         struct hl_hw_queue *cpu_pq =
4061                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4062         int err;
4063
4064         if (!hdev->cpu_queues_enable)
4065                 return 0;
4066
4067         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4068                 return 0;
4069
4070         eq = &hdev->event_queue;
4071
4072         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4073         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4074
4075         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4076         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4077
4078         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4079                         lower_32_bits(hdev->cpu_accessible_dma_address));
4080         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4081                         upper_32_bits(hdev->cpu_accessible_dma_address));
4082
4083         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4084         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4085         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4086
4087         /* Used for EQ CI */
4088         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4089
4090         WREG32(mmCPU_IF_PF_PQ_PI, 0);
4091
4092         if (gaudi->multi_msi_mode)
4093                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4094         else
4095                 WREG32(mmCPU_IF_QUEUE_INIT,
4096                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4097
4098         irq_handler_offset = prop->gic_interrupts_enable ?
4099                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4100                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4101
4102         WREG32(irq_handler_offset,
4103                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4104
4105         err = hl_poll_timeout(
4106                 hdev,
4107                 mmCPU_IF_QUEUE_INIT,
4108                 status,
4109                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4110                 1000,
4111                 cpu_timeout);
4112
4113         if (err) {
4114                 dev_err(hdev->dev,
4115                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4116                 return -EIO;
4117         }
4118
4119         /* update FW application security bits */
4120         if (prop->fw_cpu_boot_dev_sts0_valid)
4121                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4122         if (prop->fw_cpu_boot_dev_sts1_valid)
4123                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4124
4125         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4126         return 0;
4127 }
4128
4129 static void gaudi_pre_hw_init(struct hl_device *hdev)
4130 {
4131         /* Perform read from the device to make sure device is up */
4132         RREG32(mmHW_STATE);
4133
4134         if (!hdev->asic_prop.fw_security_enabled) {
4135                 /* Set the access through PCI bars (Linux driver only) as
4136                  * secured
4137                  */
4138                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4139                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4140                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4141
4142                 /* Perform read to flush the waiting writes to ensure
4143                  * configuration was set in the device
4144                  */
4145                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4146         }
4147
4148         /*
4149          * Let's mark in the H/W that we have reached this point. We check
4150          * this value in the reset_before_init function to understand whether
4151          * we need to reset the chip before doing H/W init. This register is
4152          * cleared by the H/W upon H/W reset
4153          */
4154         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4155 }
4156
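/*
 * Illustrative sketch (not part of the upstream driver): the HW_STATE
 * scratchpad written at the end of gaudi_pre_hw_init() feeds the
 * reset-before-init decision mentioned in the comment above. Assuming a
 * dirty value simply means "the driver already touched the device since the
 * last reset", a minimal version of that check could look like this:
 */
static inline bool gaudi_example_needs_reset_before_init(struct hl_device *hdev)
{
	/* The register is cleared by H/W on reset, so a dirty value means the
	 * device was not reset since the previous driver initialization
	 */
	return RREG32(mmHW_STATE) == HL_DEVICE_HW_STATE_DIRTY;
}
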
4157 static int gaudi_hw_init(struct hl_device *hdev)
4158 {
4159         struct gaudi_device *gaudi = hdev->asic_specific;
4160         int rc;
4161
4162         gaudi_pre_hw_init(hdev);
4163
4164         /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
4165          * So we set it here and if anyone tries to move it later to
4166          * a different address, there will be an error
4167          */
4168         if (hdev->asic_prop.iatu_done_by_fw)
4169                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4170
4171         /*
4172          * Before pushing u-boot/linux to the device, we need to set the HBM
4173          * bar to the base address of the DRAM
4174          */
4175         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4176                 dev_err(hdev->dev,
4177                         "failed to map HBM bar to DRAM base address\n");
4178                 return -EIO;
4179         }
4180
4181         rc = gaudi_init_cpu(hdev);
4182         if (rc) {
4183                 dev_err(hdev->dev, "failed to initialize CPU\n");
4184                 return rc;
4185         }
4186
4187         /* If clock gating was enabled in preboot, we need to disable it
4188          * here before touching the MME/TPC registers.
4189          * There is no need to take the clock gating mutex because no other
4190          * relevant code can run while this function runs
4191          */
4192         hdev->asic_funcs->disable_clock_gating(hdev);
4193
4194         /* SRAM scrambler must be initialized after CPU is running from HBM */
4195         gaudi_init_scrambler_sram(hdev);
4196
4197         /* This is here just in case we are working without CPU */
4198         gaudi_init_scrambler_hbm(hdev);
4199
4200         gaudi_init_golden_registers(hdev);
4201
4202         rc = gaudi_mmu_init(hdev);
4203         if (rc)
4204                 return rc;
4205
4206         gaudi_init_security(hdev);
4207
4208         gaudi_init_pci_dma_qmans(hdev);
4209
4210         gaudi_init_hbm_dma_qmans(hdev);
4211
4212         gaudi_init_mme_qmans(hdev);
4213
4214         gaudi_init_tpc_qmans(hdev);
4215
4216         gaudi_init_nic_qmans(hdev);
4217
4218         hdev->asic_funcs->set_clock_gating(hdev);
4219
4220         gaudi_enable_timestamp(hdev);
4221
4222         /* MSI must be enabled before CPU queues and NIC are initialized */
4223         rc = gaudi_enable_msi(hdev);
4224         if (rc)
4225                 goto disable_queues;
4226
4227         /* must be called after MSI was enabled */
4228         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4229         if (rc) {
4230                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4231                         rc);
4232                 goto disable_msi;
4233         }
4234
4235         /* Perform read from the device to flush all configuration */
4236         RREG32(mmHW_STATE);
4237
4238         return 0;
4239
4240 disable_msi:
4241         gaudi_disable_msi(hdev);
4242 disable_queues:
4243         gaudi_disable_mme_qmans(hdev);
4244         gaudi_disable_pci_dma_qmans(hdev);
4245
4246         return rc;
4247 }
4248
4249 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4250 {
4251         struct cpu_dyn_regs *dyn_regs =
4252                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4253         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4254         struct gaudi_device *gaudi = hdev->asic_specific;
4255         bool driver_performs_reset;
4256
4257         if (!hard_reset) {
4258                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4259                 return;
4260         }
4261
4262         if (hdev->pldm) {
4263                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4264                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4265         } else {
4266                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4267                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4268         }
4269
4270         if (fw_reset) {
4271                 dev_info(hdev->dev,
4272                         "Firmware performs HARD reset, going to wait %dms\n",
4273                         reset_timeout_ms);
4274
4275                 goto skip_reset;
4276         }
4277
4278         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4279                                         !hdev->asic_prop.hard_reset_done_by_fw);
4280
4281         /* Set the device to handle FLR by H/W as we will put the device CPU
4282          * into halt mode
4283          */
4284         if (driver_performs_reset)
4285                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4286                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4287
4288         /* If linux is loaded in the device CPU we need to communicate with it
4289          * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4290          * registers in case of old F/Ws
4291          */
4292         if (hdev->fw_loader.linux_loaded) {
4293                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4294                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4295                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4296
4297                 WREG32(irq_handler_offset,
4298                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4299         } else {
4300                 if (hdev->asic_prop.hard_reset_done_by_fw)
4301                         hl_fw_ask_hard_reset_without_linux(hdev);
4302                 else
4303                         hl_fw_ask_halt_machine_without_linux(hdev);
4304         }
4305
4306         if (driver_performs_reset) {
4307
4308                 /* Configure the reset registers. Must be done as early as
4309                  * possible in case we fail during H/W initialization
4310                  */
4311                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4312                                                 (CFG_RST_H_DMA_MASK |
4313                                                 CFG_RST_H_MME_MASK |
4314                                                 CFG_RST_H_SM_MASK |
4315                                                 CFG_RST_H_TPC_7_MASK));
4316
4317                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4318
4319                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4320                                                 (CFG_RST_H_HBM_MASK |
4321                                                 CFG_RST_H_TPC_7_MASK |
4322                                                 CFG_RST_H_NIC_MASK |
4323                                                 CFG_RST_H_SM_MASK |
4324                                                 CFG_RST_H_DMA_MASK |
4325                                                 CFG_RST_H_MME_MASK |
4326                                                 CFG_RST_H_CPU_MASK |
4327                                                 CFG_RST_H_MMU_MASK));
4328
4329                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4330                                                 (CFG_RST_L_IF_MASK |
4331                                                 CFG_RST_L_PSOC_MASK |
4332                                                 CFG_RST_L_TPC_MASK));
4333
4334                 msleep(cpu_timeout_ms);
4335
4336                 /* Tell ASIC not to re-initialize PCIe */
4337                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4338
4339                 /* Restart BTL/BLR upon hard-reset */
4340                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4341
4342                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4343                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4344
4345                 dev_info(hdev->dev,
4346                         "Issued HARD reset command, going to wait %dms\n",
4347                         reset_timeout_ms);
4348         } else {
4349                 dev_info(hdev->dev,
4350                         "Firmware performs HARD reset, going to wait %dms\n",
4351                         reset_timeout_ms);
4352         }
4353
4354 skip_reset:
4355         /*
4356          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4357          * itself is in reset. Need to wait until the reset is deasserted
4358          */
4359         msleep(reset_timeout_ms);
4360
4361         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4362         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4363                 dev_err(hdev->dev,
4364                         "Timeout while waiting for device to reset 0x%x\n",
4365                         status);
4366
4367         if (gaudi) {
4368                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4369                                 HW_CAP_HBM | HW_CAP_PCI_DMA |
4370                                 HW_CAP_MME | HW_CAP_TPC_MASK |
4371                                 HW_CAP_HBM_DMA | HW_CAP_PLL |
4372                                 HW_CAP_NIC_MASK | HW_CAP_MMU |
4373                                 HW_CAP_SRAM_SCRAMBLER |
4374                                 HW_CAP_HBM_SCRAMBLER |
4375                                 HW_CAP_CLK_GATE);
4376
4377                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4378
4379                 hdev->device_cpu_is_halted = false;
4380         }
4381 }
4382
4383 static int gaudi_suspend(struct hl_device *hdev)
4384 {
4385         int rc;
4386
4387         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4388         if (rc)
4389                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4390
4391         return rc;
4392 }
4393
4394 static int gaudi_resume(struct hl_device *hdev)
4395 {
4396         return gaudi_init_iatu(hdev);
4397 }
4398
4399 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4400                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4401 {
4402         int rc;
4403
4404         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4405                         VM_DONTCOPY | VM_NORESERVE;
4406
4407         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4408                                 (dma_addr - HOST_PHYS_BASE), size);
4409         if (rc)
4410                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4411
4412         return rc;
4413 }
4414
4415 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4416 {
4417         struct cpu_dyn_regs *dyn_regs =
4418                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4419         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4420         struct gaudi_device *gaudi = hdev->asic_specific;
4421         bool invalid_queue = false;
4422         int dma_id;
4423
4424         switch (hw_queue_id) {
4425         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4426                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4427                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4428                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4429                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4430                 break;
4431
4432         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4433                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4434                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4435                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4436                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4437                 break;
4438
4439         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4440                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4441                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4442                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4443                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4444                 break;
4445
4446         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4447                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4448                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4449                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4450                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4451                 break;
4452
4453         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4454                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4455                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4456                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4457                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4458                 break;
4459
4460         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4461                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4462                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4463                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4464                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4465                 break;
4466
4467         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4468                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4469                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4470                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4471                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4472                 break;
4473
4474         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4475                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4476                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4477                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4478                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4479                 break;
4480
4481         case GAUDI_QUEUE_ID_CPU_PQ:
4482                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4483                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4484                 else
4485                         invalid_queue = true;
4486                 break;
4487
4488         case GAUDI_QUEUE_ID_MME_0_0:
4489                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4490                 break;
4491
4492         case GAUDI_QUEUE_ID_MME_0_1:
4493                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4494                 break;
4495
4496         case GAUDI_QUEUE_ID_MME_0_2:
4497                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4498                 break;
4499
4500         case GAUDI_QUEUE_ID_MME_0_3:
4501                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4502                 break;
4503
4504         case GAUDI_QUEUE_ID_MME_1_0:
4505                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4506                 break;
4507
4508         case GAUDI_QUEUE_ID_MME_1_1:
4509                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4510                 break;
4511
4512         case GAUDI_QUEUE_ID_MME_1_2:
4513                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4514                 break;
4515
4516         case GAUDI_QUEUE_ID_MME_1_3:
4517                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4518                 break;
4519
4520         case GAUDI_QUEUE_ID_TPC_0_0:
4521                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4522                 break;
4523
4524         case GAUDI_QUEUE_ID_TPC_0_1:
4525                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4526                 break;
4527
4528         case GAUDI_QUEUE_ID_TPC_0_2:
4529                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4530                 break;
4531
4532         case GAUDI_QUEUE_ID_TPC_0_3:
4533                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4534                 break;
4535
4536         case GAUDI_QUEUE_ID_TPC_1_0:
4537                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4538                 break;
4539
4540         case GAUDI_QUEUE_ID_TPC_1_1:
4541                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4542                 break;
4543
4544         case GAUDI_QUEUE_ID_TPC_1_2:
4545                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4546                 break;
4547
4548         case GAUDI_QUEUE_ID_TPC_1_3:
4549                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4550                 break;
4551
4552         case GAUDI_QUEUE_ID_TPC_2_0:
4553                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4554                 break;
4555
4556         case GAUDI_QUEUE_ID_TPC_2_1:
4557                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4558                 break;
4559
4560         case GAUDI_QUEUE_ID_TPC_2_2:
4561                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4562                 break;
4563
4564         case GAUDI_QUEUE_ID_TPC_2_3:
4565                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4566                 break;
4567
4568         case GAUDI_QUEUE_ID_TPC_3_0:
4569                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4570                 break;
4571
4572         case GAUDI_QUEUE_ID_TPC_3_1:
4573                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4574                 break;
4575
4576         case GAUDI_QUEUE_ID_TPC_3_2:
4577                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4578                 break;
4579
4580         case GAUDI_QUEUE_ID_TPC_3_3:
4581                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4582                 break;
4583
4584         case GAUDI_QUEUE_ID_TPC_4_0:
4585                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4586                 break;
4587
4588         case GAUDI_QUEUE_ID_TPC_4_1:
4589                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4590                 break;
4591
4592         case GAUDI_QUEUE_ID_TPC_4_2:
4593                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4594                 break;
4595
4596         case GAUDI_QUEUE_ID_TPC_4_3:
4597                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4598                 break;
4599
4600         case GAUDI_QUEUE_ID_TPC_5_0:
4601                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4602                 break;
4603
4604         case GAUDI_QUEUE_ID_TPC_5_1:
4605                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4606                 break;
4607
4608         case GAUDI_QUEUE_ID_TPC_5_2:
4609                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4610                 break;
4611
4612         case GAUDI_QUEUE_ID_TPC_5_3:
4613                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4614                 break;
4615
4616         case GAUDI_QUEUE_ID_TPC_6_0:
4617                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4618                 break;
4619
4620         case GAUDI_QUEUE_ID_TPC_6_1:
4621                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4622                 break;
4623
4624         case GAUDI_QUEUE_ID_TPC_6_2:
4625                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4626                 break;
4627
4628         case GAUDI_QUEUE_ID_TPC_6_3:
4629                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4630                 break;
4631
4632         case GAUDI_QUEUE_ID_TPC_7_0:
4633                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4634                 break;
4635
4636         case GAUDI_QUEUE_ID_TPC_7_1:
4637                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4638                 break;
4639
4640         case GAUDI_QUEUE_ID_TPC_7_2:
4641                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4642                 break;
4643
4644         case GAUDI_QUEUE_ID_TPC_7_3:
4645                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4646                 break;
4647
4648         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4649                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4650                         invalid_queue = true;
4651
4652                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4653                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4654                 break;
4655
4656         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4657                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4658                         invalid_queue = true;
4659
4660                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4661                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4662                 break;
4663
4664         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4665                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4666                         invalid_queue = true;
4667
4668                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4669                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4670                 break;
4671
4672         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4673                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4674                         invalid_queue = true;
4675
4676                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4677                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4678                 break;
4679
4680         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4681                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4682                         invalid_queue = true;
4683
4684                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4685                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4686                 break;
4687
4688         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4689                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4690                         invalid_queue = true;
4691
4692                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4693                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4694                 break;
4695
4696         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4697                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4698                         invalid_queue = true;
4699
4700                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4701                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4702                 break;
4703
4704         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4705                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4706                         invalid_queue = true;
4707
4708                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4709                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4710                 break;
4711
4712         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4713                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4714                         invalid_queue = true;
4715
4716                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4717                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4718                 break;
4719
4720         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4721                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4722                         invalid_queue = true;
4723
4724                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4725                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4726                 break;
4727
4728         default:
4729                 invalid_queue = true;
4730         }
4731
4732         if (invalid_queue) {
4733                 /* Should never get here */
4734                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4735                         hw_queue_id);
4736                 return;
4737         }
4738
4739         db_value = pi;
4740
4741         /* ring the doorbell */
4742         WREG32(db_reg_offset, db_value);
4743
4744         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4745                 /* make sure device CPU will read latest data from host */
4746                 mb();
4747
4748                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4749                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4750                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4751
4752                 WREG32(irq_handler_offset,
4753                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4754         }
4755 }
4756
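/*
 * Illustrative sketch (not part of the upstream driver): for the external
 * DMA queues, the long switch in gaudi_ring_doorbell() above resolves to a
 * simple linear formula: each DMA QMAN is DMA_QMAN_OFFSET apart and each of
 * its four streams has a 4-byte PI register. The hypothetical helper below
 * restates that arithmetic for a physical DMA channel and stream index.
 */
static inline u32 gaudi_example_dma_pi_reg(u32 dma_id, u32 stream)
{
	return mmDMA0_QM_PQ_PI_0 + (dma_id * DMA_QMAN_OFFSET) +
			((stream & 0x3) * 4);
}
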
4757 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4758                                 struct hl_bd *bd)
4759 {
4760         __le64 *pbd = (__le64 *) bd;
4761
4762         /* The QMANs are in host memory so a simple copy suffices */
4763         pqe[0] = pbd[0];
4764         pqe[1] = pbd[1];
4765 }
4766
4767 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4768                                         dma_addr_t *dma_handle, gfp_t flags)
4769 {
4770         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4771                                                 dma_handle, flags);
4772
4773         /* Shift to the device's base physical address of host memory */
4774         if (kernel_addr)
4775                 *dma_handle += HOST_PHYS_BASE;
4776
4777         return kernel_addr;
4778 }
4779
4780 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4781                 void *cpu_addr, dma_addr_t dma_handle)
4782 {
4783         /* Cancel the device's base physical address of host memory */
4784         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4785
4786         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4787 }
4788
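/*
 * Illustrative sketch (not part of the upstream driver): the coherent
 * alloc/free pair above, like the pool and SG helpers further down, keeps a
 * single invariant: every DMA address handed to the device carries the
 * HOST_PHYS_BASE offset, and that offset is stripped again before the
 * address is returned to the kernel DMA API. Hypothetical helpers for that
 * translation:
 */
static inline dma_addr_t gaudi_example_host_to_device_addr(dma_addr_t addr)
{
	return addr + HOST_PHYS_BASE;	/* as seen by the Gaudi engines */
}

static inline dma_addr_t gaudi_example_device_to_host_addr(dma_addr_t addr)
{
	return addr - HOST_PHYS_BASE;	/* as expected by the DMA API */
}
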
4789 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4790 {
4791         struct asic_fixed_properties *prop = &hdev->asic_prop;
4792         u64  cur_addr = DRAM_BASE_ADDR_USER;
4793         u32 val;
4794         u32 chunk_size;
4795         int rc, dma_id;
4796
4797         while (cur_addr < prop->dram_end_address) {
4798                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4799                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4800
4801                         chunk_size =
4802                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4803
4804                         dev_dbg(hdev->dev,
4805                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4806                                 cur_addr, cur_addr + chunk_size);
4807
4808                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
4809                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
4810                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4811                                                 lower_32_bits(cur_addr));
4812                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4813                                                 upper_32_bits(cur_addr));
4814                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4815                                         chunk_size);
4816                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4817                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4818                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4819
4820                         cur_addr += chunk_size;
4821
4822                         if (cur_addr == prop->dram_end_address)
4823                                 break;
4824                 }
4825
4826                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4827                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4828
4829                         rc = hl_poll_timeout(
4830                                 hdev,
4831                                 mmDMA0_CORE_STS0 + dma_offset,
4832                                 val,
4833                                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4834                                 1000,
4835                                 HBM_SCRUBBING_TIMEOUT_US);
4836
4837                         if (rc) {
4838                                 dev_err(hdev->dev,
4839                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4840                                         dma_id);
4841                                 return -EIO;
4842                         }
4843                 }
4844         }
4845
4846         return 0;
4847 }
4848
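/*
 * Illustrative sketch (not part of the upstream driver): gaudi_hbm_scrubbing()
 * above walks the user DRAM range in chunks of at most 2GB, handing each
 * chunk to the next DMA channel in turn, so up to DMA_NUMBER_OF_CHANNELS
 * memset transfers are in flight before the busy-poll drains them. The
 * hypothetical helper below restates how a single chunk is sized.
 */
static inline u32 gaudi_example_scrub_chunk_size(u64 cur_addr, u64 end_addr)
{
	/* never more than 2GB per transfer, never past the end of DRAM */
	return (u32) min((u64) SZ_2G, end_addr - cur_addr);
}
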
4849 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4850 {
4851         struct asic_fixed_properties *prop = &hdev->asic_prop;
4852         struct gaudi_device *gaudi = hdev->asic_specific;
4853         int rc = 0;
4854         u64 val = 0;
4855
4856         if (!hdev->memory_scrub)
4857                 return 0;
4858
4859         if (!addr && !size) {
4860                 /* Wait till device is idle */
4861                 rc = hl_poll_timeout(
4862                                 hdev,
4863                                 mmDMA0_CORE_STS0/* dummy */,
4864                                 val/* dummy */,
4865                                 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4866                                                 0, NULL)),
4867                                                 1000,
4868                                                 HBM_SCRUBBING_TIMEOUT_US);
4869                 if (rc) {
4870                         dev_err(hdev->dev, "waiting for idle timeout\n");
4871                         return -EIO;
4872                 }
4873
4874                 /* Scrub SRAM */
4875                 addr = prop->sram_user_base_address;
4876                 size = hdev->pldm ? 0x10000 :
4877                                 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4878                 val = 0x7777777777777777ull;
4879
4880                 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4881                 if (rc) {
4882                         dev_err(hdev->dev,
4883                                 "Failed to clear SRAM in mem scrub all\n");
4884                         return rc;
4885                 }
4886
4887                 mutex_lock(&gaudi->clk_gate_mutex);
4888                 hdev->asic_funcs->disable_clock_gating(hdev);
4889
4890                 /* Scrub HBM using all DMA channels in parallel */
4891                 rc = gaudi_hbm_scrubbing(hdev);
4892                 if (rc)
4893                         dev_err(hdev->dev,
4894                                 "Failed to clear HBM in mem scrub all\n");
4895
4896                 hdev->asic_funcs->set_clock_gating(hdev);
4897                 mutex_unlock(&gaudi->clk_gate_mutex);
4898         }
4899
4900         return rc;
4901 }
4902
4903 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4904                                 u32 queue_id, dma_addr_t *dma_handle,
4905                                 u16 *queue_len)
4906 {
4907         struct gaudi_device *gaudi = hdev->asic_specific;
4908         struct gaudi_internal_qman_info *q;
4909
4910         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4911                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4912                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4913                 return NULL;
4914         }
4915
4916         q = &gaudi->internal_qmans[queue_id];
4917         *dma_handle = q->pq_dma_addr;
4918         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4919
4920         return q->pq_kernel_addr;
4921 }
4922
4923 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4924                                 u16 len, u32 timeout, u64 *result)
4925 {
4926         struct gaudi_device *gaudi = hdev->asic_specific;
4927
4928         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4929                 if (result)
4930                         *result = 0;
4931                 return 0;
4932         }
4933
4934         if (!timeout)
4935                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4936
4937         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4938                                                 timeout, result);
4939 }
4940
4941 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4942 {
4943         struct packet_msg_prot *fence_pkt;
4944         dma_addr_t pkt_dma_addr;
4945         u32 fence_val, tmp, timeout_usec;
4946         dma_addr_t fence_dma_addr;
4947         u32 *fence_ptr;
4948         int rc;
4949
4950         if (hdev->pldm)
4951                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4952         else
4953                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4954
4955         fence_val = GAUDI_QMAN0_FENCE_VAL;
4956
4957         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4958                                                         &fence_dma_addr);
4959         if (!fence_ptr) {
4960                 dev_err(hdev->dev,
4961                         "Failed to allocate memory for H/W queue %d testing\n",
4962                         hw_queue_id);
4963                 return -ENOMEM;
4964         }
4965
4966         *fence_ptr = 0;
4967
4968         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4969                                         sizeof(struct packet_msg_prot),
4970                                         GFP_KERNEL, &pkt_dma_addr);
4971         if (!fence_pkt) {
4972                 dev_err(hdev->dev,
4973                         "Failed to allocate packet for H/W queue %d testing\n",
4974                         hw_queue_id);
4975                 rc = -ENOMEM;
4976                 goto free_fence_ptr;
4977         }
4978
4979         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4980         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4981         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4982
4983         fence_pkt->ctl = cpu_to_le32(tmp);
4984         fence_pkt->value = cpu_to_le32(fence_val);
4985         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4986
4987         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4988                                         sizeof(struct packet_msg_prot),
4989                                         pkt_dma_addr);
4990         if (rc) {
4991                 dev_err(hdev->dev,
4992                         "Failed to send fence packet to H/W queue %d\n",
4993                         hw_queue_id);
4994                 goto free_pkt;
4995         }
4996
4997         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4998                                         1000, timeout_usec, true);
4999
5000         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
5001
5002         if (rc == -ETIMEDOUT) {
5003                 dev_err(hdev->dev,
5004                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
5005                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
5006                 rc = -EIO;
5007         }
5008
5009 free_pkt:
5010         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
5011                                         pkt_dma_addr);
5012 free_fence_ptr:
5013         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
5014                                         fence_dma_addr);
5015         return rc;
5016 }
5017
5018 static int gaudi_test_cpu_queue(struct hl_device *hdev)
5019 {
5020         struct gaudi_device *gaudi = hdev->asic_specific;
5021
5022         /*
5023          * Check the capability here because send_cpu_message() won't update
5024          * the result value when the capability is missing
5025          */
5026         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
5027                 return 0;
5028
5029         return hl_fw_test_cpu_queue(hdev);
5030 }
5031
5032 static int gaudi_test_queues(struct hl_device *hdev)
5033 {
5034         int i, rc, ret_val = 0;
5035
5036         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
5037                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
5038                         rc = gaudi_test_queue(hdev, i);
5039                         if (rc)
5040                                 ret_val = -EINVAL;
5041                 }
5042         }
5043
5044         rc = gaudi_test_cpu_queue(hdev);
5045         if (rc)
5046                 ret_val = -EINVAL;
5047
5048         return ret_val;
5049 }
5050
5051 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5052                 gfp_t mem_flags, dma_addr_t *dma_handle)
5053 {
5054         void *kernel_addr;
5055
5056         if (size > GAUDI_DMA_POOL_BLK_SIZE)
5057                 return NULL;
5058
5059         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5060
5061         /* Shift to the device's base physical address of host memory */
5062         if (kernel_addr)
5063                 *dma_handle += HOST_PHYS_BASE;
5064
5065         return kernel_addr;
5066 }
5067
5068 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
5069                         dma_addr_t dma_addr)
5070 {
5071         /* Cancel the device's base physical address of host memory */
5072         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5073
5074         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5075 }
5076
5077 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
5078                                         size_t size, dma_addr_t *dma_handle)
5079 {
5080         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5081 }
5082
5083 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
5084                                                 size_t size, void *vaddr)
5085 {
5086         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5087 }
5088
5089 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5090                         int nents, enum dma_data_direction dir)
5091 {
5092         struct scatterlist *sg;
5093         int i;
5094
5095         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5096                 return -ENOMEM;
5097
5098         /* Shift to the device's base physical address of host memory */
5099         for_each_sg(sgl, sg, nents, i)
5100                 sg->dma_address += HOST_PHYS_BASE;
5101
5102         return 0;
5103 }
5104
5105 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5106                         int nents, enum dma_data_direction dir)
5107 {
5108         struct scatterlist *sg;
5109         int i;
5110
5111         /* Cancel the device's base physical address of host memory */
5112         for_each_sg(sgl, sg, nents, i)
5113                 sg->dma_address -= HOST_PHYS_BASE;
5114
5115         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5116 }
5117
5118 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5119                                         struct sg_table *sgt)
5120 {
5121         struct scatterlist *sg, *sg_next_iter;
5122         u32 count, dma_desc_cnt;
5123         u64 len, len_next;
5124         dma_addr_t addr, addr_next;
5125
5126         dma_desc_cnt = 0;
5127
5128         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5129
5130                 len = sg_dma_len(sg);
5131                 addr = sg_dma_address(sg);
5132
5133                 if (len == 0)
5134                         break;
5135
5136                 while ((count + 1) < sgt->nents) {
5137                         sg_next_iter = sg_next(sg);
5138                         len_next = sg_dma_len(sg_next_iter);
5139                         addr_next = sg_dma_address(sg_next_iter);
5140
5141                         if (len_next == 0)
5142                                 break;
5143
5144                         if ((addr + len == addr_next) &&
5145                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5146                                 len += len_next;
5147                                 count++;
5148                                 sg = sg_next_iter;
5149                         } else {
5150                                 break;
5151                         }
5152                 }
5153
5154                 dma_desc_cnt++;
5155         }
5156
5157         return dma_desc_cnt * sizeof(struct packet_lin_dma);
5158 }
5159
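/*
 * Pin (if not already pinned for this job) and DMA-map the user buffer
 * referenced by a LIN_DMA packet, then grow patched_cb_size by the space
 * the mapped SG list will need when the packet is later patched.
 */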
5160 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5161                                 struct hl_cs_parser *parser,
5162                                 struct packet_lin_dma *user_dma_pkt,
5163                                 u64 addr, enum dma_data_direction dir)
5164 {
5165         struct hl_userptr *userptr;
5166         int rc;
5167
5168         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5169                         parser->job_userptr_list, &userptr))
5170                 goto already_pinned;
5171
5172         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5173         if (!userptr)
5174                 return -ENOMEM;
5175
5176         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5177                                 userptr);
5178         if (rc)
5179                 goto free_userptr;
5180
5181         list_add_tail(&userptr->job_node, parser->job_userptr_list);
5182
5183         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5184                                         userptr->sgt->nents, dir);
5185         if (rc) {
5186                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5187                 goto unpin_memory;
5188         }
5189
5190         userptr->dma_mapped = true;
5191         userptr->dir = dir;
5192
5193 already_pinned:
5194         parser->patched_cb_size +=
5195                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5196
5197         return 0;
5198
5199 unpin_memory:
5200         list_del(&userptr->job_node);
5201         hl_unpin_host_memory(hdev, userptr);
5202 free_userptr:
5203         kfree(userptr);
5204         return rc;
5205 }
5206
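/*
 * Validate a LIN_DMA packet that involves host memory. The caller decides
 * src_in_host from the queue id; host pinning is skipped for memset
 * packets because their "source" is a value, not a host address.
 */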
5207 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5208                                 struct hl_cs_parser *parser,
5209                                 struct packet_lin_dma *user_dma_pkt,
5210                                 bool src_in_host)
5211 {
5212         enum dma_data_direction dir;
5213         bool skip_host_mem_pin = false, user_memset;
5214         u64 addr;
5215         int rc = 0;
5216
5217         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5218                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5219                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5220
5221         if (src_in_host) {
5222                 if (user_memset)
5223                         skip_host_mem_pin = true;
5224
5225                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5226                 dir = DMA_TO_DEVICE;
5227                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5228         } else {
5229                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5230                 dir = DMA_FROM_DEVICE;
5231                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5232                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5233                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5234         }
5235
5236         if (skip_host_mem_pin)
5237                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5238         else
5239                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5240                                                 addr, dir);
5241
5242         return rc;
5243 }
5244
5245 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5246                                 struct hl_cs_parser *parser,
5247                                 struct packet_lin_dma *user_dma_pkt)
5248 {
5249         bool src_in_host = false;
5250         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5251                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5252                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5253
5254         dev_dbg(hdev->dev, "DMA packet details:\n");
5255         dev_dbg(hdev->dev, "source == 0x%llx\n",
5256                                 le64_to_cpu(user_dma_pkt->src_addr));
5257         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5258         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5259
5260         /*
5261          * Special handling for DMA with size 0. Bypass all validations
5262          * because no transactions will be done except for WR_COMP, which
5263          * is not a security issue
5264          */
5265         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5266                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5267                 return 0;
5268         }
5269
5270         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5271                 src_in_host = true;
5272
5273         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5274                                                 src_in_host);
5275 }
5276
5277 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5278                                         struct hl_cs_parser *parser,
5279                                         struct packet_load_and_exe *user_pkt)
5280 {
5281         u32 cfg;
5282
5283         cfg = le32_to_cpu(user_pkt->cfg);
5284
5285         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5286                 dev_err(hdev->dev,
5287                         "User not allowed to use Load and Execute\n");
5288                 return -EPERM;
5289         }
5290
5291         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5292
5293         return 0;
5294 }
5295
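/*
 * First pass over a user CB: walk it packet by packet, reject packet types
 * the user must not submit (MSG_PROT, CP_DMA, STOP, WREG_BULK and
 * LOAD_AND_EXE with a forbidden destination) and accumulate the size the
 * patched CB will need. On the non-MMU path the actual patching is done
 * later by gaudi_patch_cb().
 */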
5296 static int gaudi_validate_cb(struct hl_device *hdev,
5297                         struct hl_cs_parser *parser, bool is_mmu)
5298 {
5299         u32 cb_parsed_length = 0;
5300         int rc = 0;
5301
5302         parser->patched_cb_size = 0;
5303
5304         /* user_cb_size is more than 0 so the loop will always be executed */
5305         while (cb_parsed_length < parser->user_cb_size) {
5306                 enum packet_id pkt_id;
5307                 u16 pkt_size;
5308                 struct gaudi_packet *user_pkt;
5309
5310                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5311
5312                 pkt_id = (enum packet_id) (
5313                                 (le64_to_cpu(user_pkt->header) &
5314                                 PACKET_HEADER_PACKET_ID_MASK) >>
5315                                         PACKET_HEADER_PACKET_ID_SHIFT);
5316
5317                 if (!validate_packet_id(pkt_id)) {
5318                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5319                         rc = -EINVAL;
5320                         break;
5321                 }
5322
5323                 pkt_size = gaudi_packet_sizes[pkt_id];
5324                 cb_parsed_length += pkt_size;
5325                 if (cb_parsed_length > parser->user_cb_size) {
5326                         dev_err(hdev->dev,
5327                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5328                         rc = -EINVAL;
5329                         break;
5330                 }
5331
5332                 switch (pkt_id) {
5333                 case PACKET_MSG_PROT:
5334                         dev_err(hdev->dev,
5335                                 "User not allowed to use MSG_PROT\n");
5336                         rc = -EPERM;
5337                         break;
5338
5339                 case PACKET_CP_DMA:
5340                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5341                         rc = -EPERM;
5342                         break;
5343
5344                 case PACKET_STOP:
5345                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5346                         rc = -EPERM;
5347                         break;
5348
5349                 case PACKET_WREG_BULK:
5350                         dev_err(hdev->dev,
5351                                 "User not allowed to use WREG_BULK\n");
5352                         rc = -EPERM;
5353                         break;
5354
5355                 case PACKET_LOAD_AND_EXE:
5356                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5357                                 (struct packet_load_and_exe *) user_pkt);
5358                         break;
5359
5360                 case PACKET_LIN_DMA:
5361                         parser->contains_dma_pkt = true;
5362                         if (is_mmu)
5363                                 parser->patched_cb_size += pkt_size;
5364                         else
5365                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5366                                         (struct packet_lin_dma *) user_pkt);
5367                         break;
5368
5369                 case PACKET_WREG_32:
5370                 case PACKET_MSG_LONG:
5371                 case PACKET_MSG_SHORT:
5372                 case PACKET_REPEAT:
5373                 case PACKET_FENCE:
5374                 case PACKET_NOP:
5375                 case PACKET_ARB_POINT:
5376                         parser->patched_cb_size += pkt_size;
5377                         break;
5378
5379                 default:
5380                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5381                                 pkt_id);
5382                         rc = -EINVAL;
5383                         break;
5384                 }
5385
5386                 if (rc)
5387                         break;
5388         }
5389
5390         /*
5391          * The new CB should have space at the end for two MSG_PROT packets:
5392          * 1. A packet that will act as a completion packet
5393          * 2. A packet that will generate MSI-X interrupt
5394          */
5395         if (parser->completion)
5396                 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5397
5398         return rc;
5399 }
5400
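/*
 * Expand a single user LIN_DMA packet that targets host memory into one
 * LIN_DMA packet per merged SG chunk of the pinned buffer. The engine
 * barrier is kept only on the first generated packet, WR_COMP is disabled
 * on all of them and then restored on the last one according to the user's
 * original setting.
 */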
5401 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5402                                 struct hl_cs_parser *parser,
5403                                 struct packet_lin_dma *user_dma_pkt,
5404                                 struct packet_lin_dma *new_dma_pkt,
5405                                 u32 *new_dma_pkt_size)
5406 {
5407         struct hl_userptr *userptr;
5408         struct scatterlist *sg, *sg_next_iter;
5409         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5410         u64 len, len_next;
5411         dma_addr_t dma_addr, dma_addr_next;
5412         u64 device_memory_addr, addr;
5413         enum dma_data_direction dir;
5414         struct sg_table *sgt;
5415         bool src_in_host = false;
5416         bool skip_host_mem_pin = false;
5417         bool user_memset;
5418
5419         ctl = le32_to_cpu(user_dma_pkt->ctl);
5420
5421         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5422                 src_in_host = true;
5423
5424         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5425                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5426
5427         if (src_in_host) {
5428                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5429                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5430                 dir = DMA_TO_DEVICE;
5431                 if (user_memset)
5432                         skip_host_mem_pin = true;
5433         } else {
5434                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5435                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5436                 dir = DMA_FROM_DEVICE;
5437         }
5438
5439         if ((!skip_host_mem_pin) &&
5440                 (!hl_userptr_is_pinned(hdev, addr,
5441                                         le32_to_cpu(user_dma_pkt->tsize),
5442                                         parser->job_userptr_list, &userptr))) {
5443                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5444                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5445                 return -EFAULT;
5446         }
5447
5448         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5449                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5450                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5451                 return 0;
5452         }
5453
5454         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5455
5456         sgt = userptr->sgt;
5457         dma_desc_cnt = 0;
5458
5459         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5460                 len = sg_dma_len(sg);
5461                 dma_addr = sg_dma_address(sg);
5462
5463                 if (len == 0)
5464                         break;
5465
5466                 while ((count + 1) < sgt->nents) {
5467                         sg_next_iter = sg_next(sg);
5468                         len_next = sg_dma_len(sg_next_iter);
5469                         dma_addr_next = sg_dma_address(sg_next_iter);
5470
5471                         if (len_next == 0)
5472                                 break;
5473
5474                         if ((dma_addr + len == dma_addr_next) &&
5475                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5476                                 len += len_next;
5477                                 count++;
5478                                 sg = sg_next_iter;
5479                         } else {
5480                                 break;
5481                         }
5482                 }
5483
5484                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5485                 if (likely(dma_desc_cnt))
5486                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5487                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5488                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5489                 new_dma_pkt->tsize = cpu_to_le32(len);
5490
5491                 if (dir == DMA_TO_DEVICE) {
5492                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5493                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5494                 } else {
5495                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5496                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5497                 }
5498
5499                 if (!user_memset)
5500                         device_memory_addr += len;
5501                 dma_desc_cnt++;
5502                 new_dma_pkt++;
5503         }
5504
5505         if (!dma_desc_cnt) {
5506                 dev_err(hdev->dev,
5507                         "No SG entries found when patching DMA packet\n");
5508                 return -EFAULT;
5509         }
5510
5511         /* Fix the last DMA packet - wrcomp must be as the user set it */
5512         new_dma_pkt--;
5513         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5514
5515         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5516
5517         return 0;
5518 }
5519
5520 static int gaudi_patch_cb(struct hl_device *hdev,
5521                                 struct hl_cs_parser *parser)
5522 {
5523         u32 cb_parsed_length = 0;
5524         u32 cb_patched_cur_length = 0;
5525         int rc = 0;
5526
5527         /* user_cb_size is more than 0 so the loop will always be executed */
5528         while (cb_parsed_length < parser->user_cb_size) {
5529                 enum packet_id pkt_id;
5530                 u16 pkt_size;
5531                 u32 new_pkt_size = 0;
5532                 struct gaudi_packet *user_pkt, *kernel_pkt;
5533
5534                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5535                 kernel_pkt = parser->patched_cb->kernel_address +
5536                                         cb_patched_cur_length;
5537
5538                 pkt_id = (enum packet_id) (
5539                                 (le64_to_cpu(user_pkt->header) &
5540                                 PACKET_HEADER_PACKET_ID_MASK) >>
5541                                         PACKET_HEADER_PACKET_ID_SHIFT);
5542
5543                 if (!validate_packet_id(pkt_id)) {
5544                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5545                         rc = -EINVAL;
5546                         break;
5547                 }
5548
5549                 pkt_size = gaudi_packet_sizes[pkt_id];
5550                 cb_parsed_length += pkt_size;
5551                 if (cb_parsed_length > parser->user_cb_size) {
5552                         dev_err(hdev->dev,
5553                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5554                         rc = -EINVAL;
5555                         break;
5556                 }
5557
5558                 switch (pkt_id) {
5559                 case PACKET_LIN_DMA:
5560                         rc = gaudi_patch_dma_packet(hdev, parser,
5561                                         (struct packet_lin_dma *) user_pkt,
5562                                         (struct packet_lin_dma *) kernel_pkt,
5563                                         &new_pkt_size);
5564                         cb_patched_cur_length += new_pkt_size;
5565                         break;
5566
5567                 case PACKET_MSG_PROT:
5568                         dev_err(hdev->dev,
5569                                 "User not allowed to use MSG_PROT\n");
5570                         rc = -EPERM;
5571                         break;
5572
5573                 case PACKET_CP_DMA:
5574                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5575                         rc = -EPERM;
5576                         break;
5577
5578                 case PACKET_STOP:
5579                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5580                         rc = -EPERM;
5581                         break;
5582
5583                 case PACKET_WREG_32:
5584                 case PACKET_WREG_BULK:
5585                 case PACKET_MSG_LONG:
5586                 case PACKET_MSG_SHORT:
5587                 case PACKET_REPEAT:
5588                 case PACKET_FENCE:
5589                 case PACKET_NOP:
5590                 case PACKET_ARB_POINT:
5591                 case PACKET_LOAD_AND_EXE:
5592                         memcpy(kernel_pkt, user_pkt, pkt_size);
5593                         cb_patched_cur_length += pkt_size;
5594                         break;
5595
5596                 default:
5597                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5598                                 pkt_id);
5599                         rc = -EINVAL;
5600                         break;
5601                 }
5602
5603                 if (rc)
5604                         break;
5605         }
5606
5607         return rc;
5608 }
5609
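/*
 * Parse path when the MMU is enabled: the user CB is copied as-is into a
 * kernel-owned patched CB (plus room for the two trailing MSG_PROT packets
 * when a completion is requested) and the copy is validated in place; no
 * per-packet address patching is done on this path, LIN_DMA packets are
 * passed through.
 */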
5610 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5611                 struct hl_cs_parser *parser)
5612 {
5613         u64 patched_cb_handle;
5614         u32 patched_cb_size;
5615         struct hl_cb *user_cb;
5616         int rc;
5617
5618         /*
5619          * The new CB should have space at the end for two MSG_PROT packets:
5620          * 1. A packet that will act as a completion packet
5621          * 2. A packet that will generate MSI interrupt
5622          */
5623         if (parser->completion)
5624                 parser->patched_cb_size = parser->user_cb_size +
5625                                 sizeof(struct packet_msg_prot) * 2;
5626         else
5627                 parser->patched_cb_size = parser->user_cb_size;
5628
5629         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5630                                 parser->patched_cb_size, false, false,
5631                                 &patched_cb_handle);
5632
5633         if (rc) {
5634                 dev_err(hdev->dev,
5635                         "Failed to allocate patched CB for DMA CS %d\n",
5636                         rc);
5637                 return rc;
5638         }
5639
5640         patched_cb_handle >>= PAGE_SHIFT;
5641         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5642                                 (u32) patched_cb_handle);
5643         /* hl_cb_get should never fail */
5644         if (!parser->patched_cb) {
5645                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5646                         (u32) patched_cb_handle);
5647                 rc = -EFAULT;
5648                 goto out;
5649         }
5650
5651         /*
5652          * The check that parser->user_cb_size <= parser->user_cb->size was done
5653          * in validate_queue_index().
5654          */
5655         memcpy(parser->patched_cb->kernel_address,
5656                 parser->user_cb->kernel_address,
5657                 parser->user_cb_size);
5658
5659         patched_cb_size = parser->patched_cb_size;
5660
5661         /* Validate patched CB instead of user CB */
5662         user_cb = parser->user_cb;
5663         parser->user_cb = parser->patched_cb;
5664         rc = gaudi_validate_cb(hdev, parser, true);
5665         parser->user_cb = user_cb;
5666
5667         if (rc) {
5668                 hl_cb_put(parser->patched_cb);
5669                 goto out;
5670         }
5671
5672         if (patched_cb_size != parser->patched_cb_size) {
5673                 dev_err(hdev->dev, "user CB size mismatch\n");
5674                 hl_cb_put(parser->patched_cb);
5675                 rc = -EINVAL;
5676                 goto out;
5677         }
5678
5679 out:
5680         /*
5681          * Always call cb destroy here because we still hold one reference
5682          * to the CB from the earlier cb_get. After the job completes,
5683          * cb_put will release it, but here we want to remove it from the
5684          * idr
5685          */
5686         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5687                                         patched_cb_handle << PAGE_SHIFT);
5688
5689         return rc;
5690 }
5691
5692 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5693                 struct hl_cs_parser *parser)
5694 {
5695         u64 patched_cb_handle;
5696         int rc;
5697
5698         rc = gaudi_validate_cb(hdev, parser, false);
5699
5700         if (rc)
5701                 goto free_userptr;
5702
5703         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5704                                 parser->patched_cb_size, false, false,
5705                                 &patched_cb_handle);
5706         if (rc) {
5707                 dev_err(hdev->dev,
5708                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5709                 goto free_userptr;
5710         }
5711
5712         patched_cb_handle >>= PAGE_SHIFT;
5713         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5714                                 (u32) patched_cb_handle);
5715         /* hl_cb_get should never fail here */
5716         if (!parser->patched_cb) {
5717                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5718                                 (u32) patched_cb_handle);
5719                 rc = -EFAULT;
5720                 goto out;
5721         }
5722
5723         rc = gaudi_patch_cb(hdev, parser);
5724
5725         if (rc)
5726                 hl_cb_put(parser->patched_cb);
5727
5728 out:
5729         /*
5730          * Always call cb destroy here because we still hold one reference
5731          * to the CB from the earlier cb_get. After the job completes,
5732          * cb_put will release it, but here we want to remove it from the
5733          * idr
5734          */
5735         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5736                                 patched_cb_handle << PAGE_SHIFT);
5737
5738 free_userptr:
5739         if (rc)
5740                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5741         return rc;
5742 }
5743
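/*
 * For internal queues no patching is done; the CB must simply live in a
 * range the engine can fetch from (SRAM, DRAM or the PMMU address range),
 * and NIC queues are additionally rejected if their engine was not
 * initialized. The NIC capability mask is only meaningful for NIC queue
 * ids, so it is left as 0 for the other internal queues.
 */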
5744 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5745                                         struct hl_cs_parser *parser)
5746 {
5747         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5748         struct gaudi_device *gaudi = hdev->asic_specific;
5749         u32 nic_mask_q_id = parser->hw_queue_id < GAUDI_QUEUE_ID_NIC_0_0 ? 0 :
5750                 1 << (HW_CAP_NIC_SHIFT + ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5751
5752         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5753                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5754                         (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5755                 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5756                                 parser->hw_queue_id);
5757                 return -EINVAL;
5758         }
5759
5760         /* For internal queue jobs just check if CB address is valid */
5761         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5762                                         parser->user_cb_size,
5763                                         asic_prop->sram_user_base_address,
5764                                         asic_prop->sram_end_address))
5765                 return 0;
5766
5767         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5768                                         parser->user_cb_size,
5769                                         asic_prop->dram_user_base_address,
5770                                         asic_prop->dram_end_address))
5771                 return 0;
5772
5773         /* PMMU and HPMMU addresses are equal, check only one of them */
5774         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5775                                         parser->user_cb_size,
5776                                         asic_prop->pmmu.start_addr,
5777                                         asic_prop->pmmu.end_addr))
5778                 return 0;
5779
5780         dev_err(hdev->dev,
5781                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5782                 parser->user_cb, parser->user_cb_size);
5783
5784         return -EFAULT;
5785 }
5786
5787 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5788 {
5789         struct gaudi_device *gaudi = hdev->asic_specific;
5790
5791         if (parser->queue_type == QUEUE_TYPE_INT)
5792                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5793
5794         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5795                 return gaudi_parse_cb_mmu(hdev, parser);
5796         else
5797                 return gaudi_parse_cb_no_mmu(hdev, parser);
5798 }
5799
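/*
 * Append the two MSG_PROT packets at the tail of a job's CB (starting at
 * kernel_address + len - 2 * sizeof(struct packet_msg_prot)): the first
 * writes cq_val to cq_addr as the completion indication, the second writes
 * to the MSI (or MSI-X, in multi_msi_mode) register to trigger the
 * completion interrupt.
 */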
5800 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5801                                         void *kernel_address, u32 len,
5802                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
5803                                         bool eb)
5804 {
5805         struct gaudi_device *gaudi = hdev->asic_specific;
5806         struct packet_msg_prot *cq_pkt;
5807         u64 msi_addr;
5808         u32 tmp;
5809
5810         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5811
5812         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5813         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5814
5815         if (eb)
5816                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5817
5818         cq_pkt->ctl = cpu_to_le32(tmp);
5819         cq_pkt->value = cpu_to_le32(cq_val);
5820         cq_pkt->addr = cpu_to_le64(cq_addr);
5821
5822         cq_pkt++;
5823
5824         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5825         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5826         cq_pkt->ctl = cpu_to_le32(tmp);
5827         cq_pkt->value = cpu_to_le32(1);
5828
5829         if (gaudi->multi_msi_mode)
5830                 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5831         else
5832                 msi_addr = mmPCIE_CORE_MSI_REQ;
5833
5834         cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5835 }
5836
5837 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5838 {
5839         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5840 }
5841
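/*
 * Fill a device memory range with a value by building a single memset
 * LIN_DMA packet in a kernel CB and sending it through QMAN0 of the first
 * DMA channel via gaudi_send_job_on_qman0().
 */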
5842 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5843                                         u32 size, u64 val)
5844 {
5845         struct packet_lin_dma *lin_dma_pkt;
5846         struct hl_cs_job *job;
5847         u32 cb_size, ctl, err_cause;
5848         struct hl_cb *cb;
5849         u64 id;
5850         int rc;
5851
5852         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5853         if (!cb)
5854                 return -EFAULT;
5855
5856         lin_dma_pkt = cb->kernel_address;
5857         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5858         cb_size = sizeof(*lin_dma_pkt);
5859
5860         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5861         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5862         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5863         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5864         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5865
5866         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5867         lin_dma_pkt->src_addr = cpu_to_le64(val);
5868         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5869         lin_dma_pkt->tsize = cpu_to_le32(size);
5870
5871         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5872         if (!job) {
5873                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5874                 rc = -ENOMEM;
5875                 goto release_cb;
5876         }
5877
5878         /* Verify DMA is OK */
5879         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5880         if (err_cause && !hdev->init_done) {
5881                 dev_dbg(hdev->dev,
5882                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5883                         err_cause);
5884                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5885         }
5886
5887         job->id = 0;
5888         job->user_cb = cb;
5889         atomic_inc(&job->user_cb->cs_cnt);
5890         job->user_cb_size = cb_size;
5891         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5892         job->patched_cb = job->user_cb;
5893         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5894
5895         hl_debugfs_add_job(hdev, job);
5896
5897         rc = gaudi_send_job_on_qman0(hdev, job);
5898         hl_debugfs_remove_job(hdev, job);
5899         kfree(job);
5900         atomic_dec(&cb->cs_cnt);
5901
5902         /* Verify DMA is OK */
5903         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5904         if (err_cause) {
5905                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5906                 rc = -EIO;
5907                 if (!hdev->init_done) {
5908                         dev_dbg(hdev->dev,
5909                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5910                                 err_cause);
5911                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5912                 }
5913         }
5914
5915 release_cb:
5916         id = cb->id;
5917         hl_cb_put(cb);
5918         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5919
5920         return rc;
5921 }
5922
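/*
 * Write the same value to num_regs consecutive 32-bit registers by queuing
 * one MSG_LONG packet per register on QMAN0. Used below when restoring the
 * sync manager blocks, e.g. gaudi_restore_sm_registers() clears a whole
 * SOB block with gaudi_memset_registers(hdev, base_addr,
 * NUM_OF_SOB_IN_BLOCK, 0).
 */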
5923 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5924                                         u32 num_regs, u32 val)
5925 {
5926         struct packet_msg_long *pkt;
5927         struct hl_cs_job *job;
5928         u32 cb_size, ctl;
5929         struct hl_cb *cb;
5930         int i, rc;
5931
5932         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5933
5934         if (cb_size > SZ_2M) {
5935                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M >> 20);
5936                 return -ENOMEM;
5937         }
5938
5939         cb = hl_cb_kernel_create(hdev, cb_size, false);
5940         if (!cb)
5941                 return -EFAULT;
5942
5943         pkt = cb->kernel_address;
5944
5945         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5946         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5947         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5948         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5949         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5950
5951         for (i = 0; i < num_regs ; i++, pkt++) {
5952                 pkt->ctl = cpu_to_le32(ctl);
5953                 pkt->value = cpu_to_le32(val);
5954                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5955         }
5956
5957         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5958         if (!job) {
5959                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5960                 rc = -ENOMEM;
5961                 goto release_cb;
5962         }
5963
5964         job->id = 0;
5965         job->user_cb = cb;
5966         atomic_inc(&job->user_cb->cs_cnt);
5967         job->user_cb_size = cb_size;
5968         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5969         job->patched_cb = job->user_cb;
5970         job->job_cb_size = cb_size;
5971
5972         hl_debugfs_add_job(hdev, job);
5973
5974         rc = gaudi_send_job_on_qman0(hdev, job);
5975         hl_debugfs_remove_job(hdev, job);
5976         kfree(job);
5977         atomic_dec(&cb->cs_cnt);
5978
5979 release_cb:
5980         hl_cb_put(cb);
5981         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5982
5983         return rc;
5984 }
5985
5986 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5987 {
5988         u64 base_addr;
5989         u32 num_regs;
5990         int rc;
5991
5992         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5993         num_regs = NUM_OF_SOB_IN_BLOCK;
5994         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5995         if (rc) {
5996                 dev_err(hdev->dev, "failed resetting SM registers\n");
5997                 return -ENOMEM;
5998         }
5999
6000         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
6001         num_regs = NUM_OF_SOB_IN_BLOCK;
6002         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6003         if (rc) {
6004                 dev_err(hdev->dev, "failed resetting SM registers\n");
6005                 return -ENOMEM;
6006         }
6007
6008         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6009         num_regs = NUM_OF_SOB_IN_BLOCK;
6010         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6011         if (rc) {
6012                 dev_err(hdev->dev, "failed resetting SM registers\n");
6013                 return -ENOMEM;
6014         }
6015
6016         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6017         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6018         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6019         if (rc) {
6020                 dev_err(hdev->dev, "failed resetting SM registers\n");
6021                 return -ENOMEM;
6022         }
6023
6024         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
6025         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6026         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6027         if (rc) {
6028                 dev_err(hdev->dev, "failed resetting SM registers\n");
6029                 return -ENOMEM;
6030         }
6031
6032         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6033         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6034         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6035         if (rc) {
6036                 dev_err(hdev->dev, "failed resetting SM registers\n");
6037                 return -ENOMEM;
6038         }
6039
6040         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6041                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6042         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6043         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6044         if (rc) {
6045                 dev_err(hdev->dev, "failed resetting SM registers\n");
6046                 return -ENOMEM;
6047         }
6048
6049         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6050                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6051         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6052         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6053         if (rc) {
6054                 dev_err(hdev->dev, "failed resetting SM registers\n");
6055                 return -ENOMEM;
6056         }
6057
6058         return 0;
6059 }
6060
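/*
 * Re-program, for every DMA channel, the write-completion address/data
 * registers (pointing each channel at its own SOB) and, for DMA channels
 * 2-7, the WR_AWUSER_31_11 attribute that user code is allowed to modify.
 */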
6061 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6062 {
6063         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6064                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6065         int i;
6066
6067         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6068                 u64 sob_addr = CFG_BASE +
6069                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6070                                 (i * sob_delta);
6071                 u32 dma_offset = i * DMA_CORE_OFFSET;
6072
6073                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6074                                 lower_32_bits(sob_addr));
6075                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6076                                 upper_32_bits(sob_addr));
6077                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6078
6079                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6080                  * modified by the user for SRAM reduction
6081                  */
6082                 if (i > 1)
6083                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6084                                                                 0x00000001);
6085         }
6086 }
6087
6088 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6089 {
6090         u32 qman_offset;
6091         int i;
6092
6093         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6094                 qman_offset = i * DMA_QMAN_OFFSET;
6095                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6096         }
6097
6098         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6099                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6100                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6101         }
6102
6103         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6104                 qman_offset = i * TPC_QMAN_OFFSET;
6105                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6106         }
6107
6108         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6109                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6110                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6111                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6112         }
6113 }
6114
6115 static int gaudi_restore_user_registers(struct hl_device *hdev)
6116 {
6117         int rc;
6118
6119         rc = gaudi_restore_sm_registers(hdev);
6120         if (rc)
6121                 return rc;
6122
6123         gaudi_restore_dma_registers(hdev);
6124         gaudi_restore_qm_registers(hdev);
6125
6126         return 0;
6127 }
6128
6129 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6130 {
6131         return 0;
6132 }
6133
6134 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6135 {
6136         struct asic_fixed_properties *prop = &hdev->asic_prop;
6137         struct gaudi_device *gaudi = hdev->asic_specific;
6138         u64 addr = prop->mmu_pgt_addr;
6139         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6140
6141         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6142                 return 0;
6143
6144         return gaudi_memset_device_memory(hdev, addr, size, 0);
6145 }
6146
6147 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6148 {
6149
6150 }
6151
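/*
 * The debugfs accessors below (32-bit and 64-bit reads and writes) route
 * the request by address range: CFG space uses RREG32/WREG32 (refused
 * while clock gating is enabled for the relevant blocks), SRAM goes
 * through the SRAM PCI BAR, DRAM/HBM goes through the HBM BAR after moving
 * its base, and host physical addresses are only touched directly when no
 * IOMMU is present.
 */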
6152 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6153                         bool user_address, u32 *val)
6154 {
6155         struct asic_fixed_properties *prop = &hdev->asic_prop;
6156         struct gaudi_device *gaudi = hdev->asic_specific;
6157         u64 hbm_bar_addr, host_phys_end;
6158         int rc = 0;
6159
6160         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6161
6162         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6163
6164                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6165                                 (hdev->clock_gating_mask &
6166                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6167
6168                         dev_err_ratelimited(hdev->dev,
6169                                 "Can't read register - clock gating is enabled!\n");
6170                         rc = -EFAULT;
6171                 } else {
6172                         *val = RREG32(addr - CFG_BASE);
6173                 }
6174
6175         } else if ((addr >= SRAM_BASE_ADDR) &&
6176                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6177                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6178                                 (addr - SRAM_BASE_ADDR));
6179         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6180                 u64 bar_base_addr = DRAM_PHYS_BASE +
6181                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6182
6183                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6184                 if (hbm_bar_addr != U64_MAX) {
6185                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6186                                                 (addr - bar_base_addr));
6187
6188                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6189                                                 hbm_bar_addr);
6190                 }
6191                 if (hbm_bar_addr == U64_MAX)
6192                         rc = -EIO;
6193         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6194                         user_address && !iommu_present(&pci_bus_type)) {
6195                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6196         } else {
6197                 rc = -EFAULT;
6198         }
6199
6200         return rc;
6201 }
6202
6203 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6204                         bool user_address, u32 val)
6205 {
6206         struct asic_fixed_properties *prop = &hdev->asic_prop;
6207         struct gaudi_device *gaudi = hdev->asic_specific;
6208         u64 hbm_bar_addr, host_phys_end;
6209         int rc = 0;
6210
6211         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6212
6213         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6214
6215                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6216                                 (hdev->clock_gating_mask &
6217                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6218
6219                         dev_err_ratelimited(hdev->dev,
6220                                 "Can't write register - clock gating is enabled!\n");
6221                         rc = -EFAULT;
6222                 } else {
6223                         WREG32(addr - CFG_BASE, val);
6224                 }
6225
6226         } else if ((addr >= SRAM_BASE_ADDR) &&
6227                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6228                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6229                                         (addr - SRAM_BASE_ADDR));
6230         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6231                 u64 bar_base_addr = DRAM_PHYS_BASE +
6232                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6233
6234                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6235                 if (hbm_bar_addr != U64_MAX) {
6236                         writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6237                                                 (addr - bar_base_addr));
6238
6239                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6240                                                 hbm_bar_addr);
6241                 }
6242                 if (hbm_bar_addr == U64_MAX)
6243                         rc = -EIO;
6244         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6245                         user_address && !iommu_present(&pci_bus_type)) {
6246                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6247         } else {
6248                 rc = -EFAULT;
6249         }
6250
6251         return rc;
6252 }
6253
6254 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6255                                 bool user_address, u64 *val)
6256 {
6257         struct asic_fixed_properties *prop = &hdev->asic_prop;
6258         struct gaudi_device *gaudi = hdev->asic_specific;
6259         u64 hbm_bar_addr, host_phys_end;
6260         int rc = 0;
6261
6262         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6263
6264         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6265
6266                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6267                                 (hdev->clock_gating_mask &
6268                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6269
6270                         dev_err_ratelimited(hdev->dev,
6271                                 "Can't read register - clock gating is enabled!\n");
6272                         rc = -EFAULT;
6273                 } else {
6274                         u32 val_l = RREG32(addr - CFG_BASE);
6275                         u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6276
6277                         *val = (((u64) val_h) << 32) | val_l;
6278                 }
6279
6280         } else if ((addr >= SRAM_BASE_ADDR) &&
6281                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6282                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6283                                 (addr - SRAM_BASE_ADDR));
6284         } else if (addr <=
6285                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6286                 u64 bar_base_addr = DRAM_PHYS_BASE +
6287                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6288
6289                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6290                 if (hbm_bar_addr != U64_MAX) {
6291                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6292                                                 (addr - bar_base_addr));
6293
6294                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6295                                                 hbm_bar_addr);
6296                 }
6297                 if (hbm_bar_addr == U64_MAX)
6298                         rc = -EIO;
6299         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6300                         user_address && !iommu_present(&pci_bus_type)) {
6301                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6302         } else {
6303                 rc = -EFAULT;
6304         }
6305
6306         return rc;
6307 }
6308
6309 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6310                                 bool user_address, u64 val)
6311 {
6312         struct asic_fixed_properties *prop = &hdev->asic_prop;
6313         struct gaudi_device *gaudi = hdev->asic_specific;
6314         u64 hbm_bar_addr, host_phys_end;
6315         int rc = 0;
6316
6317         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6318
6319         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6320
6321                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6322                                 (hdev->clock_gating_mask &
6323                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6324
6325                         dev_err_ratelimited(hdev->dev,
6326                                 "Can't write register - clock gating is enabled!\n");
6327                         rc = -EFAULT;
6328                 } else {
6329                         WREG32(addr - CFG_BASE, lower_32_bits(val));
6330                         WREG32(addr + sizeof(u32) - CFG_BASE,
6331                                 upper_32_bits(val));
6332                 }
6333
6334         } else if ((addr >= SRAM_BASE_ADDR) &&
6335                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6336                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6337                                         (addr - SRAM_BASE_ADDR));
6338         } else if (addr <=
6339                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6340                 u64 bar_base_addr = DRAM_PHYS_BASE +
6341                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6342
6343                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6344                 if (hbm_bar_addr != U64_MAX) {
6345                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6346                                                 (addr - bar_base_addr));
6347
6348                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6349                                                 hbm_bar_addr);
6350                 }
6351                 if (hbm_bar_addr == U64_MAX)
6352                         rc = -EIO;
6353         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6354                         user_address && !iommu_present(&pci_bus_type)) {
6355                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6356         } else {
6357                 rc = -EFAULT;
6358         }
6359
6360         return rc;
6361 }
6362
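/*
 * Program one DMA core directly (source, destination, size, commit) and
 * poll its STS0 register until the engine is no longer busy; any non-zero
 * ERR_CAUSE after the transfer is reported and cleared.
 */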
6363 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6364                                         u32 size_to_dma, dma_addr_t dma_addr)
6365 {
6366         u32 err_cause, val;
6367         u64 dma_offset;
6368         int rc;
6369
6370         dma_offset = dma_id * DMA_CORE_OFFSET;
6371
6372         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6373         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6374         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6375         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6376         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6377         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6378                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6379
6380         rc = hl_poll_timeout(
6381                 hdev,
6382                 mmDMA0_CORE_STS0 + dma_offset,
6383                 val,
6384                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6385                 0,
6386                 1000000);
6387
6388         if (rc) {
6389                 dev_err(hdev->dev,
6390                         "DMA %d timed-out during reading of 0x%llx\n",
6391                         dma_id, addr);
6392                 return -EIO;
6393         }
6394
6395         /* Verify DMA is OK */
6396         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6397         if (err_cause) {
6398                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6399                 dev_dbg(hdev->dev,
6400                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6401                         err_cause);
6402                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6403
6404                 return -EIO;
6405         }
6406
6407         return 0;
6408 }
6409
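/*
 * Read an arbitrary device address range into a host buffer for debugfs:
 * pick an idle PCI DMA engine (or fail with -EAGAIN), stop its QMAN CPs,
 * temporarily raise the core PROT bit (see the TODO below) so the engine
 * can reach the unmapped bounce buffer, and copy the data in 2MB chunks
 * through a coherent DMA buffer.
 */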
6410 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6411                                 void *blob_addr)
6412 {
6413         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6414         struct gaudi_device *gaudi = hdev->asic_specific;
6415         u64 dma_offset, qm_offset;
6416         dma_addr_t dma_addr;
6417         void *kernel_addr;
6418         bool is_eng_idle;
6419         int rc = 0, dma_id;
6420
6421         kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6422                                                 hdev, SZ_2M,
6423                                                 &dma_addr,
6424                                                 GFP_KERNEL | __GFP_ZERO);
6425
6426         if (!kernel_addr)
6427                 return -ENOMEM;
6428
6429         mutex_lock(&gaudi->clk_gate_mutex);
6430
6431         hdev->asic_funcs->disable_clock_gating(hdev);
6432
6433         hdev->asic_funcs->hw_queues_lock(hdev);
6434
6435         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6436         dma_offset = dma_id * DMA_CORE_OFFSET;
6437         qm_offset = dma_id * DMA_QMAN_OFFSET;
6438         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6439         is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6440
6441         if (!is_eng_idle) {
6442                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6443                 dma_offset = dma_id * DMA_CORE_OFFSET;
6444                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6445                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6446                 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6447
6448                 if (!is_eng_idle) {
6449                         dev_err_ratelimited(hdev->dev,
6450                                 "Can't read via DMA because it is BUSY\n");
6451                         rc = -EAGAIN;
6452                         goto out;
6453                 }
6454         }
6455
6456         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6457         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6458                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6459
6460         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6461          * using the compute ctx ASID, if one exists. If not, use the kernel ctx
6462          * ASID
6463          */
6464         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6465
6466         /* Verify DMA is OK */
6467         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6468         if (err_cause) {
6469                 dev_dbg(hdev->dev,
6470                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6471                         err_cause);
6472                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6473         }
6474
6475         pos = 0;
6476         size_left = size;
6477         size_to_dma = SZ_2M;
6478
6479         while (size_left > 0) {
6480
6481                 if (size_left < SZ_2M)
6482                         size_to_dma = size_left;
6483
6484                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6485                                                 dma_addr);
6486                 if (rc)
6487                         break;
6488
6489                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6490
6491                 if (size_left <= SZ_2M)
6492                         break;
6493
6494                 pos += SZ_2M;
6495                 addr += SZ_2M;
6496                 size_left -= SZ_2M;
6497         }
6498
6499         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6500          * using the compute ctx ASID, if one exists. If not, use the kernel ctx
6501          * ASID
6502          */
6503         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6504                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6505
6506         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6507
6508 out:
6509         hdev->asic_funcs->hw_queues_unlock(hdev);
6510
6511         hdev->asic_funcs->set_clock_gating(hdev);
6512
6513         mutex_unlock(&gaudi->clk_gate_mutex);
6514
6515         hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6516                                                 dma_addr);
6517
6518         return rc;
6519 }
6520
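/*
 * PTEs reside in HBM, so they are accessed through the HBM BAR, which is
 * assumed to already be mapped to the region around hbm_bar_cur_addr.
 */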
6521 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6522 {
6523         struct gaudi_device *gaudi = hdev->asic_specific;
6524
6525         if (hdev->hard_reset_pending)
6526                 return U64_MAX;
6527
6528         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6529                         (addr - gaudi->hbm_bar_cur_addr));
6530 }
6531
6532 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6533 {
6534         struct gaudi_device *gaudi = hdev->asic_specific;
6535
6536         if (hdev->hard_reset_pending)
6537                 return;
6538
6539         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6540                         (addr - gaudi->hbm_bar_cur_addr));
6541 }
6542
6543 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6544 {
6545         /* mask to zero the MMBP and ASID bits */
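	/* bits [10:0] hold the MMBP (MMU bypass) bit and the 10-bit ASID */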
6546         WREG32_AND(reg, ~0x7FF);
6547         WREG32_OR(reg, asid);
6548 }
6549
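/*
 * Write the ASID into the non-secure AXI USER/PROPS registers of the DMA,
 * TPC, MME and (initialized) NIC engines, so that their transactions go
 * through the MMU in the context of that ASID.
 */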
6550 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6551 {
6552         struct gaudi_device *gaudi = hdev->asic_specific;
6553
6554         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6555                 return;
6556
6557         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6558                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6559                 return;
6560         }
6561
6562         mutex_lock(&gaudi->clk_gate_mutex);
6563
6564         hdev->asic_funcs->disable_clock_gating(hdev);
6565
6566         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6567         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6568         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6569         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6570         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6571
6572         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6573         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6574         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6575         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6576         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6577
6578         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6579         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6580         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6581         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6582         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6583
6584         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6585         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6586         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6587         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6588         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6589
6590         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6591         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6592         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6593         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6594         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6595
6596         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6597         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6598         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6599         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6600         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6601
6602         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6603         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6604         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6605         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6606         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6607
6608         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6609         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6610         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6611         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6612         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6613
6614         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6615         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6616         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6617         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6618         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6619         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6620         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6621         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6622
6623         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6624         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6625         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6626         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6627         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6628         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6629         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6630
6631         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6632         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6633         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6634         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6635         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6636         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6637         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6638
6639         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6640         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6641         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6642         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6643         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6644         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6645         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6646
6647         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6648         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6649         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6650         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6651         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6652         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6653         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6654
6655         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6656         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6657         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6658         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6659         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6660         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6661         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6662
6663         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6664         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6665         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6666         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6667         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6668         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6669         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6670
6671         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6672         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6673         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6674         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6675         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6676         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6677         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6678
6679         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6680         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6681         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6682         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6683         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6684         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6685         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6686
6687         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6688         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6689         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6690         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6691         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6692         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6693         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6694         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6695         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6696         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6697
6698         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6699         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6700         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6701         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6702         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6703         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6704         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6705         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6706         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6707         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6708         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6709         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6710
6711         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6712                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6713                                 asid);
6714                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6715                                 asid);
6716                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6717                                 asid);
6718                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6719                                 asid);
6720                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6721                                 asid);
6722         }
6723
6724         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6725                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6726                                 asid);
6727                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6728                                 asid);
6729                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6730                                 asid);
6731                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6732                                 asid);
6733                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6734                                 asid);
6735         }
6736
6737         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6738                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6739                                 asid);
6740                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6741                                 asid);
6742                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6743                                 asid);
6744                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6745                                 asid);
6746                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6747                                 asid);
6748         }
6749
6750         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6751                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6752                                 asid);
6753                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6754                                 asid);
6755                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6756                                 asid);
6757                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6758                                 asid);
6759                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6760                                 asid);
6761         }
6762
6763         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6764                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6765                                 asid);
6766                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6767                                 asid);
6768                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6769                                 asid);
6770                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6771                                 asid);
6772                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6773                                 asid);
6774         }
6775
6776         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6777                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6778                                 asid);
6779                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6780                                 asid);
6781                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6782                                 asid);
6783                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6784                                 asid);
6785                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6786                                 asid);
6787         }
6788
6789         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6790                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6791                                 asid);
6792                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6793                                 asid);
6794                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6795                                 asid);
6796                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6797                                 asid);
6798                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6799                                 asid);
6800         }
6801
6802         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6803                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6804                                 asid);
6805                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6806                                 asid);
6807                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6808                                 asid);
6809                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6810                                 asid);
6811                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6812                                 asid);
6813         }
6814
6815         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6816                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6817                                 asid);
6818                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6819                                 asid);
6820                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6821                                 asid);
6822                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6823                                 asid);
6824                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6825                                 asid);
6826         }
6827
6828         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6829                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6830                                 asid);
6831                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6832                                 asid);
6833                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6834                                 asid);
6835                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6836                                 asid);
6837                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6838                                 asid);
6839         }
6840
6841         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6842         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6843
6844         hdev->asic_funcs->set_clock_gating(hdev);
6845
6846         mutex_unlock(&gaudi->clk_gate_mutex);
6847 }
6848
6849 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6850                 struct hl_cs_job *job)
6851 {
6852         struct packet_msg_prot *fence_pkt;
6853         u32 *fence_ptr;
6854         dma_addr_t fence_dma_addr;
6855         struct hl_cb *cb;
6856         u32 tmp, timeout, dma_offset;
6857         int rc;
6858
6859         if (hdev->pldm)
6860                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6861         else
6862                 timeout = HL_DEVICE_TIMEOUT_USEC;
6863
6864         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6865                 dev_err_ratelimited(hdev->dev,
6866                         "Can't send driver job on QMAN0 because the device is not idle\n");
6867                 return -EBUSY;
6868         }
6869
6870         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6871                                                         &fence_dma_addr);
6872         if (!fence_ptr) {
6873                 dev_err(hdev->dev,
6874                         "Failed to allocate fence memory for QMAN0\n");
6875                 return -ENOMEM;
6876         }
6877
6878         cb = job->patched_cb;
6879
6880         fence_pkt = cb->kernel_address +
6881                         job->job_cb_size - sizeof(struct packet_msg_prot);
6882
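	/*
	 * The last packet in the patched CB is a MSG_PROT packet. Program it
	 * to write GAUDI_QMAN0_FENCE_VAL to the fence buffer, which is polled
	 * below to detect completion of the job.
	 */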
6883         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6884         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6885         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6886
6887         fence_pkt->ctl = cpu_to_le32(tmp);
6888         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6889         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6890
6891         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6892
6893         WREG32(mmDMA0_CORE_PROT + dma_offset,
6894                         BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6895
6896         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6897                                         job->job_cb_size, cb->bus_address);
6898         if (rc) {
6899                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6900                 goto free_fence_ptr;
6901         }
6902
6903         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6904                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6905                                 timeout, true);
6906
6907         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6908
6909         if (rc == -ETIMEDOUT) {
6910                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6911                 goto free_fence_ptr;
6912         }
6913
6914 free_fence_ptr:
6915         WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6916
6917         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6918                                         fence_dma_addr);
6919         return rc;
6920 }
6921
6922 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6923 {
6924         if (event_type >= GAUDI_EVENT_SIZE)
6925                 goto event_not_supported;
6926
6927         if (!gaudi_irq_map_table[event_type].valid)
6928                 goto event_not_supported;
6929
6930         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6931
6932         return;
6933
6934 event_not_supported:
6935         snprintf(desc, size, "N/A");
6936 }
6937
6938 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6939                                                         u32 x_y, bool is_write)
6940 {
6941         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6942
6943         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6944                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6945
6946         switch (x_y) {
6947         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6948         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6949                 dma_id[0] = 0;
6950                 dma_id[1] = 2;
6951                 break;
6952         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6953         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6954                 dma_id[0] = 1;
6955                 dma_id[1] = 3;
6956                 break;
6957         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6958         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6959                 dma_id[0] = 4;
6960                 dma_id[1] = 6;
6961                 break;
6962         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6963         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6964                 dma_id[0] = 5;
6965                 dma_id[1] = 7;
6966                 break;
6967         default:
6968                 goto unknown_initiator;
6969         }
6970
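	/*
	 * Each DMA_IF serves two DMA engines. Read both engines' ERR_CAUSE
	 * registers and use the HBW read/write error bit to figure out which
	 * of the two actually triggered the RAZWI.
	 */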
6971         for (i = 0 ; i < 2 ; i++) {
6972                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6973                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6974         }
6975
6976         switch (x_y) {
6977         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6978         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6979                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6980                         return "DMA0";
6981                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6982                         return "DMA2";
6983                 else
6984                         return "DMA0 or DMA2";
6985         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6986         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6987                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6988                         return "DMA1";
6989                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6990                         return "DMA3";
6991                 else
6992                         return "DMA1 or DMA3";
6993         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6994         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6995                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6996                         return "DMA4";
6997                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6998                         return "DMA6";
6999                 else
7000                         return "DMA4 or DMA6";
7001         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7002         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7003                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
7004                         return "DMA5";
7005                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
7006                         return "DMA7";
7007                 else
7008                         return "DMA5 or DMA7";
7009         }
7010
7011 unknown_initiator:
7012         return "unknown initiator";
7013 }
7014
7015 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
7016                                                         bool is_write)
7017 {
7018         u32 val, x_y, axi_id;
7019
7020         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
7021                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
7022         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
7023                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
7024         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
7025                         RAZWI_INITIATOR_AXI_ID_SHIFT);
7026
7027         switch (x_y) {
7028         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
7029                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7030                         return "TPC0";
7031                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7032                         return "NIC0";
7033                 break;
7034         case RAZWI_INITIATOR_ID_X_Y_TPC1:
7035                 return "TPC1";
7036         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
7037         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
7038                 return "MME0";
7039         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
7040         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
7041                 return "MME1";
7042         case RAZWI_INITIATOR_ID_X_Y_TPC2:
7043                 return "TPC2";
7044         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
7045                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7046                         return "TPC3";
7047                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
7048                         return "PCI";
7049                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
7050                         return "CPU";
7051                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
7052                         return "PSOC";
7053                 break;
7054         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7055         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7056         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7057         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7058         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7059         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7060         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7061         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7062                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
7063         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7064                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7065                         return "TPC4";
7066                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7067                         return "NIC1";
7068                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7069                         return "NIC2";
7070                 break;
7071         case RAZWI_INITIATOR_ID_X_Y_TPC5:
7072                 return "TPC5";
7073         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7074         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7075                 return "MME2";
7076         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7077         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7078                 return "MME3";
7079         case RAZWI_INITIATOR_ID_X_Y_TPC6:
7080                 return "TPC6";
7081         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7082                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7083                         return "TPC7";
7084                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7085                         return "NIC4";
7086                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7087                         return "NIC5";
7088                 break;
7089         default:
7090                 break;
7091         }
7092
7093         dev_err(hdev->dev,
7094                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7095                 val,
7096                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7097                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7098                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7099                         RAZWI_INITIATOR_AXI_ID_MASK);
7100
7101         return "unknown initiator";
7102 }
7103
7104 static void gaudi_print_razwi_info(struct hl_device *hdev)
7105 {
7106         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7107                 dev_err_ratelimited(hdev->dev,
7108                         "RAZWI event caused by illegal write of %s\n",
7109                         gaudi_get_razwi_initiator_name(hdev, true));
7110                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7111         }
7112
7113         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7114                 dev_err_ratelimited(hdev->dev,
7115                         "RAZWI event caused by illegal read of %s\n",
7116                         gaudi_get_razwi_initiator_name(hdev, false));
7117                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7118         }
7119 }
7120
7121 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7122 {
7123         struct gaudi_device *gaudi = hdev->asic_specific;
7124         u64 addr;
7125         u32 val;
7126
7127         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7128                 return;
7129
7130         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7131         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7132                 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7133                 addr <<= 32;
7134                 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7135
7136                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7137                                         addr);
7138
7139                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7140         }
7141
7142         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7143         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7144                 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7145                 addr <<= 32;
7146                 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7147
7148                 dev_err_ratelimited(hdev->dev,
7149                                 "MMU access error on va 0x%llx\n", addr);
7150
7151                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7152         }
7153 }
7154
7155 /*
7156  *  +-------------------+------------------------------------------------------+
7157  *  | Configuration Reg |                     Description                      |
7158  *  |      Address      |                                                      |
7159  *  +-------------------+------------------------------------------------------+
7160  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7161  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7162  *  |                   |0xF34 memory wrappers 63:32                           |
7163  *  |                   |0xF38 memory wrappers 95:64                           |
7164  *  |                   |0xF3C memory wrappers 127:96                          |
7165  *  +-------------------+------------------------------------------------------+
7166  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7167  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7168  *  |                   |0xF44 memory wrappers 63:32                           |
7169  *  |                   |0xF48 memory wrappers 95:64                           |
7170  *  |                   |0xF4C memory wrappers 127:96                          |
7171  *  +-------------------+------------------------------------------------------+
7172  */
7173 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7174                 struct ecc_info_extract_params *params, u64 *ecc_address,
7175                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7176 {
7177         struct gaudi_device *gaudi = hdev->asic_specific;
7178         u32 i, num_mem_regs, reg, err_bit;
7179         u64 err_addr, err_word = 0;
7180         int rc = 0;
7181
7182         num_mem_regs = params->num_memories / 32 +
7183                         ((params->num_memories % 32) ? 1 : 0);
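	/* e.g. 90 memories need 3 indication registers, 128 need 4 */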
7184
7185         if (params->block_address >= CFG_BASE)
7186                 params->block_address -= CFG_BASE;
7187
7188         if (params->derr)
7189                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7190         else
7191                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7192
7193         if (params->disable_clock_gating) {
7194                 mutex_lock(&gaudi->clk_gate_mutex);
7195                 hdev->asic_funcs->disable_clock_gating(hdev);
7196         }
7197
7198         /* Set invalid wrapper index */
7199         *memory_wrapper_idx = 0xFF;
7200
7201         /* Iterate through memory wrappers, a single bit must be set */
7202         for (i = 0 ; i < num_mem_regs ; i++) {
7203                 /* the indication registers are 4 bytes apart */
7204                 err_word = RREG32(err_addr + i * 4);
7205                 if (err_word) {
7206                         err_bit = __ffs(err_word);
7207                         *memory_wrapper_idx = err_bit + (32 * i);
7208                         break;
7209                 }
7210         }
7211
7212         if (*memory_wrapper_idx == 0xFF) {
7213                 dev_err(hdev->dev, "ECC error information cannot be found\n");
7214                 rc = -EINVAL;
7215                 goto enable_clk_gate;
7216         }
7217
7218         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7219                         *memory_wrapper_idx);
7220
7221         *ecc_address =
7222                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7223         *ecc_syndrom =
7224                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7225
7226         /* Clear error indication */
7227         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7228         if (params->derr)
7229                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7230         else
7231                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7232
7233         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7234
7235 enable_clk_gate:
7236         if (params->disable_clock_gating) {
7237                 hdev->asic_funcs->set_clock_gating(hdev);
7238
7239                 mutex_unlock(&gaudi->clk_gate_mutex);
7240         }
7241
7242         return rc;
7243 }
7244
7245 /*
7246  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7247  *
7248  * @idx: the current pi/ci value
7249  * @q_len: the queue length (power of 2)
7250  *
7251  * @return the cyclically decremented index
7252  */
7253 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7254 {
7255         u32 mask = q_len - 1;
7256
7257         /*
7258          * modular decrement is equivalent to adding (queue_size - 1)
7259          * later we take LSBs to make sure the value is in the
7260          * range [0, queue_len - 1]
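	 *
	 * e.g. for q_len = 8: idx = 5 -> 4 and idx = 0 wraps to 7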
7261          */
7262         return (idx + q_len - 1) & mask;
7263 }
7264
7265 /**
7266  * gaudi_print_sw_config_stream_data - print SW config stream data
7267  *
7268  * @hdev: pointer to the habanalabs device structure
7269  * @stream: the QMAN's stream
7270  * @qman_base: base address of QMAN registers block
7271  */
7272 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7273                                                 u64 qman_base)
7274 {
7275         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7276         u32 cq_ptr_lo_off, size;
7277
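	/*
	 * The per-stream stride is the distance between two consecutive
	 * CQ_PTR_LO registers. All QMANs share the TPC0_QM register layout,
	 * so offsets are computed against mmTPC0_QM_BASE and added to the
	 * caller's qman_base.
	 */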
7278         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7279
7280         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7281                                                 stream * cq_ptr_lo_off;
7282         cq_ptr_hi = cq_ptr_lo +
7283                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7284         cq_tsize = cq_ptr_lo +
7285                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7286
7287         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7288         size = RREG32(cq_tsize);
7289         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
7290                                                         stream, cq_ptr, size);
7291 }
7292
7293 /**
7294  * gaudi_print_last_pqes_on_err - print last PQEs on error
7295  *
7296  * @hdev: pointer to the habanalabs device structure
7297  * @qid_base: first QID of the QMAN (out of 4 streams)
7298  * @stream: the QMAN's stream
7299  * @qman_base: base address of QMAN registers block
7300  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7301  */
7302 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7303                                                 u32 stream, u64 qman_base,
7304                                                 bool pr_sw_conf)
7305 {
7306         u32 ci, qm_ci_stream_off, queue_len;
7307         struct hl_hw_queue *q;
7308         u64 pq_ci;
7309         int i;
7310
7311         q = &hdev->kernel_queues[qid_base + stream];
7312
7313         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7314         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7315                                                 stream * qm_ci_stream_off;
7316
7317         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7318                                         q->int_queue_len : HL_QUEUE_LENGTH;
7319
7320         hdev->asic_funcs->hw_queues_lock(hdev);
7321
7322         if (pr_sw_conf)
7323                 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7324
7325         ci = RREG32(pq_ci);
7326
7327         /* we should start printing from ci - 1 */
7328         ci = gaudi_queue_idx_dec(ci, queue_len);
7329
7330         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7331                 struct hl_bd *bd;
7332                 u64 addr;
7333                 u32 len;
7334
7335                 bd = q->kernel_address;
7336                 bd += ci;
7337
7338                 len = le32_to_cpu(bd->len);
7339                 /* len 0 means an uninitialized entry - break */
7340                 if (!len)
7341                         break;
7342
7343                 addr = le64_to_cpu(bd->ptr);
7344
7345                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
7346                                                         stream, ci, addr, len);
7347
7348                 /* get previous ci, wrap if needed */
7349                 ci = gaudi_queue_idx_dec(ci, queue_len);
7350         }
7351
7352         hdev->asic_funcs->hw_queues_unlock(hdev);
7353 }
7354
7355 /**
7356  * print_qman_data_on_err - extract QMAN data on error
7357  *
7358  * @hdev: pointer to the habanalabs device structure
7359  * @qid_base: first QID of the QMAN (out of 4 streams)
7360  * @stream: the QMAN's stream
7361  * @qman_base: base address of QMAN registers block
7362  *
7363  * This function attempts to extract as much data as possible on a QMAN error.
7364  * On an upper CP, print the SW config stream data and the last 8 PQEs.
7365  * On the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7366  */
7367 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7368                                                 u32 stream, u64 qman_base)
7369 {
7370         u32 i;
7371
7372         if (stream != QMAN_STREAMS) {
7373                 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7374                                                                         true);
7375                 return;
7376         }
7377
7378         gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7379
7380         for (i = 0; i < QMAN_STREAMS; i++)
7381                 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7382                                                                         false);
7383 }
7384
7385 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7386                                           const char *qm_name,
7387                                           u64 qman_base,
7388                                           u32 qid_base)
7389 {
7390         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7391         u64 glbl_sts_addr, arb_err_addr;
7392         char reg_desc[32];
7393
7394         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7395         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7396
7397         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7398         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7399                 glbl_sts_clr_val = 0;
7400                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7401
7402                 if (!glbl_sts_val)
7403                         continue;
7404
7405                 if (i == QMAN_STREAMS)
7406                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7407                 else
7408                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7409
7410                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7411                         if (glbl_sts_val & BIT(j)) {
7412                                 dev_err_ratelimited(hdev->dev,
7413                                                 "%s %s. err cause: %s\n",
7414                                                 qm_name, reg_desc,
7415                                                 gaudi_qman_error_cause[j]);
7416                                 glbl_sts_clr_val |= BIT(j);
7417                         }
7418                 }
7419
7420                 /* Write 1 clear errors */
7421                 if (!hdev->stop_on_err)
7422                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7423                 else
7424                         print_qman_data_on_err(hdev, qid_base, i, qman_base);
7425         }
7426
7427         arb_err_val = RREG32(arb_err_addr);
7428
7429         if (!arb_err_val)
7430                 return;
7431
7432         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7433                 if (arb_err_val & BIT(j)) {
7434                         dev_err_ratelimited(hdev->dev,
7435                                         "%s ARB_ERR. err cause: %s\n",
7436                                         qm_name,
7437                                         gaudi_qman_arb_error_cause[j]);
7438                 }
7439         }
7440 }
7441
7442 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7443                 struct hl_eq_sm_sei_data *sei_data)
7444 {
7445         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7446
7447         /* Flip the bits as the enum is ordered in the opposite way */
7448         index = (index ^ 0x3) & 0x3;
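	/* i.e. 0 <-> 3 and 1 <-> 2 */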
7449
7450         switch (sei_data->sei_cause) {
7451         case SM_SEI_SO_OVERFLOW:
7452                 dev_err_ratelimited(hdev->dev,
7453                         "%s SEI Error: SOB Group %u overflow/underflow",
7454                         gaudi_sync_manager_names[index],
7455                         le32_to_cpu(sei_data->sei_log));
7456                 break;
7457         case SM_SEI_LBW_4B_UNALIGNED:
7458                 dev_err_ratelimited(hdev->dev,
7459                         "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7460                         gaudi_sync_manager_names[index],
7461                         le32_to_cpu(sei_data->sei_log));
7462                 break;
7463         case SM_SEI_AXI_RESPONSE_ERR:
7464                 dev_err_ratelimited(hdev->dev,
7465                         "%s SEI Error: AXI ID %u response error",
7466                         gaudi_sync_manager_names[index],
7467                         le32_to_cpu(sei_data->sei_log));
7468                 break;
7469         default:
7470                 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7471                                 le32_to_cpu(sei_data->sei_log));
7472                 break;
7473         }
7474 }
7475
7476 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7477                 struct hl_eq_ecc_data *ecc_data)
7478 {
7479         struct ecc_info_extract_params params;
7480         u64 ecc_address = 0, ecc_syndrom = 0;
7481         u8 index, memory_wrapper_idx = 0;
7482         bool extract_info_from_fw;
7483         int rc;
7484
7485         if (hdev->asic_prop.fw_security_enabled) {
7486                 extract_info_from_fw = true;
7487                 goto extract_ecc_info;
7488         }
7489
7490         switch (event_type) {
7491         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7492         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7493                 extract_info_from_fw = true;
7494                 break;
7495         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7496                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7497                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7498                 params.num_memories = 90;
7499                 params.derr = false;
7500                 params.disable_clock_gating = true;
7501                 extract_info_from_fw = false;
7502                 break;
7503         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7504                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7505                 params.block_address =
7506                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7507                 params.num_memories = 90;
7508                 params.derr = true;
7509                 params.disable_clock_gating = true;
7510                 extract_info_from_fw = false;
7511                 break;
7512         case GAUDI_EVENT_MME0_ACC_SERR:
7513         case GAUDI_EVENT_MME1_ACC_SERR:
7514         case GAUDI_EVENT_MME2_ACC_SERR:
7515         case GAUDI_EVENT_MME3_ACC_SERR:
7516                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7517                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7518                 params.num_memories = 128;
7519                 params.derr = false;
7520                 params.disable_clock_gating = true;
7521                 extract_info_from_fw = false;
7522                 break;
7523         case GAUDI_EVENT_MME0_ACC_DERR:
7524         case GAUDI_EVENT_MME1_ACC_DERR:
7525         case GAUDI_EVENT_MME2_ACC_DERR:
7526         case GAUDI_EVENT_MME3_ACC_DERR:
7527                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7528                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7529                 params.num_memories = 128;
7530                 params.derr = true;
7531                 params.disable_clock_gating = true;
7532                 extract_info_from_fw = false;
7533                 break;
7534         case GAUDI_EVENT_MME0_SBAB_SERR:
7535         case GAUDI_EVENT_MME1_SBAB_SERR:
7536         case GAUDI_EVENT_MME2_SBAB_SERR:
7537         case GAUDI_EVENT_MME3_SBAB_SERR:
7538                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7539                 params.block_address =
7540                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7541                 params.num_memories = 33;
7542                 params.derr = false;
7543                 params.disable_clock_gating = true;
7544                 extract_info_from_fw = false;
7545                 break;
7546         case GAUDI_EVENT_MME0_SBAB_DERR:
7547         case GAUDI_EVENT_MME1_SBAB_DERR:
7548         case GAUDI_EVENT_MME2_SBAB_DERR:
7549         case GAUDI_EVENT_MME3_SBAB_DERR:
7550                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7551                 params.block_address =
7552                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7553                 params.num_memories = 33;
7554                 params.derr = true;
7555                 params.disable_clock_gating = true;
7556                 extract_info_from_fw = false;
7557                 break;
7558         default:
7559                 return;
7560         }
7561
7562 extract_ecc_info:
7563         if (extract_info_from_fw) {
7564                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7565                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7566                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7567         } else {
7568                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7569                                 &ecc_syndrom, &memory_wrapper_idx);
7570                 if (rc)
7571                         return;
7572         }
7573
7574         dev_err(hdev->dev,
7575                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7576                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7577 }
7578
7579 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7580 {
7581         u64 qman_base;
7582         char desc[32];
7583         u32 qid_base;
7584         u8 index;
7585
7586         switch (event_type) {
7587         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7588                 index = event_type - GAUDI_EVENT_TPC0_QM;
7589                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7590                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7591                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7592                 break;
7593         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7594                 index = event_type - GAUDI_EVENT_MME0_QM;
7595                 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7596                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7597                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7598                 break;
7599         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7600                 index = event_type - GAUDI_EVENT_DMA0_QM;
7601                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7602                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7603                 if (index > 1)
7604                         qid_base++;
7605                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7606                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7607                 break;
7608         case GAUDI_EVENT_NIC0_QM0:
7609                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7610                 qman_base = mmNIC0_QM0_BASE;
7611                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7612                 break;
7613         case GAUDI_EVENT_NIC0_QM1:
7614                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7615                 qman_base = mmNIC0_QM1_BASE;
7616                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7617                 break;
7618         case GAUDI_EVENT_NIC1_QM0:
7619                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7620                 qman_base = mmNIC1_QM0_BASE;
7621                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7622                 break;
7623         case GAUDI_EVENT_NIC1_QM1:
7624                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7625                 qman_base = mmNIC1_QM1_BASE;
7626                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7627                 break;
7628         case GAUDI_EVENT_NIC2_QM0:
7629                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7630                 qman_base = mmNIC2_QM0_BASE;
7631                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7632                 break;
7633         case GAUDI_EVENT_NIC2_QM1:
7634                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7635                 qman_base = mmNIC2_QM1_BASE;
7636                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7637                 break;
7638         case GAUDI_EVENT_NIC3_QM0:
7639                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7640                 qman_base = mmNIC3_QM0_BASE;
7641                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7642                 break;
7643         case GAUDI_EVENT_NIC3_QM1:
7644                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7645                 qman_base = mmNIC3_QM1_BASE;
7646                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7647                 break;
7648         case GAUDI_EVENT_NIC4_QM0:
7649                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7650                 qman_base = mmNIC4_QM0_BASE;
7651                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7652                 break;
7653         case GAUDI_EVENT_NIC4_QM1:
7654                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7655                 qman_base = mmNIC4_QM1_BASE;
7656                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7657                 break;
7658         default:
7659                 return;
7660         }
7661
7662         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7663 }
7664
7665 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7666                                         bool razwi)
7667 {
7668         char desc[64] = "";
7669
7670         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7671         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7672                 event_type, desc);
7673
7674         if (razwi) {
7675                 gaudi_print_razwi_info(hdev);
7676                 gaudi_print_mmu_error_info(hdev);
7677         }
7678 }
7679
7680 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7681                                         struct cpucp_pkt_sync_err *sync_err)
7682 {
7683         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7684
7685         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7686                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7687 }
7688
7689 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7690                                         struct hl_eq_fw_alive *fw_alive)
7691 {
7692         dev_err(hdev->dev,
7693                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7694                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7695                 "Minor" : "Critical", fw_alive->process_id,
7696                 fw_alive->thread_id, fw_alive->uptime_seconds);
7697 }
7698
7699 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7700 {
7701         struct gaudi_device *gaudi = hdev->asic_specific;
7702
7703         /* Unmask all IRQs since some could have been received
7704          * during the soft reset
7705          */
7706         return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7707 }
7708
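/*
 * Report HBM ECC/parity errors for the given HBM device. If the FW exposes
 * ECC data, decode the FW-supplied information. Otherwise, and only when FW
 * security is disabled, read the HBM MC registers directly and clear the
 * interrupts. The direct read path returns -EIO if any error was detected.
 */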
7709 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7710                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7711 {
7712         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7713         int rc = 0;
7714
7715         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7716                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7717                 if (!hbm_ecc_data) {
7718                         dev_err(hdev->dev, "No FW ECC data\n");
7719                         return 0;
7720                 }
7721
7722                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7723                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7724                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7725                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7726                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7727                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7728                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7729                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7730                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7731                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7732                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7733                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7734                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7735                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7736
7737                 dev_err(hdev->dev,
7738                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7739                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7740                 dev_err(hdev->dev,
7741                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7742                         device, ch, le32_to_cpu(hbm_ecc_data->first_addr), type,
7743                         le32_to_cpu(hbm_ecc_data->sec_cont_cnt), le32_to_cpu(hbm_ecc_data->sec_cnt),
7744                         le32_to_cpu(hbm_ecc_data->dec_cnt));
7745                 return 0;
7746         }
7747
7748         if (hdev->asic_prop.fw_security_enabled) {
7749                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7750                 return 0;
7751         }
7752
7753         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7754         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7755                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7756                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7757                 if (val) {
7758                         rc = -EIO;
7759                         dev_err(hdev->dev,
7760                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7761                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7762                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7763                                 (val >> 4) & 0x1);
7764
7765                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7766                         dev_err(hdev->dev,
7767                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7768                                 device, ch * 2,
7769                                 RREG32(base + ch * 0x1000 + 0x064),
7770                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7771                                 (val2 & 0xFF0000) >> 16,
7772                                 (val2 & 0xFF000000) >> 24);
7773                 }
7774
7775                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7776                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7777                 if (val) {
7778                         rc = -EIO;
7779                         dev_err(hdev->dev,
7780                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7781                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7782                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7783                                 (val >> 4) & 0x1);
7784
7785                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7786                         dev_err(hdev->dev,
7787                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7788                                 device, ch * 2 + 1,
7789                                 RREG32(base + ch * 0x1000 + 0x074),
7790                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7791                                 (val2 & 0xFF0000) >> 16,
7792                                 (val2 & 0xFF000000) >> 24);
7793                 }
7794
7795                 /* Clear interrupts */
7796                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7797                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7798                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7799                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7800                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7801                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7802         }
7803
7804         val  = RREG32(base + 0x8F30);
7805         val2 = RREG32(base + 0x8F34);
7806         if (val | val2) {
7807                 rc = -EIO;
7808                 dev_err(hdev->dev,
7809                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7810                         device, val, val2);
7811         }
7812         val  = RREG32(base + 0x8F40);
7813         val2 = RREG32(base + 0x8F44);
7814         if (val | val2) {
7815                 rc = -EIO;
7816                 dev_err(hdev->dev,
7817                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7818                         device, val, val2);
7819         }
7820
7821         return rc;
7822 }
7823
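/* Translate an HBM SPI event to the index (0-3) of the HBM device */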
7824 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7825 {
7826         switch (hbm_event_type) {
7827         case GAUDI_EVENT_HBM0_SPI_0:
7828         case GAUDI_EVENT_HBM0_SPI_1:
7829                 return 0;
7830         case GAUDI_EVENT_HBM1_SPI_0:
7831         case GAUDI_EVENT_HBM1_SPI_1:
7832                 return 1;
7833         case GAUDI_EVENT_HBM2_SPI_0:
7834         case GAUDI_EVENT_HBM2_SPI_1:
7835                 return 2;
7836         case GAUDI_EVENT_HBM3_SPI_0:
7837         case GAUDI_EVENT_HBM3_SPI_1:
7838                 return 3;
7839         default:
7840                 break;
7841         }
7842
7843         /* Should never happen */
7844         return 0;
7845 }
7846
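/*
 * Read and clear the TPC interrupt cause register and log every pending
 * cause. Returns true if a QM error was detected, i.e. a soft reset is
 * required.
 */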
7847 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7848                                         char *interrupt_name)
7849 {
7850         struct gaudi_device *gaudi = hdev->asic_specific;
7851         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7852         bool soft_reset_required = false;
7853
7854         /* Accessing the TPC_INTR_CAUSE registers requires disabling clock
7855          * gating, so this cannot be done by the CPU-CP and must be done by
7856          * the driver instead.
7857          */
7858
7859         mutex_lock(&gaudi->clk_gate_mutex);
7860
7861         hdev->asic_funcs->disable_clock_gating(hdev);
7862
7863         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7864                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7865
7866         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7867                 if (tpc_interrupts_cause & BIT(i)) {
7868                         dev_err_ratelimited(hdev->dev,
7869                                         "TPC%d_%s interrupt cause: %s\n",
7870                                         tpc_id, interrupt_name,
7871                                         gaudi_tpc_interrupts_cause[i]);
7872                         /* If this is a QM error, we need to soft-reset */
7873                         if (i == 15)
7874                                 soft_reset_required = true;
7875                 }
7876
7877         /* Clear interrupts */
7878         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7879
7880         hdev->asic_funcs->set_clock_gating(hdev);
7881
7882         mutex_unlock(&gaudi->clk_gate_mutex);
7883
7884         return soft_reset_required;
7885 }
7886
7887 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7888 {
7889         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7890 }
7891
7892 static int tpc_krn_event_to_tpc_id(u16 tpc_krn_event_type)
7893 {
7894         return (tpc_krn_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7895 }
7896
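/* Track and log clock throttling state changes reported by the FW */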
7897 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7898                                         u16 event_type)
7899 {
7900         switch (event_type) {
7901         case GAUDI_EVENT_FIX_POWER_ENV_S:
7902                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7903                 dev_info_ratelimited(hdev->dev,
7904                         "Clock throttling due to power consumption\n");
7905                 break;
7906
7907         case GAUDI_EVENT_FIX_POWER_ENV_E:
7908                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7909                 dev_info_ratelimited(hdev->dev,
7910                         "Power envelope is safe, back to optimal clock\n");
7911                 break;
7912
7913         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7914                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7915                 dev_info_ratelimited(hdev->dev,
7916                         "Clock throttling due to overheating\n");
7917                 break;
7918
7919         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7920                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7921                 dev_info_ratelimited(hdev->dev,
7922                         "Thermal envelope is safe, back to optimal clock\n");
7923                 break;
7924
7925         default:
7926                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7927                         event_type);
7928                 break;
7929         }
7930 }
7931
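/*
 * Main event queue handler: update the event statistics and dispatch the
 * entry to the relevant handler. Fatal errors trigger a device reset; most
 * other events end with unmasking the interrupt in the FW.
 */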
7932 static void gaudi_handle_eqe(struct hl_device *hdev,
7933                                 struct hl_eq_entry *eq_entry)
7934 {
7935         struct gaudi_device *gaudi = hdev->asic_specific;
7936         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7937         u32 fw_fatal_err_flag = 0;
7938         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7939                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7940         bool reset_required;
7941         u8 cause;
7942         int rc;
7943
7944         if (event_type >= GAUDI_EVENT_SIZE) {
7945                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
7946                                 event_type, GAUDI_EVENT_SIZE - 1);
7947                 return;
7948         }
7949
7950         gaudi->events_stat[event_type]++;
7951         gaudi->events_stat_aggregate[event_type]++;
7952
7953         switch (event_type) {
7954         case GAUDI_EVENT_PCIE_CORE_DERR:
7955         case GAUDI_EVENT_PCIE_IF_DERR:
7956         case GAUDI_EVENT_PCIE_PHY_DERR:
7957         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7958         case GAUDI_EVENT_MME0_ACC_DERR:
7959         case GAUDI_EVENT_MME0_SBAB_DERR:
7960         case GAUDI_EVENT_MME1_ACC_DERR:
7961         case GAUDI_EVENT_MME1_SBAB_DERR:
7962         case GAUDI_EVENT_MME2_ACC_DERR:
7963         case GAUDI_EVENT_MME2_SBAB_DERR:
7964         case GAUDI_EVENT_MME3_ACC_DERR:
7965         case GAUDI_EVENT_MME3_SBAB_DERR:
7966         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7967                 fallthrough;
7968         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7969         case GAUDI_EVENT_PSOC_MEM_DERR:
7970         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7971         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7972         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7973         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7974         case GAUDI_EVENT_MMU_DERR:
7975         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7976                 gaudi_print_irq_info(hdev, event_type, true);
7977                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7978                 fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
7979                 goto reset_device;
7980
7981         case GAUDI_EVENT_GIC500:
7982         case GAUDI_EVENT_AXI_ECC:
7983         case GAUDI_EVENT_L2_RAM_ECC:
7984         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7985                 gaudi_print_irq_info(hdev, event_type, false);
7986                 fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
7987                 goto reset_device;
7988
7989         case GAUDI_EVENT_HBM0_SPI_0:
7990         case GAUDI_EVENT_HBM1_SPI_0:
7991         case GAUDI_EVENT_HBM2_SPI_0:
7992         case GAUDI_EVENT_HBM3_SPI_0:
7993                 gaudi_print_irq_info(hdev, event_type, false);
7994                 gaudi_hbm_read_interrupts(hdev,
7995                                 gaudi_hbm_event_to_dev(event_type),
7996                                 &eq_entry->hbm_ecc_data);
7997                 fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
7998                 goto reset_device;
7999
8000         case GAUDI_EVENT_HBM0_SPI_1:
8001         case GAUDI_EVENT_HBM1_SPI_1:
8002         case GAUDI_EVENT_HBM2_SPI_1:
8003         case GAUDI_EVENT_HBM3_SPI_1:
8004                 gaudi_print_irq_info(hdev, event_type, false);
8005                 gaudi_hbm_read_interrupts(hdev,
8006                                 gaudi_hbm_event_to_dev(event_type),
8007                                 &eq_entry->hbm_ecc_data);
8008                 hl_fw_unmask_irq(hdev, event_type);
8009                 break;
8010
8011         case GAUDI_EVENT_TPC0_DEC:
8012         case GAUDI_EVENT_TPC1_DEC:
8013         case GAUDI_EVENT_TPC2_DEC:
8014         case GAUDI_EVENT_TPC3_DEC:
8015         case GAUDI_EVENT_TPC4_DEC:
8016         case GAUDI_EVENT_TPC5_DEC:
8017         case GAUDI_EVENT_TPC6_DEC:
8018         case GAUDI_EVENT_TPC7_DEC:
8019                 gaudi_print_irq_info(hdev, event_type, true);
8020                 reset_required = gaudi_tpc_read_interrupts(hdev,
8021                                         tpc_dec_event_to_tpc_id(event_type),
8022                                         "AXI_SLV_DEC_Error");
8023                 if (reset_required) {
8024                         dev_err(hdev->dev, "reset required due to %s\n",
8025                                 gaudi_irq_map_table[event_type].name);
8026
8027                         hl_device_reset(hdev, 0);
8028                 } else {
8029                         hl_fw_unmask_irq(hdev, event_type);
8030                 }
8031                 break;
8032
8033         case GAUDI_EVENT_TPC0_KRN_ERR:
8034         case GAUDI_EVENT_TPC1_KRN_ERR:
8035         case GAUDI_EVENT_TPC2_KRN_ERR:
8036         case GAUDI_EVENT_TPC3_KRN_ERR:
8037         case GAUDI_EVENT_TPC4_KRN_ERR:
8038         case GAUDI_EVENT_TPC5_KRN_ERR:
8039         case GAUDI_EVENT_TPC6_KRN_ERR:
8040         case GAUDI_EVENT_TPC7_KRN_ERR:
8041                 gaudi_print_irq_info(hdev, event_type, true);
8042                 reset_required = gaudi_tpc_read_interrupts(hdev,
8043                                         tpc_krn_event_to_tpc_id(event_type),
8044                                         "KRN_ERR");
8045                 if (reset_required) {
8046                         dev_err(hdev->dev, "reset required due to %s\n",
8047                                 gaudi_irq_map_table[event_type].name);
8048
8049                         hl_device_reset(hdev, 0);
8050                 } else {
8051                         hl_fw_unmask_irq(hdev, event_type);
8052                 }
8053                 break;
8054
8055         case GAUDI_EVENT_PCIE_CORE_SERR:
8056         case GAUDI_EVENT_PCIE_IF_SERR:
8057         case GAUDI_EVENT_PCIE_PHY_SERR:
8058         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8059         case GAUDI_EVENT_MME0_ACC_SERR:
8060         case GAUDI_EVENT_MME0_SBAB_SERR:
8061         case GAUDI_EVENT_MME1_ACC_SERR:
8062         case GAUDI_EVENT_MME1_SBAB_SERR:
8063         case GAUDI_EVENT_MME2_ACC_SERR:
8064         case GAUDI_EVENT_MME2_SBAB_SERR:
8065         case GAUDI_EVENT_MME3_ACC_SERR:
8066         case GAUDI_EVENT_MME3_SBAB_SERR:
8067         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8068         case GAUDI_EVENT_CPU_IF_ECC_SERR:
8069         case GAUDI_EVENT_PSOC_MEM_SERR:
8070         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8071         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8072         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8073         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8074                 fallthrough;
8075         case GAUDI_EVENT_MMU_SERR:
8076                 gaudi_print_irq_info(hdev, event_type, true);
8077                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8078                 hl_fw_unmask_irq(hdev, event_type);
8079                 break;
8080
8081         case GAUDI_EVENT_PCIE_DEC:
8082         case GAUDI_EVENT_MME0_WBC_RSP:
8083         case GAUDI_EVENT_MME0_SBAB0_RSP:
8084         case GAUDI_EVENT_MME1_WBC_RSP:
8085         case GAUDI_EVENT_MME1_SBAB0_RSP:
8086         case GAUDI_EVENT_MME2_WBC_RSP:
8087         case GAUDI_EVENT_MME2_SBAB0_RSP:
8088         case GAUDI_EVENT_MME3_WBC_RSP:
8089         case GAUDI_EVENT_MME3_SBAB0_RSP:
8090         case GAUDI_EVENT_CPU_AXI_SPLITTER:
8091         case GAUDI_EVENT_PSOC_AXI_DEC:
8092         case GAUDI_EVENT_PSOC_PRSTN_FALL:
8093         case GAUDI_EVENT_MMU_PAGE_FAULT:
8094         case GAUDI_EVENT_MMU_WR_PERM:
8095         case GAUDI_EVENT_RAZWI_OR_ADC:
8096         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8097         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8098         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8099                 fallthrough;
8100         case GAUDI_EVENT_NIC0_QM0:
8101         case GAUDI_EVENT_NIC0_QM1:
8102         case GAUDI_EVENT_NIC1_QM0:
8103         case GAUDI_EVENT_NIC1_QM1:
8104         case GAUDI_EVENT_NIC2_QM0:
8105         case GAUDI_EVENT_NIC2_QM1:
8106         case GAUDI_EVENT_NIC3_QM0:
8107         case GAUDI_EVENT_NIC3_QM1:
8108         case GAUDI_EVENT_NIC4_QM0:
8109         case GAUDI_EVENT_NIC4_QM1:
8110         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8111                 gaudi_print_irq_info(hdev, event_type, true);
8112                 gaudi_handle_qman_err(hdev, event_type);
8113                 hl_fw_unmask_irq(hdev, event_type);
8114                 break;
8115
8116         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8117                 gaudi_print_irq_info(hdev, event_type, true);
8118                 goto reset_device;
8119
8120         case GAUDI_EVENT_TPC0_BMON_SPMU:
8121         case GAUDI_EVENT_TPC1_BMON_SPMU:
8122         case GAUDI_EVENT_TPC2_BMON_SPMU:
8123         case GAUDI_EVENT_TPC3_BMON_SPMU:
8124         case GAUDI_EVENT_TPC4_BMON_SPMU:
8125         case GAUDI_EVENT_TPC5_BMON_SPMU:
8126         case GAUDI_EVENT_TPC6_BMON_SPMU:
8127         case GAUDI_EVENT_TPC7_BMON_SPMU:
8128         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8129                 gaudi_print_irq_info(hdev, event_type, false);
8130                 hl_fw_unmask_irq(hdev, event_type);
8131                 break;
8132
8133         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8134                 gaudi_print_irq_info(hdev, event_type, false);
8135                 gaudi_print_sm_sei_info(hdev, event_type,
8136                                         &eq_entry->sm_sei_data);
8137                 rc = hl_state_dump(hdev);
8138                 if (rc)
8139                         dev_err(hdev->dev,
8140                                 "Error during system state dump %d\n", rc);
8141                 hl_fw_unmask_irq(hdev, event_type);
8142                 break;
8143
8144         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8145                 gaudi_print_clk_change_info(hdev, event_type);
8146                 hl_fw_unmask_irq(hdev, event_type);
8147                 break;
8148
8149         case GAUDI_EVENT_PSOC_GPIO_U16_0:
8150                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8151                 dev_err(hdev->dev,
8152                         "Received high temp H/W interrupt %d (cause %d)\n",
8153                         event_type, cause);
8154                 break;
8155
8156         case GAUDI_EVENT_DEV_RESET_REQ:
8157                 gaudi_print_irq_info(hdev, event_type, false);
8158                 goto reset_device;
8159
8160         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8161                 gaudi_print_irq_info(hdev, event_type, false);
8162                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8163                 goto reset_device;
8164
8165         case GAUDI_EVENT_FW_ALIVE_S:
8166                 gaudi_print_irq_info(hdev, event_type, false);
8167                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8168                 goto reset_device;
8169
8170         default:
8171                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8172                                 event_type);
8173                 break;
8174         }
8175
8176         return;
8177
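/*
 * FW-secured devices must be reset through the FW. Otherwise issue a hard
 * reset directly, unless hard reset on FW events is disabled, in which case
 * only unmask the interrupt.
 */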
8178 reset_device:
8179         if (hdev->asic_prop.fw_security_enabled)
8180                 hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW | fw_fatal_err_flag);
8181         else if (hdev->hard_reset_on_fw_events)
8182                 hl_device_reset(hdev, HL_RESET_HARD | fw_fatal_err_flag);
8183         else
8184                 hl_fw_unmask_irq(hdev, event_type);
8185 }
8186
8187 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8188                                         u32 *size)
8189 {
8190         struct gaudi_device *gaudi = hdev->asic_specific;
8191
8192         if (aggregate) {
8193                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8194                 return gaudi->events_stat_aggregate;
8195         }
8196
8197         *size = (u32) sizeof(gaudi->events_stat);
8198         return gaudi->events_stat;
8199 }
8200
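/*
 * Invalidate the entire MMU cache (L0 & L1). A timeout is considered fatal
 * and triggers a hard reset.
 */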
8201 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8202                                         u32 flags)
8203 {
8204         struct gaudi_device *gaudi = hdev->asic_specific;
8205         u32 status, timeout_usec;
8206         int rc;
8207
8208         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8209                 hdev->hard_reset_pending)
8210                 return 0;
8211
8212         if (hdev->pldm)
8213                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8214         else
8215                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8216
8217         /* L0 & L1 invalidation */
8218         WREG32(mmSTLB_INV_PS, 3);
8219         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8220         WREG32(mmSTLB_INV_PS, 2);
8221
8222         rc = hl_poll_timeout(
8223                 hdev,
8224                 mmSTLB_INV_PS,
8225                 status,
8226                 !status,
8227                 1000,
8228                 timeout_usec);
8229
8230         WREG32(mmSTLB_INV_SET, 0);
8231
8232         if (rc) {
8233                 dev_err_ratelimited(hdev->dev,
8234                                         "MMU cache invalidation timeout\n");
8235                 hl_device_reset(hdev, HL_RESET_HARD);
8236         }
8237
8238         return rc;
8239 }
8240
8241 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8242                                                 bool is_hard, u32 flags,
8243                                                 u32 asid, u64 va, u64 size)
8244 {
8245         /* Treat as invalidate all because there is no range invalidation
8246          * in Gaudi
8247          */
8248         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8249 }
8250
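/*
 * Program the hop-0 page table physical address of the given ASID and wait
 * for the MMU to consume the new configuration.
 */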
8251 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8252                                         u32 asid, u64 phys_addr)
8253 {
8254         u32 status, timeout_usec;
8255         int rc;
8256
8257         if (hdev->pldm)
8258                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8259         else
8260                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8261
8262         WREG32(MMU_ASID, asid);
8263         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8264         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8265         WREG32(MMU_BUSY, 0x80000000);
8266
8267         rc = hl_poll_timeout(
8268                 hdev,
8269                 MMU_BUSY,
8270                 status,
8271                 !(status & 0x80000000),
8272                 1000,
8273                 timeout_usec);
8274
8275         if (rc) {
8276                 dev_err(hdev->dev,
8277                         "Timeout during MMU hop0 config of asid %d\n", asid);
8278                 return rc;
8279         }
8280
8281         return 0;
8282 }
8283
8284 static int gaudi_send_heartbeat(struct hl_device *hdev)
8285 {
8286         struct gaudi_device *gaudi = hdev->asic_specific;
8287
8288         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8289                 return 0;
8290
8291         return hl_fw_send_heartbeat(hdev);
8292 }
8293
8294 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8295 {
8296         struct gaudi_device *gaudi = hdev->asic_specific;
8297         struct asic_fixed_properties *prop = &hdev->asic_prop;
8298         int rc;
8299
8300         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8301                 return 0;
8302
8303         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8304                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8305                                         mmCPU_BOOT_ERR1);
8306         if (rc)
8307                 return rc;
8308
8309         if (!strlen(prop->cpucp_info.card_name))
8310                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8311                                 CARD_NAME_MAX_LEN);
8312
8313         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8314
8315         set_default_power_values(hdev);
8316
8317         hdev->max_power = prop->max_power_default;
8318
8319         return 0;
8320 }
8321
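/*
 * Check whether all DMA, TPC, MME and NIC engines are idle. Busy engines
 * are marked in mask_arr (if provided) and a status table is dumped to the
 * seq_file (if provided).
 */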
8322 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8323                                         u8 mask_len, struct seq_file *s)
8324 {
8325         struct gaudi_device *gaudi = hdev->asic_specific;
8326         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8327         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8328         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8329         unsigned long *mask = (unsigned long *)mask_arr;
8330         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8331         bool is_idle = true, is_eng_idle, is_slave;
8332         u64 offset;
8333         int i, dma_id, port;
8334
8335         mutex_lock(&gaudi->clk_gate_mutex);
8336
8337         hdev->asic_funcs->disable_clock_gating(hdev);
8338
8339         if (s)
8340                 seq_puts(s,
8341                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8342                         "---  -------  ------------  ----------  -------------\n");
8343
8344         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8345                 dma_id = gaudi_dma_assignment[i];
8346                 offset = dma_id * DMA_QMAN_OFFSET;
8347
8348                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8349                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8350                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8351                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8352                                 IS_DMA_IDLE(dma_core_sts0);
8353                 is_idle &= is_eng_idle;
8354
8355                 if (mask && !is_eng_idle)
8356                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8357                 if (s)
8358                         seq_printf(s, fmt, dma_id,
8359                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8360                                 qm_cgm_sts, dma_core_sts0);
8361         }
8362
8363         if (s)
8364                 seq_puts(s,
8365                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8366                         "---  -------  ------------  ----------  ----------\n");
8367
8368         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8369                 offset = i * TPC_QMAN_OFFSET;
8370                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8371                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8372                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8373                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8374                                 IS_TPC_IDLE(tpc_cfg_sts);
8375                 is_idle &= is_eng_idle;
8376
8377                 if (mask && !is_eng_idle)
8378                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8379                 if (s)
8380                         seq_printf(s, fmt, i,
8381                                 is_eng_idle ? "Y" : "N",
8382                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8383         }
8384
8385         if (s)
8386                 seq_puts(s,
8387                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8388                         "---  -------  ------------  ----------  -----------\n");
8389
8390         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8391                 offset = i * MME_QMAN_OFFSET;
8392                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8393                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8394
8395                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8396                 is_slave = i % 2;
8397                 if (!is_slave) {
8398                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8399                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8400                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8401                 }
8402
8403                 is_idle &= is_eng_idle;
8404
8405                 if (mask && !is_eng_idle)
8406                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8407                 if (s) {
8408                         if (!is_slave)
8409                                 seq_printf(s, fmt, i,
8410                                         is_eng_idle ? "Y" : "N",
8411                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8412                         else
8413                                 seq_printf(s, mme_slave_fmt, i,
8414                                         is_eng_idle ? "Y" : "N", "-",
8415                                         "-", mme_arch_sts);
8416                 }
8417         }
8418
8419         if (s)
8420                 seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8421                                 "---  -------  ------------  ----------\n");
8422
8423         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8424                 offset = i * NIC_MACRO_QMAN_OFFSET;
8425                 port = 2 * i;
8426                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8427                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8428                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8429                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8430                         is_idle &= is_eng_idle;
8431
8432                         if (mask && !is_eng_idle)
8433                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8434                         if (s)
8435                                 seq_printf(s, nic_fmt, port,
8436                                                 is_eng_idle ? "Y" : "N",
8437                                                 qm_glbl_sts0, qm_cgm_sts);
8438                 }
8439
8440                 port = 2 * i + 1;
8441                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8442                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8443                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8444                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8445                         is_idle &= is_eng_idle;
8446
8447                         if (mask && !is_eng_idle)
8448                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8449                         if (s)
8450                                 seq_printf(s, nic_fmt, port,
8451                                                 is_eng_idle ? "Y" : "N",
8452                                                 qm_glbl_sts0, qm_cgm_sts);
8453                 }
8454         }
8455
8456         if (s)
8457                 seq_puts(s, "\n");
8458
8459         hdev->asic_funcs->set_clock_gating(hdev);
8460
8461         mutex_unlock(&gaudi->clk_gate_mutex);
8462
8463         return is_idle;
8464 }
8465
8466 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8467         __acquires(&gaudi->hw_queues_lock)
8468 {
8469         struct gaudi_device *gaudi = hdev->asic_specific;
8470
8471         spin_lock(&gaudi->hw_queues_lock);
8472 }
8473
8474 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8475         __releases(&gaudi->hw_queues_lock)
8476 {
8477         struct gaudi_device *gaudi = hdev->asic_specific;
8478
8479         spin_unlock(&gaudi->hw_queues_lock);
8480 }
8481
8482 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8483 {
8484         return hdev->pdev->device;
8485 }
8486
8487 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8488                                 size_t max_size)
8489 {
8490         struct gaudi_device *gaudi = hdev->asic_specific;
8491
8492         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8493                 return 0;
8494
8495         return hl_fw_get_eeprom_data(hdev, data, max_size);
8496 }
8497
8498 /*
8499  * this function should be used only during initialization and/or after reset,
8500  * when there are no active users.
8501  */
8502 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8503                                 u32 tpc_id)
8504 {
8505         struct gaudi_device *gaudi = hdev->asic_specific;
8506         u64 kernel_timeout;
8507         u32 status, offset;
8508         int rc;
8509
8510         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8511
8512         if (hdev->pldm)
8513                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8514         else
8515                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8516
8517         mutex_lock(&gaudi->clk_gate_mutex);
8518
8519         hdev->asic_funcs->disable_clock_gating(hdev);
8520
8521         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8522                         lower_32_bits(tpc_kernel));
8523         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8524                         upper_32_bits(tpc_kernel));
8525
8526         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8527                         lower_32_bits(tpc_kernel));
8528         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8529                         upper_32_bits(tpc_kernel));
8530         /* set a valid LUT pointer, content is of no significance */
8531         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8532                         lower_32_bits(tpc_kernel));
8533         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8534                         upper_32_bits(tpc_kernel));
8535
8536         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8537                         lower_32_bits(CFG_BASE +
8538                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8539
8540         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8541                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8542                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8543         /* wait a bit for the engine to start executing */
8544         usleep_range(1000, 1500);
8545
8546         /* wait until engine has finished executing */
8547         rc = hl_poll_timeout(
8548                 hdev,
8549                 mmTPC0_CFG_STATUS + offset,
8550                 status,
8551                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8552                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8553                 1000,
8554                 kernel_timeout);
8555
8556         if (rc) {
8557                 dev_err(hdev->dev,
8558                         "Timeout while waiting for TPC%d icache prefetch\n",
8559                         tpc_id);
8560                 hdev->asic_funcs->set_clock_gating(hdev);
8561                 mutex_unlock(&gaudi->clk_gate_mutex);
8562                 return -EIO;
8563         }
8564
8565         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8566                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8567
8568         /* wait a bit for the engine to start executing */
8569         usleep_range(1000, 1500);
8570
8571         /* wait until engine has finished executing */
8572         rc = hl_poll_timeout(
8573                 hdev,
8574                 mmTPC0_CFG_STATUS + offset,
8575                 status,
8576                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8577                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8578                 1000,
8579                 kernel_timeout);
8580
8581         if (rc) {
8582                 dev_err(hdev->dev,
8583                         "Timeout while waiting for TPC%d vector pipe\n",
8584                         tpc_id);
8585                 hdev->asic_funcs->set_clock_gating(hdev);
8586                 mutex_unlock(&gaudi->clk_gate_mutex);
8587                 return -EIO;
8588         }
8589
8590         rc = hl_poll_timeout(
8591                 hdev,
8592                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8593                 status,
8594                 (status == 0),
8595                 1000,
8596                 kernel_timeout);
8597
8598         hdev->asic_funcs->set_clock_gating(hdev);
8599         mutex_unlock(&gaudi->clk_gate_mutex);
8600
8601         if (rc) {
8602                 dev_err(hdev->dev,
8603                         "Timeout while waiting for TPC%d kernel to execute\n",
8604                         tpc_id);
8605                 return -EIO;
8606         }
8607
8608         return 0;
8609 }
8610
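/*
 * Allocate a host-resident pool of internal command buffers, back it with a
 * gen_pool allocator and map it to the device through the context's MMU.
 */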
8611 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8612                 struct hl_ctx *ctx)
8613 {
8614         struct gaudi_device *gaudi = hdev->asic_specific;
8615         int min_alloc_order, rc, collective_cb_size;
8616
8617         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8618                 return 0;
8619
8620         hdev->internal_cb_pool_virt_addr =
8621                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8622                                         HOST_SPACE_INTERNAL_CB_SZ,
8623                                         &hdev->internal_cb_pool_dma_addr,
8624                                         GFP_KERNEL | __GFP_ZERO);
8625
8626         if (!hdev->internal_cb_pool_virt_addr)
8627                 return -ENOMEM;
8628
8629         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8630                         sizeof(struct packet_fence);
8631         min_alloc_order = ilog2(collective_cb_size);
8632
8633         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8634         if (!hdev->internal_cb_pool) {
8635                 dev_err(hdev->dev,
8636                         "Failed to create internal CB pool\n");
8637                 rc = -ENOMEM;
8638                 goto free_internal_cb_pool;
8639         }
8640
8641         rc = gen_pool_add(hdev->internal_cb_pool,
8642                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8643                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8644         if (rc) {
8645                 dev_err(hdev->dev,
8646                         "Failed to add memory to internal CB pool\n");
8647                 rc = -EFAULT;
8648                 goto destroy_internal_cb_pool;
8649         }
8650
8651         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8652                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8653                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8654
8655         if (!hdev->internal_cb_va_base) {
8656                 rc = -ENOMEM;
8657                 goto destroy_internal_cb_pool;
8658         }
8659
8660         mutex_lock(&ctx->mmu_lock);
8661         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8662                         hdev->internal_cb_pool_dma_addr,
8663                         HOST_SPACE_INTERNAL_CB_SZ);
8664
8665         hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
8666         mutex_unlock(&ctx->mmu_lock);
8667
8668         if (rc)
8669                 goto unreserve_internal_cb_pool;
8670
8671         return 0;
8672
8673 unreserve_internal_cb_pool:
8674         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8675                         HOST_SPACE_INTERNAL_CB_SZ);
8676 destroy_internal_cb_pool:
8677         gen_pool_destroy(hdev->internal_cb_pool);
8678 free_internal_cb_pool:
8679         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8680                         HOST_SPACE_INTERNAL_CB_SZ,
8681                         hdev->internal_cb_pool_virt_addr,
8682                         hdev->internal_cb_pool_dma_addr);
8683
8684         return rc;
8685 }
8686
8687 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8688                 struct hl_ctx *ctx)
8689 {
8690         struct gaudi_device *gaudi = hdev->asic_specific;
8691
8692         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8693                 return;
8694
8695         mutex_lock(&ctx->mmu_lock);
8696         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8697                         HOST_SPACE_INTERNAL_CB_SZ);
8698         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8699                         HOST_SPACE_INTERNAL_CB_SZ);
8700         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
8701         mutex_unlock(&ctx->mmu_lock);
8702
8703         gen_pool_destroy(hdev->internal_cb_pool);
8704
8705         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8706                         HOST_SPACE_INTERNAL_CB_SZ,
8707                         hdev->internal_cb_pool_virt_addr,
8708                         hdev->internal_cb_pool_dma_addr);
8709 }
8710
8711 static int gaudi_ctx_init(struct hl_ctx *ctx)
8712 {
8713         int rc;
8714
8715         if (ctx->asid == HL_KERNEL_ASID_ID)
8716                 return 0;
8717
8718         rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8719         if (rc)
8720                 return rc;
8721
8722         rc = gaudi_restore_user_registers(ctx->hdev);
8723         if (rc)
8724                 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8725
8726         return rc;
8727 }
8728
8729 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8730 {
8731         if (ctx->asid == HL_KERNEL_ASID_ID)
8732                 return;
8733
8734         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8735 }
8736
8737 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8738 {
8739         return gaudi_cq_assignment[cq_idx];
8740 }
8741
8742 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8743 {
8744         return sizeof(struct packet_msg_short) +
8745                         sizeof(struct packet_msg_prot) * 2;
8746 }
8747
8748 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8749 {
8750         return sizeof(struct packet_msg_short) * 4 +
8751                         sizeof(struct packet_fence) +
8752                         sizeof(struct packet_msg_prot) * 2;
8753 }
8754
8755 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8756 {
8757         return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8758 }
8759
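/*
 * Append a MSG_SHORT packet to the CB that increments the given SOB by 1.
 * Returns the updated CB size.
 */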
8760 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8761                                 u32 size, bool eb)
8762 {
8763         struct hl_cb *cb = (struct hl_cb *) data;
8764         struct packet_msg_short *pkt;
8765         u32 value, ctl, pkt_size = sizeof(*pkt);
8766
8767         pkt = cb->kernel_address + size;
8768         memset(pkt, 0, pkt_size);
8769
8770         /* Inc by 1, Mode ADD */
8771         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8772         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8773
8774         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8775         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8776         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8777         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8778         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8779         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8780         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8781
8782         pkt->value = cpu_to_le32(value);
8783         pkt->ctl = cpu_to_le32(ctl);
8784
8785         return size + pkt_size;
8786 }
8787
8788 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8789                                         u16 addr)
8790 {
8791         u32 ctl, pkt_size = sizeof(*pkt);
8792
8793         memset(pkt, 0, pkt_size);
8794
8795         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8796         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8797         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8798         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8799         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8800         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* MB only on last pkt */
8801
8802         pkt->value = cpu_to_le32(value);
8803         pkt->ctl = cpu_to_le32(ctl);
8804
8805         return pkt_size;
8806 }
8807
8808 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8809                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8810                 u16 sob_val, u16 mon_id)
8811 {
8812         u64 monitor_base;
8813         u32 ctl, value, pkt_size = sizeof(*pkt);
8814         u16 msg_addr_offset;
8815         u8 mask;
8816
8817         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8818                 dev_err(hdev->dev,
8819                         "sob_base %u (mask %#x) is not valid\n",
8820                         sob_base, sob_mask);
8821                 return 0;
8822         }
8823
8824         /*
8825          * monitor_base should be the content of the base0 address registers,
8826          * so it will be added to the msg short offsets
8827          */
8828         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8829
8830         msg_addr_offset =
8831                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8832                                 monitor_base;
8833
8834         memset(pkt, 0, pkt_size);
8835
8836         /* Monitor config packet: bind the monitor to a sync object */
8837         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8838         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8839         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8840                         0); /* GREATER OR EQUAL */
8841         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8842
8843         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8844         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8845         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8846         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8847         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8848         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8849         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8850
8851         pkt->value = cpu_to_le32(value);
8852         pkt->ctl = cpu_to_le32(ctl);
8853
8854         return pkt_size;
8855 }
8856
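/* Build a FENCE packet that waits for fence ID 2 to reach a value of 1 */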
8857 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8858 {
8859         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8860
8861         memset(pkt, 0, pkt_size);
8862
8863         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8864         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8865         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8866
8867         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8868         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8869         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8870         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8871
8872         pkt->cfg = cpu_to_le32(cfg);
8873         pkt->ctl = cpu_to_le32(ctl);
8874
8875         return pkt_size;
8876 }
8877
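/* Get the CFG space address of the CP_FENCE2_RDATA register of the queue */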
8878 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8879 {
8880         u32 offset, nic_index;
8881
8882         switch (queue_id) {
8883         case GAUDI_QUEUE_ID_DMA_0_0:
8884                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8885                 break;
8886         case GAUDI_QUEUE_ID_DMA_0_1:
8887                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8888                 break;
8889         case GAUDI_QUEUE_ID_DMA_0_2:
8890                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8891                 break;
8892         case GAUDI_QUEUE_ID_DMA_0_3:
8893                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8894                 break;
8895         case GAUDI_QUEUE_ID_DMA_1_0:
8896                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8897                 break;
8898         case GAUDI_QUEUE_ID_DMA_1_1:
8899                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8900                 break;
8901         case GAUDI_QUEUE_ID_DMA_1_2:
8902                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8903                 break;
8904         case GAUDI_QUEUE_ID_DMA_1_3:
8905                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8906                 break;
8907         case GAUDI_QUEUE_ID_DMA_5_0:
8908                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8909                 break;
8910         case GAUDI_QUEUE_ID_DMA_5_1:
8911                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8912                 break;
8913         case GAUDI_QUEUE_ID_DMA_5_2:
8914                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8915                 break;
8916         case GAUDI_QUEUE_ID_DMA_5_3:
8917                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8918                 break;
8919         case GAUDI_QUEUE_ID_TPC_7_0:
8920                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8921                 break;
8922         case GAUDI_QUEUE_ID_TPC_7_1:
8923                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8924                 break;
8925         case GAUDI_QUEUE_ID_TPC_7_2:
8926                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8927                 break;
8928         case GAUDI_QUEUE_ID_TPC_7_3:
8929                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8930                 break;
8931         case GAUDI_QUEUE_ID_NIC_0_0:
8932         case GAUDI_QUEUE_ID_NIC_1_0:
8933         case GAUDI_QUEUE_ID_NIC_2_0:
8934         case GAUDI_QUEUE_ID_NIC_3_0:
8935         case GAUDI_QUEUE_ID_NIC_4_0:
8936         case GAUDI_QUEUE_ID_NIC_5_0:
8937         case GAUDI_QUEUE_ID_NIC_6_0:
8938         case GAUDI_QUEUE_ID_NIC_7_0:
8939         case GAUDI_QUEUE_ID_NIC_8_0:
8940         case GAUDI_QUEUE_ID_NIC_9_0:
8941                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8942                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8943                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8944                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8945                 break;
8946         case GAUDI_QUEUE_ID_NIC_0_1:
8947         case GAUDI_QUEUE_ID_NIC_1_1:
8948         case GAUDI_QUEUE_ID_NIC_2_1:
8949         case GAUDI_QUEUE_ID_NIC_3_1:
8950         case GAUDI_QUEUE_ID_NIC_4_1:
8951         case GAUDI_QUEUE_ID_NIC_5_1:
8952         case GAUDI_QUEUE_ID_NIC_6_1:
8953         case GAUDI_QUEUE_ID_NIC_7_1:
8954         case GAUDI_QUEUE_ID_NIC_8_1:
8955         case GAUDI_QUEUE_ID_NIC_9_1:
8956                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8957                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8958                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8959                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8960                 break;
8961         case GAUDI_QUEUE_ID_NIC_0_2:
8962         case GAUDI_QUEUE_ID_NIC_1_2:
8963         case GAUDI_QUEUE_ID_NIC_2_2:
8964         case GAUDI_QUEUE_ID_NIC_3_2:
8965         case GAUDI_QUEUE_ID_NIC_4_2:
8966         case GAUDI_QUEUE_ID_NIC_5_2:
8967         case GAUDI_QUEUE_ID_NIC_6_2:
8968         case GAUDI_QUEUE_ID_NIC_7_2:
8969         case GAUDI_QUEUE_ID_NIC_8_2:
8970         case GAUDI_QUEUE_ID_NIC_9_2:
8971                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8972                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8973                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8974                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8975                 break;
8976         case GAUDI_QUEUE_ID_NIC_0_3:
8977         case GAUDI_QUEUE_ID_NIC_1_3:
8978         case GAUDI_QUEUE_ID_NIC_2_3:
8979         case GAUDI_QUEUE_ID_NIC_3_3:
8980         case GAUDI_QUEUE_ID_NIC_4_3:
8981         case GAUDI_QUEUE_ID_NIC_5_3:
8982         case GAUDI_QUEUE_ID_NIC_6_3:
8983         case GAUDI_QUEUE_ID_NIC_7_3:
8984         case GAUDI_QUEUE_ID_NIC_8_3:
8985         case GAUDI_QUEUE_ID_NIC_9_3:
8986                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8987                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8988                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8989                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8990                 break;
8991         default:
8992                 return -EINVAL;
8993         }
8994
8995         *addr = CFG_BASE + offset;
8996
8997         return 0;
8998 }
8999
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when
	 * the sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}

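/*
 * gaudi_gen_wait_cb() - build a wait command buffer from the given properties.
 *
 * Appends, after the @prop->size bytes already present in the CB, the monitor
 * configuration packets, the monitor ARM packet (SOB base, mask and target
 * value) and a FENCE packet that stalls the queue until the armed monitor
 * writes its payload to the fence register. Returns the new CB size, or 0 if
 * the queue id has no fence address.
 */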
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

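/*
 * gaudi_reset_sob() - clear a sync object and reset its reference count.
 *
 * Zeroes the SOB register in the SYNC_MNGR_W_S sync manager and re-initializes
 * the kref so the SOB can be reused by a new signal/wait chain.
 */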
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

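/*
 * gaudi_set_dma_mask_from_fw() - choose the host DMA mask based on firmware.
 *
 * If the boot firmware left HL_POWER9_HOST_MAGIC in the PSOC non-reset scratch
 * register, the host (e.g. a POWER9 machine) can use full 64-bit DMA
 * addressing; otherwise the driver falls back to a 48-bit DMA mask.
 */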
static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
{
	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
							HL_POWER9_HOST_MAGIC) {
		hdev->power9_64bit_dma_enable = 1;
		hdev->dma_mask = 64;
	} else {
		hdev->power9_64bit_dma_enable = 0;
		hdev->dma_mask = 48;
	}
}

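/*
 * gaudi_get_device_time() - read the free-running device timestamp counter.
 *
 * Combines the upper (CNTCVU) and lower (CNTCVL) 32-bit halves of the PSOC
 * timestamp counter into a single 64-bit value.
 */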
static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

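/*
 * HW block mapping is not supported on Gaudi, so the block-id lookup and the
 * block mmap callbacks simply reject the request with -EPERM.
 */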
static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
				u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

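/*
 * gaudi_enable_events_from_fw() - ask the device CPU to start sending events.
 *
 * Writes the cpu_id of the GAUDI_EVENT_INTS_REGISTER entry to the interrupt
 * handler register - either the hard-coded GIC SETSPI register or, for
 * firmware with dynamic registers, the offset reported in cpu_dyn_regs -
 * signalling that the driver is ready to receive firmware events.
 */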
static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}

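/*
 * gaudi_map_pll_idx_to_fw_idx() - translate a common HL_GAUDI_*_PLL index used
 * by the driver interface into the firmware's PLL enumeration, or return
 * -EINVAL for an unknown index.
 */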
static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}

static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* The reg value represents a partial address of the sync object and
	 * is used as a unique identifier. For this, the cfg base bits must be
	 * stripped from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= (u32)CFG_BASE;

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}

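/*
 * gaudi_gen_sync_to_engine_map() - build the sync-object-to-engine map used by
 * the state dump.
 *
 * For every TPC, MME and DMA engine, read the sync object address the engine
 * was last configured to signal and record a map entry keyed by that
 * (cfg-base stripped) address. TPC and MME registers are read with clock
 * gating disabled.
 */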
static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	struct gaudi_device *gaudi = hdev->asic_specific;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
		/* TPC registers must be accessed with clock gating disabled */
		mutex_lock(&gaudi->clk_gate_mutex);
		hdev->asic_funcs->disable_clock_gating(hdev);

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		/* We can re-enable clock gating now */
		hdev->asic_funcs->set_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
			/* MME registers must be accessed with clock gating
			 * disabled
			 */
			mutex_lock(&gaudi->clk_gate_mutex);
			hdev->asic_funcs->disable_clock_gating(hdev);

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			/* We can re-enable clock gating now */
			hdev->asic_funcs->set_clock_gating(hdev);
			mutex_unlock(&gaudi->clk_gate_mutex);

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}

static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}

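/*
 * gaudi_fill_sobs_from_mon() - list the sync objects a monitor is armed on.
 *
 * The monitor's ARM data holds a group id (SID) and a mask in which a cleared
 * bit selects a SOB within that group, so each monitored SOB id is
 * group_id * MONITOR_MAX_SOBS + bit_position. For example (assuming
 * MONITOR_MAX_SOBS is 8), gid = 3 with mask 0b11111100 yields SOBs 24 and 25.
 * The ids are written into @sobs as a comma-separated string.
 */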
static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* The sync object ID is calculated as
	 * (8 * group_id + position of each cleared bit in the mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}

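/*
 * gaudi_print_single_monitor() - append a one-line description of an armed
 * monitor to the state dump buffer.
 *
 * Decodes the ARM data (group id, mask and target value), the write address
 * and payload, and the pending status, and lists the monitored SOB ids via
 * gaudi_fill_sobs_from_mon(). The output buffer is grown as needed by
 * hl_snprintf_resize().
 */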
static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}

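/*
 * gaudi_print_fences_single_engine() - dump the fence state of one engine QM.
 *
 * Reads the CP status registers and the fence counter registers of every
 * queue/stream of the engine, and for each queue whose CP is currently
 * blocked on a fence prints the fence id, the counter and RDATA register
 * addresses, the fence value and the raw CP status.
 */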
static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			 sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}

static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};

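/*
 * gaudi_state_dump_init() - register the Gaudi-specific state dump tables.
 *
 * Populates the SOB-id and monitor-id to name hash tables, and points the
 * common state dump code at the Gaudi property array, sync manager names and
 * the callback functions defined above.
 */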
static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}

static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}

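/*
 * gaudi_funcs - the ASIC-specific callback table through which the common
 * habanalabs driver code drives the Gaudi device.
 */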
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sg = gaudi_dma_map_sg,
	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read32 = gaudi_debugfs_read32,
	.debugfs_write32 = gaudi_debugfs_write32,
	.debugfs_read64 = gaudi_debugfs_read64,
	.debugfs_write64 = gaudi_debugfs_write64,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = hl_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.set_pll_profile = hl_set_pll_profile,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.send_heartbeat = gaudi_send_heartbeat,
	.set_clock_gating = gaudi_set_clock_gating,
	.disable_clock_gating = gaudi_disable_clock_gating,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.soft_reset_late_init = gaudi_soft_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.get_clk_rate = hl_get_clk_rate,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
	.get_device_time = gaudi_get_device_time,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}