1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2020 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse the CB, but WREG should be
43  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
44  *                      never secured.
45  *
46  * When the driver needs to use DMA, it checks that Gaudi is idle, sets DMA
47  * channel 0 to be secured, executes the DMA, and changes it back to
48  * non-secured. Currently, the driver doesn't use the DMA while there are
49  * compute jobs running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clearing SRAM on context switch (happens only when the device is
53  *       idle)
54  *     - Clearing the MMU page tables area (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000         /* 1s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN            20
86
87 #define GAUDI_CB_POOL_CB_CNT            512
88 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
97
98 #define GAUDI_ARB_WDT_TIMEOUT           0x1000000
99
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
101                 BIT(GAUDI_ENGINE_ID_MME_0) |\
102                 BIT(GAUDI_ENGINE_ID_MME_2) |\
103                 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
104
105 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
106
107 #define GAUDI_PLL_MAX 10
108
109 #define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")
110
111 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
112                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
113                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
114                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
115                 "gaudi cpu eq"
116 };
117
118 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
119         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
120         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
121         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
122         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
123         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
124         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
125         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
126         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
127 };
128
129 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
130         [0] = GAUDI_QUEUE_ID_DMA_0_0,
131         [1] = GAUDI_QUEUE_ID_DMA_0_1,
132         [2] = GAUDI_QUEUE_ID_DMA_0_2,
133         [3] = GAUDI_QUEUE_ID_DMA_0_3,
134         [4] = GAUDI_QUEUE_ID_DMA_1_0,
135         [5] = GAUDI_QUEUE_ID_DMA_1_1,
136         [6] = GAUDI_QUEUE_ID_DMA_1_2,
137         [7] = GAUDI_QUEUE_ID_DMA_1_3,
138 };
139
140 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
141         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
142         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
143         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
144         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
145         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
146         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
147         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
148         [PACKET_FENCE]          = sizeof(struct packet_fence),
149         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
150         [PACKET_NOP]            = sizeof(struct packet_nop),
151         [PACKET_STOP]           = sizeof(struct packet_stop),
152         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
153         [PACKET_WAIT]           = sizeof(struct packet_wait),
154         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
155 };
156
157 static inline bool validate_packet_id(enum packet_id id)
158 {
159         switch (id) {
160         case PACKET_WREG_32:
161         case PACKET_WREG_BULK:
162         case PACKET_MSG_LONG:
163         case PACKET_MSG_SHORT:
164         case PACKET_CP_DMA:
165         case PACKET_REPEAT:
166         case PACKET_MSG_PROT:
167         case PACKET_FENCE:
168         case PACKET_LIN_DMA:
169         case PACKET_NOP:
170         case PACKET_STOP:
171         case PACKET_ARB_POINT:
172         case PACKET_WAIT:
173         case PACKET_LOAD_AND_EXE:
174                 return true;
175         default:
176                 return false;
177         }
178 }
179
180 static const char * const
181 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
182         "tpc_address_exceed_slm",
183         "tpc_div_by_0",
184         "tpc_spu_mac_overflow",
185         "tpc_spu_addsub_overflow",
186         "tpc_spu_abs_overflow",
187         "tpc_spu_fp_dst_nan_inf",
188         "tpc_spu_fp_dst_denorm",
189         "tpc_vpu_mac_overflow",
190         "tpc_vpu_addsub_overflow",
191         "tpc_vpu_abs_overflow",
192         "tpc_vpu_fp_dst_nan_inf",
193         "tpc_vpu_fp_dst_denorm",
194         "tpc_assertions",
195         "tpc_illegal_instruction",
196         "tpc_pc_wrap_around",
197         "tpc_qm_sw_err",
198         "tpc_hbw_rresp_err",
199         "tpc_hbw_bresp_err",
200         "tpc_lbw_rresp_err",
201         "tpc_lbw_bresp_err"
202 };
203
204 static const char * const
205 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
206         "PQ AXI HBW error",
207         "CQ AXI HBW error",
208         "CP AXI HBW error",
209         "CP error due to undefined OPCODE",
210         "CP encountered STOP OPCODE",
211         "CP AXI LBW error",
212         "CP WRREG32 or WRBULK returned error",
213         "N/A",
214         "FENCE 0 inc over max value and clipped",
215         "FENCE 1 inc over max value and clipped",
216         "FENCE 2 inc over max value and clipped",
217         "FENCE 3 inc over max value and clipped",
218         "FENCE 0 dec under min value and clipped",
219         "FENCE 1 dec under min value and clipped",
220         "FENCE 2 dec under min value and clipped",
221         "FENCE 3 dec under min value and clipped"
222 };
223
224 static const char * const
225 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
226         "Choice push while full error",
227         "Choice Q watchdog error",
228         "MSG AXI LBW returned with error"
229 };
230
231 enum gaudi_sm_sei_cause {
232         GAUDI_SM_SEI_SO_OVERFLOW,
233         GAUDI_SM_SEI_LBW_4B_UNALIGNED,
234         GAUDI_SM_SEI_AXI_RESPONSE_ERR
235 };
236
237 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
238         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
239         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
240         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
241         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
242         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
243         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
244         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
245         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
246         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
247         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
248         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
249         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
250         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
251         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
252         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
253         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
254         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
255         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
256         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
257         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
258         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
259         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
349         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
350         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
351 };
352
353 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
354         { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
355         { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
356         { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
357         { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
358         { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
359         { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
360         { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
361         { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
362         { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
363         { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
364         { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
365         { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
366         { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
367         { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
368         { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
369         { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
370         { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
371         { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
372         { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
373         { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
374         { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
375         { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
376         { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
377         { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
378         { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
379         { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
380         { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
381 };
382
383 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
384         { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
385         { .id = 201, .name = "MON_OBJ_DMA_UP_FEADBACK_RESET" },
386         { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
387         { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
388         { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
389         { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
390         { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
391         { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
392         { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
393         { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
394         { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
395 };
396
397 static s64 gaudi_state_dump_specs_props[] = {
398         [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
399         [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
400         [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
401         [SP_MON_OBJ_WR_ADDR_LOW] =
402                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
403         [SP_MON_OBJ_WR_ADDR_HIGH] =
404                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
405         [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
406         [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
407         [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
408         [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
409         [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
410         [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
411         [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
412         [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
413         [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
414         [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
415         [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
416         [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
417         [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
418         [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
419         [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
420         [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
421         [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
422         [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
423         [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
424         [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
425         [SP_FENCE0_CNT_OFFSET] =
426                 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
427         [SP_FENCE0_RDATA_OFFSET] =
428                 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
429         [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
430         [SP_NUM_CORES] = 1,
431 };
432
433 /* The order here is opposite to the order of the indexing in the h/w.
434  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
435  */
436 static const char * const gaudi_sync_manager_names[] = {
437         "SYNC_MGR_E_N",
438         "SYNC_MGR_W_N",
439         "SYNC_MGR_E_S",
440         "SYNC_MGR_W_S",
441         NULL
442 };
443
444 struct ecc_info_extract_params {
445         u64 block_address;
446         u32 num_memories;
447         bool derr;
448         bool disable_clock_gating;
449 };
450
451 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
452                                                                 u64 phys_addr);
453 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
454                                         struct hl_cs_job *job);
455 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
456                                         u32 size, u64 val);
457 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
458                                         u32 num_regs, u32 val);
459 static int gaudi_schedule_register_memset(struct hl_device *hdev,
460                 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
461 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
462                                 u32 tpc_id);
463 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
464 static int gaudi_cpucp_info_get(struct hl_device *hdev);
465 static void gaudi_disable_clock_gating(struct hl_device *hdev);
466 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
467 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
468                                 u32 size, bool eb);
469 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
470                                 struct hl_gen_wait_properties *prop);
471
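/*
 * get_collective_mode() - return the collective mode of a queue.
 *
 * External queues act as collective masters, while the DMA5, TPC7 and NIC
 * queues act as collective slaves. All other queues do not take part in
 * collective operations.
 */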
472 static inline enum hl_collective_mode
473 get_collective_mode(struct hl_device *hdev, u32 queue_id)
474 {
475         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
476                 return HL_COLLECTIVE_MASTER;
477
478         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
479                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
480                 return HL_COLLECTIVE_SLAVE;
481
482         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
483                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
484                 return HL_COLLECTIVE_SLAVE;
485
486         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
487                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
488                 return HL_COLLECTIVE_SLAVE;
489
490         return HL_COLLECTIVE_NOT_SUPPORTED;
491 }
492
493 static inline void set_default_power_values(struct hl_device *hdev)
494 {
495         struct asic_fixed_properties *prop = &hdev->asic_prop;
496
497         if (hdev->card_type == cpucp_card_type_pmc) {
498                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
499                 prop->dc_power_default = DC_POWER_DEFAULT_PMC;
500         } else {
501                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
502                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
503         }
504 }
505
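/*
 * gaudi_set_fixed_properties() - fill the ASIC fixed properties.
 *
 * Initializes the per-queue properties, the SOB/monitor ranges reserved for
 * sync-stream and collective operations, the DRAM/SRAM address ranges, the
 * MMU properties and various firmware-related defaults.
 *
 * Return: 0 on success, -ENOMEM if the queue properties array cannot be
 * allocated.
 */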
506 static int gaudi_set_fixed_properties(struct hl_device *hdev)
507 {
508         struct asic_fixed_properties *prop = &hdev->asic_prop;
509         u32 num_sync_stream_queues = 0;
510         int i;
511
512         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
513         prop->hw_queues_props = kcalloc(prop->max_queues,
514                         sizeof(struct hw_queue_properties),
515                         GFP_KERNEL);
516
517         if (!prop->hw_queues_props)
518                 return -ENOMEM;
519
520         for (i = 0 ; i < prop->max_queues ; i++) {
521                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
522                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
523                         prop->hw_queues_props[i].driver_only = 0;
524                         prop->hw_queues_props[i].supports_sync_stream = 1;
525                         prop->hw_queues_props[i].cb_alloc_flags =
526                                 CB_ALLOC_KERNEL;
527                         num_sync_stream_queues++;
528                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
529                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
530                         prop->hw_queues_props[i].driver_only = 1;
531                         prop->hw_queues_props[i].supports_sync_stream = 0;
532                         prop->hw_queues_props[i].cb_alloc_flags =
533                                 CB_ALLOC_KERNEL;
534                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
535                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
536                         prop->hw_queues_props[i].driver_only = 0;
537                         prop->hw_queues_props[i].supports_sync_stream = 0;
538                         prop->hw_queues_props[i].cb_alloc_flags =
539                                 CB_ALLOC_USER;
540
541                 }
542                 prop->hw_queues_props[i].collective_mode =
543                                                 get_collective_mode(hdev, i);
544         }
545
546         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
547         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
548         prop->collective_first_sob = 0;
549         prop->collective_first_mon = 0;
550
551         /* 2 SOBs per internal queue stream are reserved for collective */
552         prop->sync_stream_first_sob =
553                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
554                         * QMAN_STREAMS * HL_RSVD_SOBS;
555
556         /* 1 monitor per internal queue stream is reserved for collective.
557          * 2 monitors per external queue stream are reserved for collective.
558          */
559         prop->sync_stream_first_mon =
560                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
561                         (NUMBER_OF_EXT_HW_QUEUES * 2);
562
563         prop->dram_base_address = DRAM_PHYS_BASE;
564         prop->dram_size = GAUDI_HBM_SIZE_32GB;
565         prop->dram_end_address = prop->dram_base_address +
566                                         prop->dram_size;
567         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
568
569         prop->sram_base_address = SRAM_BASE_ADDR;
570         prop->sram_size = SRAM_SIZE;
571         prop->sram_end_address = prop->sram_base_address +
572                                         prop->sram_size;
573         prop->sram_user_base_address = prop->sram_base_address +
574                                         SRAM_USER_BASE_OFFSET;
575
576         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
577         if (hdev->pldm)
578                 prop->mmu_pgt_size = 0x800000; /* 8MB */
579         else
580                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
581         prop->mmu_pte_size = HL_PTE_SIZE;
582         prop->mmu_hop_table_size = HOP_TABLE_SIZE;
583         prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
584         prop->dram_page_size = PAGE_SIZE_2MB;
585         prop->dram_supports_virtual_memory = false;
586
587         prop->pmmu.hop0_shift = HOP0_SHIFT;
588         prop->pmmu.hop1_shift = HOP1_SHIFT;
589         prop->pmmu.hop2_shift = HOP2_SHIFT;
590         prop->pmmu.hop3_shift = HOP3_SHIFT;
591         prop->pmmu.hop4_shift = HOP4_SHIFT;
592         prop->pmmu.hop0_mask = HOP0_MASK;
593         prop->pmmu.hop1_mask = HOP1_MASK;
594         prop->pmmu.hop2_mask = HOP2_MASK;
595         prop->pmmu.hop3_mask = HOP3_MASK;
596         prop->pmmu.hop4_mask = HOP4_MASK;
597         prop->pmmu.start_addr = VA_HOST_SPACE_START;
598         prop->pmmu.end_addr =
599                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
600         prop->pmmu.page_size = PAGE_SIZE_4KB;
601         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
602
603         /* PMMU and HPMMU are the same except for the page size */
604         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
605         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
606
607         /* shifts and masks are the same in PMMU and DMMU */
608         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
609         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
610         prop->dmmu.end_addr = VA_HOST_SPACE_END;
611         prop->dmmu.page_size = PAGE_SIZE_2MB;
612
613         prop->cfg_size = CFG_SIZE;
614         prop->max_asid = MAX_ASID;
615         prop->num_of_events = GAUDI_EVENT_SIZE;
616         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
617
618         set_default_power_values(hdev);
619
620         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
621         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
622
623         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
624         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
625
626         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
627                                         CARD_NAME_MAX_LEN);
628
629         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
630
631         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
632                         prop->sync_stream_first_sob +
633                         (num_sync_stream_queues * HL_RSVD_SOBS);
634         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
635                         prop->sync_stream_first_mon +
636                         (num_sync_stream_queues * HL_RSVD_MONS);
637
638         prop->first_available_user_msix_interrupt = USHRT_MAX;
639
640         for (i = 0 ; i < HL_MAX_DCORES ; i++)
641                 prop->first_available_cq[i] = USHRT_MAX;
642
643         prop->fw_cpu_boot_dev_sts0_valid = false;
644         prop->fw_cpu_boot_dev_sts1_valid = false;
645         prop->hard_reset_done_by_fw = false;
646         prop->gic_interrupts_enable = true;
647
648         return 0;
649 }
650
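/*
 * gaudi_pci_bars_map() - map the SRAM, CFG and HBM PCI BARs.
 *
 * The HBM BAR is mapped as write-combined. On success, hdev->rmmio is set to
 * point at the configuration space within the CFG BAR.
 */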
651 static int gaudi_pci_bars_map(struct hl_device *hdev)
652 {
653         static const char * const name[] = {"SRAM", "CFG", "HBM"};
654         bool is_wc[3] = {false, false, true};
655         int rc;
656
657         rc = hl_pci_bars_map(hdev, name, is_wc);
658         if (rc)
659                 return rc;
660
661         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
662                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
663
664         return 0;
665 }
666
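/*
 * gaudi_set_hbm_bar_base() - move the HBM BAR to a new device address.
 *
 * Re-programs inbound PCI region 2 (the HBM BAR) so that it covers @addr.
 * Returns the previous BAR base address, or U64_MAX if the iATU is owned by
 * the firmware or the region could not be set.
 */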
667 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
668 {
669         struct gaudi_device *gaudi = hdev->asic_specific;
670         struct hl_inbound_pci_region pci_region;
671         u64 old_addr = addr;
672         int rc;
673
674         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
675                 return old_addr;
676
677         if (hdev->asic_prop.iatu_done_by_fw)
678                 return U64_MAX;
679
680         /* Inbound Region 2 - Bar 4 - Point to HBM */
681         pci_region.mode = PCI_BAR_MATCH_MODE;
682         pci_region.bar = HBM_BAR_ID;
683         pci_region.addr = addr;
684         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
685         if (rc)
686                 return U64_MAX;
687
688         if (gaudi) {
689                 old_addr = gaudi->hbm_bar_cur_addr;
690                 gaudi->hbm_bar_cur_addr = addr;
691         }
692
693         return old_addr;
694 }
695
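/*
 * gaudi_init_iatu() - configure the PCI iATU inbound/outbound regions.
 *
 * Sets up inbound region 0 (SRAM + CFG), inbound region 1 (SPI flash),
 * inbound region 2 (HBM) and outbound region 0 (host memory). Skipped
 * entirely when the firmware has already configured the iATU.
 */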
696 static int gaudi_init_iatu(struct hl_device *hdev)
697 {
698         struct hl_inbound_pci_region inbound_region;
699         struct hl_outbound_pci_region outbound_region;
700         int rc;
701
702         if (hdev->asic_prop.iatu_done_by_fw)
703                 return 0;
704
705         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
706         inbound_region.mode = PCI_BAR_MATCH_MODE;
707         inbound_region.bar = SRAM_BAR_ID;
708         inbound_region.addr = SRAM_BASE_ADDR;
709         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
710         if (rc)
711                 goto done;
712
713         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
714         inbound_region.mode = PCI_BAR_MATCH_MODE;
715         inbound_region.bar = CFG_BAR_ID;
716         inbound_region.addr = SPI_FLASH_BASE_ADDR;
717         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
718         if (rc)
719                 goto done;
720
721         /* Inbound Region 2 - Bar 4 - Point to HBM */
722         inbound_region.mode = PCI_BAR_MATCH_MODE;
723         inbound_region.bar = HBM_BAR_ID;
724         inbound_region.addr = DRAM_PHYS_BASE;
725         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
726         if (rc)
727                 goto done;
728
729         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
730
731         /* Outbound Region 0 - Point to Host */
732         outbound_region.addr = HOST_PHYS_BASE;
733         outbound_region.size = HOST_PHYS_SIZE;
734         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
735
736 done:
737         return rc;
738 }
739
740 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
741 {
742         return RREG32(mmHW_STATE);
743 }
744
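/*
 * gaudi_early_init() - early initialization of the Gaudi ASIC.
 *
 * Sets the fixed properties, validates the PCI BAR sizes, checks whether the
 * firmware owns the iATU configuration, initializes the PCI layer, reads the
 * preboot status and resets the device if its H/W state is dirty.
 */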
745 static int gaudi_early_init(struct hl_device *hdev)
746 {
747         struct asic_fixed_properties *prop = &hdev->asic_prop;
748         struct pci_dev *pdev = hdev->pdev;
749         u32 fw_boot_status;
750         int rc;
751
752         rc = gaudi_set_fixed_properties(hdev);
753         if (rc) {
754                 dev_err(hdev->dev, "Failed setting fixed properties\n");
755                 return rc;
756         }
757
758         /* Check BAR sizes */
759         if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
760                 dev_err(hdev->dev,
761                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
762                         SRAM_BAR_ID,
763                         (unsigned long long) pci_resource_len(pdev,
764                                                         SRAM_BAR_ID),
765                         SRAM_BAR_SIZE);
766                 rc = -ENODEV;
767                 goto free_queue_props;
768         }
769
770         if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
771                 dev_err(hdev->dev,
772                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
773                         CFG_BAR_ID,
774                         (unsigned long long) pci_resource_len(pdev,
775                                                                 CFG_BAR_ID),
776                         CFG_BAR_SIZE);
777                 rc = -ENODEV;
778                 goto free_queue_props;
779         }
780
781         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
782
783         /* If FW security is enabled at this point it means no access to ELBI */
784         if (hdev->asic_prop.fw_security_enabled) {
785                 hdev->asic_prop.iatu_done_by_fw = true;
786
787                 /*
788                  * The GIC-security-bit can ONLY be set by CPUCP, so at this
789                  * stage the decision can be based only on PCI ID security.
790                  */
791                 hdev->asic_prop.gic_interrupts_enable = false;
792                 goto pci_init;
793         }
794
795         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
796                                 &fw_boot_status);
797         if (rc)
798                 goto free_queue_props;
799
800         /* Check whether FW is configuring iATU */
801         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
802                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
803                 hdev->asic_prop.iatu_done_by_fw = true;
804
805 pci_init:
806         rc = hl_pci_init(hdev);
807         if (rc)
808                 goto free_queue_props;
809
810         /* Before continuing with the initialization, we need to read the preboot
811          * version to determine whether we run with security-enabled firmware.
812          */
813         rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
814                                         mmCPU_BOOT_DEV_STS0,
815                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
816                                         mmCPU_BOOT_ERR1,
817                                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
818         if (rc) {
819                 if (hdev->reset_on_preboot_fail)
820                         hdev->asic_funcs->hw_fini(hdev, true);
821                 goto pci_fini;
822         }
823
824         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
825                 dev_info(hdev->dev,
826                         "H/W state is dirty, must reset before initializing\n");
827                 hdev->asic_funcs->hw_fini(hdev, true);
828         }
829
830         return 0;
831
832 pci_fini:
833         hl_pci_fini(hdev);
834 free_queue_props:
835         kfree(hdev->asic_prop.hw_queues_props);
836         return rc;
837 }
838
839 static int gaudi_early_fini(struct hl_device *hdev)
840 {
841         kfree(hdev->asic_prop.hw_queues_props);
842         hl_pci_fini(hdev);
843
844         return 0;
845 }
846
847 /**
848  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
849  * @hdev: pointer to hl_device structure
850  *
851  * Return: 0 for success, negative value for error.
852  */
853 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
854 {
855         struct asic_fixed_properties *prop = &hdev->asic_prop;
856         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
857         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
858         int rc;
859
860         if (hdev->asic_prop.fw_security_enabled) {
861                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
862
863                 if (rc)
864                         return rc;
865
866                 freq = pll_freq_arr[2];
867         } else {
868                 /* Backward compatibility */
869                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
870                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
871                 nr = RREG32(mmPSOC_CPU_PLL_NR);
872                 nf = RREG32(mmPSOC_CPU_PLL_NF);
873                 od = RREG32(mmPSOC_CPU_PLL_OD);
874
875                 if (div_sel == DIV_SEL_REF_CLK ||
876                                 div_sel == DIV_SEL_DIVIDED_REF) {
877                         if (div_sel == DIV_SEL_REF_CLK)
878                                 freq = PLL_REF_CLK;
879                         else
880                                 freq = PLL_REF_CLK / (div_fctr + 1);
881                 } else if (div_sel == DIV_SEL_PLL_CLK ||
882                         div_sel == DIV_SEL_DIVIDED_PLL) {
883                         pll_clk = PLL_REF_CLK * (nf + 1) /
884                                         ((nr + 1) * (od + 1));
885                         if (div_sel == DIV_SEL_PLL_CLK)
886                                 freq = pll_clk;
887                         else
888                                 freq = pll_clk / (div_fctr + 1);
889                 } else {
890                         dev_warn(hdev->dev,
891                                 "Received invalid div select value: %d",
892                                 div_sel);
893                         freq = 0;
894                 }
895         }
896
897         prop->psoc_timestamp_frequency = freq;
898         prop->psoc_pci_pll_nr = nr;
899         prop->psoc_pci_pll_nf = nf;
900         prop->psoc_pci_pll_od = od;
901         prop->psoc_pci_pll_div_factor = div_fctr;
902
903         return 0;
904 }
905
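/*
 * _gaudi_init_tpc_mem() - DMA the TPC kernel to SRAM and run it on all TPCs.
 *
 * Builds a single LIN_DMA packet that copies the TPC kernel image from host
 * memory to the SRAM user region, sends it on QMAN0, and then executes the
 * kernel on every TPC engine.
 */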
906 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
907                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
908 {
909         struct asic_fixed_properties *prop = &hdev->asic_prop;
910         struct packet_lin_dma *init_tpc_mem_pkt;
911         struct hl_cs_job *job;
912         struct hl_cb *cb;
913         u64 dst_addr;
914         u32 cb_size, ctl;
915         u8 tpc_id;
916         int rc;
917
918         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
919         if (!cb)
920                 return -EFAULT;
921
922         init_tpc_mem_pkt = cb->kernel_address;
923         cb_size = sizeof(*init_tpc_mem_pkt);
924         memset(init_tpc_mem_pkt, 0, cb_size);
925
926         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
927
928         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
929         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
930         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
931         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
932
933         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
934
935         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
936         dst_addr = (prop->sram_user_base_address &
937                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
938                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
939         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
940
941         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
942         if (!job) {
943                 dev_err(hdev->dev, "Failed to allocate a new job\n");
944                 rc = -ENOMEM;
945                 goto release_cb;
946         }
947
948         job->id = 0;
949         job->user_cb = cb;
950         atomic_inc(&job->user_cb->cs_cnt);
951         job->user_cb_size = cb_size;
952         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
953         job->patched_cb = job->user_cb;
954         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
955
956         hl_debugfs_add_job(hdev, job);
957
958         rc = gaudi_send_job_on_qman0(hdev, job);
959
960         if (rc)
961                 goto free_job;
962
963         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
964                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
965                 if (rc)
966                         break;
967         }
968
969 free_job:
970         hl_userptr_delete_list(hdev, &job->userptr_list);
971         hl_debugfs_remove_job(hdev, job);
972         kfree(job);
973         atomic_dec(&cb->cs_cnt);
974
975 release_cb:
976         hl_cb_put(cb);
977         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
978
979         return rc;
980 }
981
982 /*
983  * gaudi_init_tpc_mem() - Initialize TPC memories.
984  * @hdev: Pointer to hl_device structure.
985  *
986  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
987  *
988  * Return: 0 for success, negative value for error.
989  */
990 static int gaudi_init_tpc_mem(struct hl_device *hdev)
991 {
992         const struct firmware *fw;
993         size_t fw_size;
994         void *cpu_addr;
995         dma_addr_t dma_handle;
996         int rc, count = 5;
997
998 again:
999         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1000         if (rc == -EINTR && count-- > 0) {
1001                 msleep(50);
1002                 goto again;
1003         }
1004
1005         if (rc) {
1006                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1007                                 GAUDI_TPC_FW_FILE);
1008                 goto out;
1009         }
1010
1011         fw_size = fw->size;
1012         cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1013                         &dma_handle, GFP_KERNEL | __GFP_ZERO);
1014         if (!cpu_addr) {
1015                 dev_err(hdev->dev,
1016                         "Failed to allocate %zu of dma memory for TPC kernel\n",
1017                         fw_size);
1018                 rc = -ENOMEM;
1019                 goto out;
1020         }
1021
1022         memcpy(cpu_addr, fw->data, fw_size);
1023
1024         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1025
1026         hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1027                         dma_handle);
1028
1029 out:
1030         release_firmware(fw);
1031         return rc;
1032 }
1033
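/*
 * gaudi_collective_map_sobs() - map the current SOB group onto the slave queues.
 *
 * Assigns one SOB from the stream's current SOB group to each NIC queue of
 * the given stream, and a shared SOB to the DMA5 and TPC7 queues, which use
 * the same resource since only one of them participates in the reduction.
 */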
1034 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1035 {
1036         struct gaudi_device *gaudi = hdev->asic_specific;
1037         struct gaudi_collective_properties *prop = &gaudi->collective_props;
1038         struct hl_hw_queue *q;
1039         u32 i, sob_id, sob_group_id, queue_id;
1040
1041         /* Iterate through SOB groups and assign a SOB for each slave queue */
1042         sob_group_id =
1043                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1044         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1045
1046         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1047         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1048                 q = &hdev->kernel_queues[queue_id + (4 * i)];
1049                 q->sync_stream_prop.collective_sob_id = sob_id + i;
1050         }
1051
1052         /* Both DMA5 and TPC7 use the same resources since only a single
1053          * engine needs to participate in the reduction process.
1054          */
1055         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1056         q = &hdev->kernel_queues[queue_id];
1057         q->sync_stream_prop.collective_sob_id =
1058                         sob_id + NIC_NUMBER_OF_ENGINES;
1059
1060         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1061         q = &hdev->kernel_queues[queue_id];
1062         q->sync_stream_prop.collective_sob_id =
1063                         sob_id + NIC_NUMBER_OF_ENGINES;
1064 }
1065
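/*
 * gaudi_sob_group_hw_reset() - kref release callback that resets a SOB group.
 *
 * Schedules a register memset that clears all the SOBs in the group and then
 * re-initializes the group's refcount so it can be reused.
 */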
1066 static void gaudi_sob_group_hw_reset(struct kref *ref)
1067 {
1068         struct gaudi_hw_sob_group *hw_sob_group =
1069                 container_of(ref, struct gaudi_hw_sob_group, kref);
1070         struct hl_device *hdev = hw_sob_group->hdev;
1071         u64 base_addr;
1072         int rc;
1073
1074         base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1075                         hw_sob_group->base_sob_id * 4;
1076         rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
1077                         base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
1078         if (rc)
1079                 dev_err(hdev->dev,
1080                         "failed resetting sob group - sob base %u, count %u",
1081                         hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);
1082
1083         kref_init(&hw_sob_group->kref);
1084 }
1085
1086 static void gaudi_sob_group_reset_error(struct kref *ref)
1087 {
1088         struct gaudi_hw_sob_group *hw_sob_group =
1089                 container_of(ref, struct gaudi_hw_sob_group, kref);
1090         struct hl_device *hdev = hw_sob_group->hdev;
1091
1092         dev_crit(hdev->dev,
1093                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1094                 hw_sob_group->base_sob_id);
1095 }
1096
1097 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1098 {
1099         struct gaudi_collective_properties *prop;
1100         int i;
1101
1102         prop = &gaudi->collective_props;
1103
1104         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1105
1106         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1107                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1108                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1109                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1110         /* Set collective engine bit */
1111         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1112                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1113 }
1114
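/*
 * gaudi_collective_init() - initialize the collective properties.
 *
 * Carves the reserved collective SOB range into per-group chunks (aligned to
 * HL_MAX_SOBS_PER_MONITOR), resets every group, and maps the first group of
 * each stream onto its slave queues.
 */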
1115 static int gaudi_collective_init(struct hl_device *hdev)
1116 {
1117         u32 i, sob_id, reserved_sobs_per_group;
1118         struct gaudi_collective_properties *prop;
1119         struct gaudi_device *gaudi;
1120
1121         gaudi = hdev->asic_specific;
1122         prop = &gaudi->collective_props;
1123         sob_id = hdev->asic_prop.collective_first_sob;
1124
1125         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1126         reserved_sobs_per_group =
1127                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1128
1129         /* Init SOB groups */
1130         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1131                 prop->hw_sob_group[i].hdev = hdev;
1132                 prop->hw_sob_group[i].base_sob_id = sob_id;
1133                 sob_id += reserved_sobs_per_group;
1134                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1135         }
1136
1137         for (i = 0 ; i < QMAN_STREAMS; i++) {
1138                 prop->next_sob_group_val[i] = 1;
1139                 prop->curr_sob_group_idx[i] = 0;
1140                 gaudi_collective_map_sobs(hdev, i);
1141         }
1142
1143         gaudi_collective_mstr_sob_mask_set(gaudi);
1144
1145         return 0;
1146 }
1147
1148 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1149 {
1150         struct gaudi_device *gaudi = hdev->asic_specific;
1151         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1152
1153         kref_put(&cprop->hw_sob_group[sob_group].kref,
1154                                         gaudi_sob_group_hw_reset);
1155 }
1156
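/*
 * gaudi_collective_master_init_job() - build the master wait CBs for a
 * collective wait CS.
 *
 * Generates two wait CBs on the patched CB, one per master monitor, each
 * waiting on a portion of the SOB group (selected by the master SOB mask) to
 * reach the expected value for this stream.
 */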
1157 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1158                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1159 {
1160         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1161         struct gaudi_collective_properties *cprop;
1162         struct hl_gen_wait_properties wait_prop;
1163         struct hl_sync_stream_properties *prop;
1164         struct gaudi_device *gaudi;
1165
1166         gaudi = hdev->asic_specific;
1167         cprop = &gaudi->collective_props;
1168         queue_id = job->hw_queue_id;
1169         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1170
1171         master_sob_base =
1172                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1173         master_monitor = prop->collective_mstr_mon_id[0];
1174
1175         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1176
1177         dev_dbg(hdev->dev,
1178                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1179                 master_sob_base, cprop->mstr_sob_mask[0],
1180                 cprop->next_sob_group_val[stream],
1181                 master_monitor, queue_id);
1182
1183         wait_prop.data = (void *) job->patched_cb;
1184         wait_prop.sob_base = master_sob_base;
1185         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1186         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1187         wait_prop.mon_id = master_monitor;
1188         wait_prop.q_idx = queue_id;
1189         wait_prop.size = cb_size;
1190         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1191
1192         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1193         master_monitor = prop->collective_mstr_mon_id[1];
1194
1195         dev_dbg(hdev->dev,
1196                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1197                 master_sob_base, cprop->mstr_sob_mask[1],
1198                 cprop->next_sob_group_val[stream],
1199                 master_monitor, queue_id);
1200
1201         wait_prop.sob_base = master_sob_base;
1202         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1203         wait_prop.mon_id = master_monitor;
1204         wait_prop.size = cb_size;
1205         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1206 }
1207
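/*
 * gaudi_collective_slave_init_job() - build the wait + signal CB for a
 * collective slave queue.
 *
 * Adds a wait CB on the signal CS's SOB using the slave monitor, followed by
 * a signal CB that increments this queue's collective SOB.
 */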
1208 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1209                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1210 {
1211         struct hl_gen_wait_properties wait_prop;
1212         struct hl_sync_stream_properties *prop;
1213         u32 queue_id, cb_size = 0;
1214
1215         queue_id = job->hw_queue_id;
1216         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1217
1218         /* Add to wait CBs using slave monitor */
1219         wait_prop.data = (void *) job->user_cb;
1220         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1221         wait_prop.sob_mask = 0x1;
1222         wait_prop.sob_val = cs_cmpl->sob_val;
1223         wait_prop.mon_id = prop->collective_slave_mon_id;
1224         wait_prop.q_idx = queue_id;
1225         wait_prop.size = cb_size;
1226
1227         dev_dbg(hdev->dev,
1228                 "Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
1229                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1230                 prop->collective_slave_mon_id, queue_id);
1231
1232         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1233
1234         dev_dbg(hdev->dev,
1235                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1236                 prop->collective_sob_id, queue_id);
1237
1238         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1239                         prop->collective_sob_id, cb_size, false);
1240 }
1241
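/*
 * gaudi_collective_wait_init_cs() - initialize all jobs of a collective wait CS.
 *
 * Copies the SOB information from the signal CS, takes the needed refcounts,
 * builds the master/slave CBs for every job in the CS, and advances the SOB
 * group value/index of the stream, handling wraparound when the maximum SOB
 * value is reached.
 */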
1242 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1243 {
1244         struct hl_cs_compl *signal_cs_cmpl =
1245                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1246         struct hl_cs_compl *cs_cmpl =
1247                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1248         struct gaudi_collective_properties *cprop;
1249         u32 stream, queue_id, sob_group_offset;
1250         struct gaudi_device *gaudi;
1251         struct hl_device *hdev;
1252         struct hl_cs_job *job;
1253         struct hl_ctx *ctx;
1254
1255         ctx = cs->ctx;
1256         hdev = ctx->hdev;
1257         gaudi = hdev->asic_specific;
1258         cprop = &gaudi->collective_props;
1259
1260         /* copy the SOB id and value of the signal CS */
1261         cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1262         cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1263
1264         /* Check again whether the signal CS has already completed.
1265          * If it has, don't send any wait CS since the hw_sob
1266          * could already be in reset. If the signal has not completed,
1267          * take a refcount on the hw_sob to prevent resetting the SOB
1268          * while the wait CS is not yet submitted.
1269          * Note that this check is protected by two locks,
1270          * the hw queue lock and the completion object lock,
1271          * and the same completion object lock also protects
1272          * the hw_sob reset handler function.
1273          * The hw queue lock prevents the hw_sob refcount value,
1274          * changed by the signal/wait flows, from going out of sync.
1275          */
1276         spin_lock(&signal_cs_cmpl->lock);
1277
1278         if (completion_done(&cs->signal_fence->completion)) {
1279                 spin_unlock(&signal_cs_cmpl->lock);
1280                 return -EINVAL;
1281         }
1282         /* Increment kref since all slave queues are now waiting on it */
1283         kref_get(&cs_cmpl->hw_sob->kref);
1284
1285         spin_unlock(&signal_cs_cmpl->lock);
1286
1287         /* Calculate the stream from collective master queue (1st job) */
1288         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1289         stream = job->hw_queue_id % 4;
1290         sob_group_offset =
1291                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1292
1293         list_for_each_entry(job, &cs->job_list, cs_node) {
1294                 queue_id = job->hw_queue_id;
1295
1296                 if (hdev->kernel_queues[queue_id].collective_mode ==
1297                                 HL_COLLECTIVE_MASTER)
1298                         gaudi_collective_master_init_job(hdev, job, stream,
1299                                                 sob_group_offset);
1300                 else
1301                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1302         }
1303
1304         cs_cmpl->sob_group = sob_group_offset;
1305
1306         /* Handle sob group kref and wraparound */
1307         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1308         cprop->next_sob_group_val[stream]++;
1309
1310         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1311                 /*
1312                  * Decrement as we reached the max value.
1313                  * The release function won't be called here as we've
1314                  * just incremented the refcount.
1315                  */
1316                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1317                                 gaudi_sob_group_reset_error);
1318                 cprop->next_sob_group_val[stream] = 1;
1319                 /* only two SOB groups are currently in use */
1320                 cprop->curr_sob_group_idx[stream] =
1321                         (cprop->curr_sob_group_idx[stream] + 1) &
1322                                                         (HL_RSVD_SOBS - 1);
1323
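                     /*
                      * Switch this stream over to the newly selected SOB
                      * group. Note that the index wrap above relies on
                      * HL_RSVD_SOBS being a power of two.
                      */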
1324                 gaudi_collective_map_sobs(hdev, stream);
1325
1326                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1327                                 cprop->curr_sob_group_idx[stream], stream);
1328         }
1329
1330         mb();
1331         hl_fence_put(cs->signal_fence);
1332         cs->signal_fence = NULL;
1333
1334         return 0;
1335 }
1336
1337 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1338                 struct hl_ctx *ctx, struct hl_cs *cs,
1339                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
1340 {
1341         struct hw_queue_properties *hw_queue_prop;
1342         struct hl_cs_counters_atomic *cntr;
1343         struct hl_cs_job *job;
1344         struct hl_cb *cb;
1345         u32 cb_size;
1346         bool patched_cb;
1347
1348         cntr = &hdev->aggregated_cs_counters;
1349
1350         if (mode == HL_COLLECTIVE_MASTER) {
1351                 /* CB size of collective master queue contains
1352                  * 4 msg short packets for monitor 1 configuration
1353                  * 1 fence packet
1354                  * 4 msg short packets for monitor 2 configuration
1355                  * 1 fence packet
1356                  * 2 msg prot packets for completion and MSI-X
1357                  */
1358                 cb_size = sizeof(struct packet_msg_short) * 8 +
1359                                 sizeof(struct packet_fence) * 2 +
1360                                 sizeof(struct packet_msg_prot) * 2;
1361                 patched_cb = true;
1362         } else {
1363                 /* CB size of collective slave queues contains
1364                  * 4 msg short packets for monitor configuration
1365                  * 1 fence packet
1366                  * 1 additional msg short packet for sob signal
1367                  */
1368                 cb_size = sizeof(struct packet_msg_short) * 5 +
1369                                 sizeof(struct packet_fence);
1370                 patched_cb = false;
1371         }
1372
1373         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1374         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1375         if (!job) {
1376                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1377                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1378                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1379                 return -ENOMEM;
1380         }
1381
1382         /* Allocate internal mapped CB for non-patched CBs */
1383         cb = hl_cb_kernel_create(hdev, cb_size,
1384                         hdev->mmu_enable && !patched_cb);
1385         if (!cb) {
1386                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1387                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1388                 kfree(job);
1389                 return -EFAULT;
1390         }
1391
1392         job->id = 0;
1393         job->cs = cs;
1394         job->user_cb = cb;
1395         atomic_inc(&job->user_cb->cs_cnt);
1396         job->user_cb_size = cb_size;
1397         job->hw_queue_id = queue_id;
1398
1399         /*
1400          * No need for parsing, the user CB is the patched CB.
1401          * We call hl_cb_destroy() for two reasons: we don't need the CB
1402          * in the CB idr anymore, and we need to decrement its refcount
1403          * as it was incremented inside hl_cb_kernel_create().
1404          */
1405         if (patched_cb)
1406                 job->patched_cb = job->user_cb;
1407         else
1408                 job->patched_cb = NULL;
1409
1410         job->job_cb_size = job->user_cb_size;
1411         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1412
1413         /* Increment refcount since for external queues we get a completion */
1414         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1415                 cs_get(cs);
1416
1417         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1418
1419         list_add_tail(&job->cs_node, &cs->job_list);
1420
1421         hl_debugfs_add_job(hdev, job);
1422
1423         return 0;
1424 }
1425
1426 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1427                 struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
1428                 u32 collective_engine_id)
1429 {
1430         struct gaudi_device *gaudi = hdev->asic_specific;
1431         struct hw_queue_properties *hw_queue_prop;
1432         u32 queue_id, collective_queue, num_jobs;
1433         u32 stream, nic_queue, nic_idx = 0;
1434         bool skip;
1435         int i, rc = 0;
1436
1437         /* Verify wait queue id is configured as master */
1438         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1439         if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1440                 dev_err(hdev->dev,
1441                         "Queue %d is not configured as collective master\n",
1442                         wait_queue_id);
1443                 return -EINVAL;
1444         }
1445
1446         /* Verify engine id is supported */
1447         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1448                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1449                 dev_err(hdev->dev,
1450                         "Collective wait does not support engine %u\n",
1451                         collective_engine_id);
1452                 return -EINVAL;
1453         }
1454
1455         stream = wait_queue_id % 4;
1456
1457         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1458                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1459         else
1460                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1461
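             /*
              * One job for the collective master queue plus
              * NUMBER_OF_SOBS_IN_GRP jobs for the collective slave queues
              * (the NIC engines and the reduction engine).
              */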
1462         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1463         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1464
1465         /* The first job goes to the collective master queue; it will wait
1466          * for the collective slave queues to finish execution.
1467          * The synchronization is done using two monitors:
1468          * the first monitor for NICs 0-7, the second monitor for NICs 8-9
1469          * and the reduction engine (DMA5/TPC7).
1470          *
1471          * The rest of the jobs go to the collective slave queues, which
1472          * will all wait for the user to signal sob 'cs_cmpl->sob_val'.
1473          */
1474         for (i = 0 ; i < num_jobs ; i++) {
1475                 if (i == 0) {
1476                         queue_id = wait_queue_id;
1477                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1478                                 HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
1479                 } else {
1480                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1481                                 if (gaudi->hw_cap_initialized &
1482                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1483                                         skip = false;
1484                                 else
1485                                         skip = true;
1486
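                                     /*
                                      * Each NIC engine exposes 4 queues, one
                                      * per stream, hence the stride of 4 when
                                      * advancing to the next NIC's queue for
                                      * this stream.
                                      */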
1487                                 queue_id = nic_queue;
1488                                 nic_queue += 4;
1489                                 nic_idx++;
1490
1491                                 if (skip)
1492                                         continue;
1493                         } else {
1494                                 queue_id = collective_queue;
1495                         }
1496
1497                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1498                                 HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
1499                 }
1500
1501                 if (rc)
1502                         return rc;
1503         }
1504
1505         return rc;
1506 }
1507
1508 static int gaudi_late_init(struct hl_device *hdev)
1509 {
1510         struct gaudi_device *gaudi = hdev->asic_specific;
1511         int rc;
1512
1513         rc = gaudi->cpucp_info_get(hdev);
1514         if (rc) {
1515                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1516                 return rc;
1517         }
1518
1519         if ((hdev->card_type == cpucp_card_type_pci) &&
1520                         (hdev->nic_ports_mask & 0x3)) {
1521                 dev_info(hdev->dev,
1522                         "PCI card detected, only 8 ports are enabled\n");
1523                 hdev->nic_ports_mask &= ~0x3;
1524
1525                 /* Stop and disable unused NIC QMANs */
1526                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1527                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1528                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1529
1530                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1531                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1532                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1533
1534                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1535                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1536
1537                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1538         }
1539
1540         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1541         if (rc) {
1542                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1543                 return rc;
1544         }
1545
1546         rc = gaudi_fetch_psoc_frequency(hdev);
1547         if (rc) {
1548                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1549                 goto disable_pci_access;
1550         }
1551
1552         rc = gaudi_mmu_clear_pgt_range(hdev);
1553         if (rc) {
1554                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1555                 goto disable_pci_access;
1556         }
1557
1558         rc = gaudi_init_tpc_mem(hdev);
1559         if (rc) {
1560                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1561                 goto disable_pci_access;
1562         }
1563
1564         rc = gaudi_collective_init(hdev);
1565         if (rc) {
1566                 dev_err(hdev->dev, "Failed to init collective\n");
1567                 goto disable_pci_access;
1568         }
1569
1570         return 0;
1571
1572 disable_pci_access:
1573         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1574
1575         return rc;
1576 }
1577
1578 static void gaudi_late_fini(struct hl_device *hdev)
1579 {
1580         const struct hwmon_channel_info **channel_info_arr;
1581         int i = 0;
1582
1583         if (!hdev->hl_chip_info->info)
1584                 return;
1585
1586         channel_info_arr = hdev->hl_chip_info->info;
1587
1588         while (channel_info_arr[i]) {
1589                 kfree(channel_info_arr[i]->config);
1590                 kfree(channel_info_arr[i]);
1591                 i++;
1592         }
1593
1594         kfree(channel_info_arr);
1595
1596         hdev->hl_chip_info->info = NULL;
1597 }
1598
1599 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1600 {
1601         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1602         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1603         int i, j, rc = 0;
1604
1605         /*
1606          * The device CPU works with 40-bit addresses, and bit 39 must be set
1607          * to '1' when accessing the host.
1608          * Bits 49:39 of the full host address are saved for a later
1609          * configuration of the HW to perform an extension to 50 bits.
1610          * Because there is a single HW register that holds the extension bits,
1611          * these bits must be identical across the entire allocated range.
1612          */
1613
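             /*
              * Retry the allocation until we get a range whose start and end
              * addresses have identical MSBs. Unsuitable buffers are kept in
              * virt_addr_arr until the end (so a retry cannot receive the
              * same range again) and are freed at free_dma_mem_arr.
              */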
1614         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1615                 virt_addr_arr[i] =
1616                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1617                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1618                                                 &dma_addr_arr[i],
1619                                                 GFP_KERNEL | __GFP_ZERO);
1620                 if (!virt_addr_arr[i]) {
1621                         rc = -ENOMEM;
1622                         goto free_dma_mem_arr;
1623                 }
1624
1625                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1626                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1627                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1628                         break;
1629         }
1630
1631         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1632                 dev_err(hdev->dev,
1633                         "MSBs of CPU accessible DMA memory are not identical across the allocated range\n");
1634                 rc = -EFAULT;
1635                 goto free_dma_mem_arr;
1636         }
1637
1638         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1639         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1640         hdev->cpu_pci_msb_addr =
1641                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1642
1643         if (!hdev->asic_prop.fw_security_enabled)
1644                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1645
1646 free_dma_mem_arr:
1647         for (j = 0 ; j < i ; j++)
1648                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1649                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1650                                                 virt_addr_arr[j],
1651                                                 dma_addr_arr[j]);
1652
1653         return rc;
1654 }
1655
1656 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1657 {
1658         struct gaudi_device *gaudi = hdev->asic_specific;
1659         struct gaudi_internal_qman_info *q;
1660         u32 i;
1661
1662         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1663                 q = &gaudi->internal_qmans[i];
1664                 if (!q->pq_kernel_addr)
1665                         continue;
1666                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1667                                                         q->pq_kernel_addr,
1668                                                         q->pq_dma_addr);
1669         }
1670 }
1671
1672 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1673 {
1674         struct gaudi_device *gaudi = hdev->asic_specific;
1675         struct gaudi_internal_qman_info *q;
1676         int rc, i;
1677
1678         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1679                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1680                         continue;
1681
1682                 q = &gaudi->internal_qmans[i];
1683
1684                 switch (i) {
1685                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1686                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1687                         break;
1688                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1689                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1690                         break;
1691                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1692                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1693                         break;
1694                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1695                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1696                         break;
1697                 default:
1698                         dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1699                         rc = -EINVAL;
1700                         goto free_internal_qmans_pq_mem;
1701                 }
1702
1703                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1704                                                 hdev, q->pq_size,
1705                                                 &q->pq_dma_addr,
1706                                                 GFP_KERNEL | __GFP_ZERO);
1707                 if (!q->pq_kernel_addr) {
1708                         rc = -ENOMEM;
1709                         goto free_internal_qmans_pq_mem;
1710                 }
1711         }
1712
1713         return 0;
1714
1715 free_internal_qmans_pq_mem:
1716         gaudi_free_internal_qmans_pq_mem(hdev);
1717         return rc;
1718 }
1719
1720 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1721 {
1722         struct asic_fixed_properties *prop = &hdev->asic_prop;
1723         struct pci_mem_region *region;
1724
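             /*
              * Note: the CFG and SP SRAM regions are exposed through the CFG
              * BAR, which is assumed here to be mapped starting at
              * SPI_FLASH_BASE_ADDR in the device address space - hence the
              * subtraction when computing offset_in_bar for those regions.
              */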
1725         /* CFG */
1726         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1727         region->region_base = CFG_BASE;
1728         region->region_size = CFG_SIZE;
1729         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1730         region->bar_size = CFG_BAR_SIZE;
1731         region->bar_id = CFG_BAR_ID;
1732         region->used = 1;
1733
1734         /* SRAM */
1735         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1736         region->region_base = SRAM_BASE_ADDR;
1737         region->region_size = SRAM_SIZE;
1738         region->offset_in_bar = 0;
1739         region->bar_size = SRAM_BAR_SIZE;
1740         region->bar_id = SRAM_BAR_ID;
1741         region->used = 1;
1742
1743         /* DRAM */
1744         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1745         region->region_base = DRAM_PHYS_BASE;
1746         region->region_size = hdev->asic_prop.dram_size;
1747         region->offset_in_bar = 0;
1748         region->bar_size = prop->dram_pci_bar_size;
1749         region->bar_id = HBM_BAR_ID;
1750         region->used = 1;
1751
1752         /* SP SRAM */
1753         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1754         region->region_base = PSOC_SCRATCHPAD_ADDR;
1755         region->region_size = PSOC_SCRATCHPAD_SIZE;
1756         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1757         region->bar_size = CFG_BAR_SIZE;
1758         region->bar_id = CFG_BAR_ID;
1759         region->used = 1;
1760 }
1761
1762 static int gaudi_sw_init(struct hl_device *hdev)
1763 {
1764         struct gaudi_device *gaudi;
1765         u32 i, event_id = 0;
1766         int rc;
1767
1768         /* Allocate device structure */
1769         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1770         if (!gaudi)
1771                 return -ENOMEM;
1772
1773         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1774                 if (gaudi_irq_map_table[i].valid) {
1775                         if (event_id == GAUDI_EVENT_SIZE) {
1776                                 dev_err(hdev->dev,
1777                                         "Event array exceeds the limit of %u events\n",
1778                                         GAUDI_EVENT_SIZE);
1779                                 rc = -EINVAL;
1780                                 goto free_gaudi_device;
1781                         }
1782
1783                         gaudi->events[event_id++] =
1784                                         gaudi_irq_map_table[i].fc_id;
1785                 }
1786         }
1787
1788         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1789
1790         gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1791
1792         hdev->asic_specific = gaudi;
1793
1794         /* Create DMA pool for small allocations */
1795         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1796                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1797         if (!hdev->dma_pool) {
1798                 dev_err(hdev->dev, "Failed to create DMA pool\n");
1799                 rc = -ENOMEM;
1800                 goto free_gaudi_device;
1801         }
1802
1803         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1804         if (rc)
1805                 goto free_dma_pool;
1806
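             /*
              * 32-byte allocation granularity (ilog2(32) order), no NUMA
              * node preference.
              */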
1807         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1808         if (!hdev->cpu_accessible_dma_pool) {
1809                 dev_err(hdev->dev,
1810                         "Failed to create CPU accessible DMA pool\n");
1811                 rc = -ENOMEM;
1812                 goto free_cpu_dma_mem;
1813         }
1814
1815         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1816                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1817                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1818         if (rc) {
1819                 dev_err(hdev->dev,
1820                         "Failed to add memory to CPU accessible DMA pool\n");
1821                 rc = -EFAULT;
1822                 goto free_cpu_accessible_dma_pool;
1823         }
1824
1825         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1826         if (rc)
1827                 goto free_cpu_accessible_dma_pool;
1828
1829         spin_lock_init(&gaudi->hw_queues_lock);
1830         mutex_init(&gaudi->clk_gate_mutex);
1831
1832         hdev->supports_sync_stream = true;
1833         hdev->supports_coresight = true;
1834         hdev->supports_staged_submission = true;
1835         hdev->supports_wait_for_multi_cs = true;
1836
1837         gaudi_set_pci_memory_regions(hdev);
1838
1839         return 0;
1840
1841 free_cpu_accessible_dma_pool:
1842         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1843 free_cpu_dma_mem:
1844         if (!hdev->asic_prop.fw_security_enabled)
1845                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1846                                         hdev->cpu_pci_msb_addr);
1847         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1848                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1849                         hdev->cpu_accessible_dma_mem,
1850                         hdev->cpu_accessible_dma_address);
1851 free_dma_pool:
1852         dma_pool_destroy(hdev->dma_pool);
1853 free_gaudi_device:
1854         kfree(gaudi);
1855         return rc;
1856 }
1857
1858 static int gaudi_sw_fini(struct hl_device *hdev)
1859 {
1860         struct gaudi_device *gaudi = hdev->asic_specific;
1861
1862         gaudi_free_internal_qmans_pq_mem(hdev);
1863
1864         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1865
1866         if (!hdev->asic_prop.fw_security_enabled)
1867                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1868                                         hdev->cpu_pci_msb_addr);
1869
1870         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1871                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1872                         hdev->cpu_accessible_dma_mem,
1873                         hdev->cpu_accessible_dma_address);
1874
1875         dma_pool_destroy(hdev->dma_pool);
1876
1877         mutex_destroy(&gaudi->clk_gate_mutex);
1878
1879         kfree(gaudi);
1880
1881         return 0;
1882 }
1883
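     /*
      * Single MSI mode: a single vector is shared by all queues, so poll
      * every completion queue and the event queue on each interrupt.
      */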
1884 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1885 {
1886         struct hl_device *hdev = arg;
1887         int i;
1888
1889         if (hdev->disabled)
1890                 return IRQ_HANDLED;
1891
1892         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1893                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1894
1895         hl_irq_handler_eq(irq, &hdev->event_queue);
1896
1897         return IRQ_HANDLED;
1898 }
1899
1900 /*
1901  * For backward compatibility, new MSI interrupts should be set after the
1902  * existing CPU and NIC interrupts.
1903  */
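     /*
      * Resulting vector mapping:
      * - CQ interrupts (nr < GAUDI_EVENT_QUEUE_MSI_IDX) use vector nr.
      * - The CPU event queue (cpu_eq) uses vector nr, which must be
      *   GAUDI_EVENT_QUEUE_MSI_IDX.
      * - Any other interrupt is placed after the CPU and NIC vectors, i.e.
      *   at nr + NIC_NUMBER_OF_ENGINES + 1.
      */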
1904 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1905                                 bool cpu_eq)
1906 {
1907         int msi_vec;
1908
1909         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1910                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1911                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1912
1913         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1914                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1915
1916         return pci_irq_vector(hdev->pdev, msi_vec);
1917 }
1918
1919 static int gaudi_enable_msi_single(struct hl_device *hdev)
1920 {
1921         int rc, irq;
1922
1923         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1924
1925         irq = gaudi_pci_irq_vector(hdev, 0, false);
1926         rc = request_irq(irq, gaudi_irq_handler_single, 0,
1927                         "gaudi single msi", hdev);
1928         if (rc)
1929                 dev_err(hdev->dev,
1930                         "Failed to request single MSI IRQ\n");
1931
1932         return rc;
1933 }
1934
1935 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1936 {
1937         int cq_cnt = hdev->asic_prop.completion_queues_count;
1938         int rc, i, irq_cnt_init, irq;
1939
1940         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1941                 irq = gaudi_pci_irq_vector(hdev, i, false);
1942                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1943                                 &hdev->completion_queue[i]);
1944                 if (rc) {
1945                         dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1946                         goto free_irqs;
1947                 }
1948         }
1949
1950         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1951         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1952                                 &hdev->event_queue);
1953         if (rc) {
1954                 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1955                 goto free_irqs;
1956         }
1957
1958         return 0;
1959
1960 free_irqs:
1961         for (i = 0 ; i < irq_cnt_init ; i++)
1962                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1963                                 &hdev->completion_queue[i]);
1964         return rc;
1965 }
1966
1967 static int gaudi_enable_msi(struct hl_device *hdev)
1968 {
1969         struct gaudi_device *gaudi = hdev->asic_specific;
1970         int rc;
1971
1972         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1973                 return 0;
1974
1975         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
1976         if (rc < 0) {
1977                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1978                 return rc;
1979         }
1980
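             /*
              * pci_alloc_irq_vectors() returns the number of vectors actually
              * allocated; fall back to a single shared IRQ handler if fewer
              * than NUMBER_OF_INTERRUPTS vectors are available.
              */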
1981         if (rc < NUMBER_OF_INTERRUPTS) {
1982                 gaudi->multi_msi_mode = false;
1983                 rc = gaudi_enable_msi_single(hdev);
1984         } else {
1985                 gaudi->multi_msi_mode = true;
1986                 rc = gaudi_enable_msi_multi(hdev);
1987         }
1988
1989         if (rc)
1990                 goto free_pci_irq_vectors;
1991
1992         gaudi->hw_cap_initialized |= HW_CAP_MSI;
1993
1994         return 0;
1995
1996 free_pci_irq_vectors:
1997         pci_free_irq_vectors(hdev->pdev);
1998         return rc;
1999 }
2000
2001 static void gaudi_sync_irqs(struct hl_device *hdev)
2002 {
2003         struct gaudi_device *gaudi = hdev->asic_specific;
2004         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2005
2006         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2007                 return;
2008
2009         /* Wait for all pending IRQs to be finished */
2010         if (gaudi->multi_msi_mode) {
2011                 for (i = 0 ; i < cq_cnt ; i++)
2012                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2013
2014                 synchronize_irq(gaudi_pci_irq_vector(hdev,
2015                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
2016                                                 true));
2017         } else {
2018                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2019         }
2020 }
2021
2022 static void gaudi_disable_msi(struct hl_device *hdev)
2023 {
2024         struct gaudi_device *gaudi = hdev->asic_specific;
2025         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2026
2027         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2028                 return;
2029
2030         gaudi_sync_irqs(hdev);
2031
2032         if (gaudi->multi_msi_mode) {
2033                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2034                                                 true);
2035                 free_irq(irq, &hdev->event_queue);
2036
2037                 for (i = 0 ; i < cq_cnt ; i++) {
2038                         irq = gaudi_pci_irq_vector(hdev, i, false);
2039                         free_irq(irq, &hdev->completion_queue[i]);
2040                 }
2041         } else {
2042                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2043         }
2044
2045         pci_free_irq_vectors(hdev->pdev);
2046
2047         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2048 }
2049
2050 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2051 {
2052         struct gaudi_device *gaudi = hdev->asic_specific;
2053
2054         if (hdev->asic_prop.fw_security_enabled)
2055                 return;
2056
2057         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2058                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2059                 return;
2060
2061         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2062                 return;
2063
2064         if (!hdev->sram_scrambler_enable)
2065                 return;
2066
2067         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2068                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2069         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2070                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2071         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2072                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2073         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2074                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2075         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2076                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2077         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2078                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2079         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2080                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2081         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2082                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2083
2084         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2085                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2086         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2087                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2088         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2089                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2090         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2091                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2092         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2093                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2094         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2095                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2096         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2097                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2098         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2099                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2100
2101         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2102                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2103         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2104                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2105         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2106                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2107         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2108                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2109         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2110                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2111         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2112                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2113         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2114                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2115         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2116                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2117
2118         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2119 }
2120
2121 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2122 {
2123         struct gaudi_device *gaudi = hdev->asic_specific;
2124
2125         if (hdev->asic_prop.fw_security_enabled)
2126                 return;
2127
2128         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2129                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2130                 return;
2131
2132         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2133                 return;
2134
2135         if (!hdev->dram_scrambler_enable)
2136                 return;
2137
2138         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2139                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2140         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2141                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2142         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2143                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2144         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2145                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2146         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2147                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2148         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2149                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2150         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2151                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2152         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2153                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2154
2155         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2156                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2157         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2158                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2159         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2160                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2161         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2162                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2163         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2164                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2165         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2166                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2167         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2168                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2169         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2170                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2171
2172         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2173                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2174         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2175                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2176         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2177                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2178         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2179                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2180         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2181                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2182         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2183                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2184         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2185                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2186         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2187                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2188
2189         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2190 }
2191
2192 static void gaudi_init_e2e(struct hl_device *hdev)
2193 {
2194         if (hdev->asic_prop.fw_security_enabled)
2195                 return;
2196
2197         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2198                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2199                 return;
2200
2201         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2202         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2203         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2204         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2205
2206         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2207         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2208         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2209         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2210
2211         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2212         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2213         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2214         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2215
2216         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2217         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2218         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2219         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2220
2221         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2222         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2223         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2224         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2225
2226         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2227         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2228         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2229         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2230
2231         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2232         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2233         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2234         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2235
2236         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2237         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2238         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2239         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2240
2241         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2242         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2243         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2244         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2245
2246         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2247         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2248         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2249         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2250
2251         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2252         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2253         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2254         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2255
2256         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2257         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2258         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2259         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2260
2261         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2262         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2263         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2264         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2265
2266         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2267         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2268         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2269         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2270
2271         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2272         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2273         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2274         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2275
2276         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2277         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2278         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2279         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2280
2281         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2282         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2283         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2284         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2285
2286         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2287         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2288         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2289         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2290
2291         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2292         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2293         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2294         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2295
2296         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2297         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2298         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2299         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2300
2301         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2302         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2303         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2304         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2305
2306         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2307         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2308         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2309         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2310
2311         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2312         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2313         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2314         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2315
2316         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2317         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2318         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2319         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2320
2321         if (!hdev->dram_scrambler_enable) {
2322                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2323                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2324                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2325                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2326
2327                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2328                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2329                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2330                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2331
2332                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2333                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2334                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2335                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2336
2337                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2338                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2339                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2340                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2341
2342                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2343                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2344                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2345                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2346
2347                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2348                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2349                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2350                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2351
2352                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2353                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2354                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2355                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2356
2357                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2358                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2359                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2360                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2361
2362                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2363                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2364                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2365                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2366
2367                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2368                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2369                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2370                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2371
2372                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2373                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2374                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2375                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2376
2377                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2378                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2379                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2380                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2381
2382                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2383                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2384                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2385                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2386
2387                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2388                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2389                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2390                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2391
2392                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2393                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2394                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2395                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2396
2397                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2398                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2399                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2400                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2401
2402                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2403                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2404                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2405                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2406
2407                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2408                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2409                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2410                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2411
2412                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2413                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2414                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2415                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2416
2417                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2418                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2419                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2420                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2421
2422                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2423                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2424                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2425                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2426
2427                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2428                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2429                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2430                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2431
2432                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2433                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2434                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2435                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2436
2437                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2438                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2439                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2440                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2441         }
2442
2443         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2444                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2445         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2446                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2447
2448         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2449                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2450         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2451                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2452
2453         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2454                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2455         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2456                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2457
2458         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2459                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2460         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2461                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2462
2463         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2464                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2465         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2466                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2467
2468         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2469                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2470         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2471                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2472
2473         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2474                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2475         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2476                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2477
2478         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2479                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2480         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2481                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2482
2483         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2484                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2485         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2486                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2487
2488         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2489                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2490         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2491                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2492
2493         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2494                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2495         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2496                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2497
2498         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2499                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2500         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2501                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2502
2503         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2504                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2505         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2506                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2507
2508         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2509                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2510         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2511                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2512
2513         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2514                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2515         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2516                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2517
2518         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2519                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2520         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2521                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2522
2523         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2524                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2525         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2526                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2527
2528         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2529                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2530         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2531                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2532
2533         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2534                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2535         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2536                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2537
2538         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2539                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2540         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2541                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2542
2543         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2544                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2545         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2546                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2547
2548         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2549                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2550         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2551                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2552
2553         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2554                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2555         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2556                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2557
2558         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2559                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2560         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2561                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2562 }
2563
2564 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2565 {
2566         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2567
2568         if (hdev->asic_prop.fw_security_enabled)
2569                 return;
2570
2571         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2572                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2573                 return;
2574
2575         hbm0_wr = 0x33333333;
2576         hbm0_rd = 0x77777777;
2577         hbm1_wr = 0x55555555;
2578         hbm1_rd = 0xDDDDDDDD;
2579
2580         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2581         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2582         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2583         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2584
2585         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2586         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2587         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2588         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2589
2590         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2591         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2592         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2593         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2594
2595         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2596         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2597         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2598         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2599
2600         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2601                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2602                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2603         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2604                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2605                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2606         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2607                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2608                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2609         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2610                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2611                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2612
2613         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2614                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2615                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2616         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2617                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2618                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2619         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2620                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2621                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2622         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2623                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2624                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2625 }
2626
2627 static void gaudi_init_golden_registers(struct hl_device *hdev)
2628 {
2629         u32 tpc_offset;
2630         int tpc_id, i;
2631
2632         gaudi_init_e2e(hdev);
2633         gaudi_init_hbm_cred(hdev);
2634
2635         for (tpc_id = 0, tpc_offset = 0;
2636                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2637                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2638                 /* Mask all arithmetic interrupts from TPC */
2639                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2640                 /* Set 16 cache lines */
2641                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2642                                 ICACHE_FETCH_LINE_NUM, 2);
2643         }
2644
2645         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2646         for (i = 0 ; i < 128 ; i += 8)
2647                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2648
2649         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2650         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2651         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2652         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2653 }
2654
2655 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2656                                         int qman_id, dma_addr_t qman_pq_addr)
2657 {
2658         struct cpu_dyn_regs *dyn_regs =
2659                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2660         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2661         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2662         u32 q_off, dma_qm_offset;
2663         u32 dma_qm_err_cfg, irq_handler_offset;
2664
2665         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2666
2667         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2668                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2669         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2670                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2671         so_base_en_lo = lower_32_bits(CFG_BASE +
2672                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2673         so_base_en_hi = upper_32_bits(CFG_BASE +
2674                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2675         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2676                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2677         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2678                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2679         so_base_ws_lo = lower_32_bits(CFG_BASE +
2680                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2681         so_base_ws_hi = upper_32_bits(CFG_BASE +
2682                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2683
2684         q_off = dma_qm_offset + qman_id * 4;
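             /* Per-stream/CP register instances are 4 bytes apart */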
2685
2686         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2687         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2688
2689         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2690         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2691         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2692
2693         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2694         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2695                                                         QMAN_LDMA_SRC_OFFSET);
2696         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2697                                                         QMAN_LDMA_DST_OFFSET);
2698
2699         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2700         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2701         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2702         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2703         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2704         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2705         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2706         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2707
2708         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2709
2710         /* The following configuration is needed only once per QMAN */
2711         if (qman_id == 0) {
2712                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2713                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2714                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
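                     /*
                      * QMAN errors are reported by writing to this address:
                      * the GIC distributor when GIC interrupts are enabled,
                      * otherwise the F/W-provided dynamic register.
                      */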
2715
2716                 /* Configure RAZWI IRQ */
2717                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2718                 if (hdev->stop_on_err)
2719                         dma_qm_err_cfg |=
2720                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2721
2722                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2723
2724                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2725                         lower_32_bits(CFG_BASE + irq_handler_offset));
2726                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2727                         upper_32_bits(CFG_BASE + irq_handler_offset));
2728
2729                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2730                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2731                                                                         dma_id);
2732
2733                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2734                                 QM_ARB_ERR_MSG_EN_MASK);
2735
2736                 /* Increase ARB WDT to support streams architecture */
2737                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2738                                 GAUDI_ARB_WDT_TIMEOUT);
2739
2740                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2741                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2742
2743                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2744         }
2745 }
2746
2747 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2748 {
2749         struct cpu_dyn_regs *dyn_regs =
2750                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2751         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2752         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2753         u32 irq_handler_offset;
2754
2755         /* Set to maximum possible according to physical size */
2756         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2757         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2758
2759         /* WA for H/W bug H3-2116 */
2760         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2761
2762         /* STOP_ON bit implies no completion of the operation on RAZWI */
2763         if (hdev->stop_on_err)
2764                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2765
2766         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2767
2768         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2769                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2770                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2771
2772         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2773                 lower_32_bits(CFG_BASE + irq_handler_offset));
2774         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2775                 upper_32_bits(CFG_BASE + irq_handler_offset));
2776
2777         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2778                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2779         WREG32(mmDMA0_CORE_PROT + dma_offset,
2780                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2781         /* If the channel is secured, it should be in MMU bypass mode */
2782         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2783                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2784         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2785 }
2786
2787 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2788                                 u32 enable_mask)
2789 {
2790         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2791
2792         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2793 }
2794
2795 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2796 {
2797         struct gaudi_device *gaudi = hdev->asic_specific;
2798         struct hl_hw_queue *q;
2799         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2800
2801         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2802                 return;
2803
2804         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2805                 dma_id = gaudi_dma_assignment[i];
2806                 /*
2807                  * For queues after the CPU Q, add 1 to get the correct
2808                  * queue index. In addition, account for the CPU EQ and the
2809                  * NIC IRQs in order to get the correct MSI vector.
2810                  */
2811                 if (dma_id > 1) {
2812                         cpu_skip = 1;
2813                         nic_skip = NIC_NUMBER_OF_ENGINES;
2814                 } else {
2815                         cpu_skip = 0;
2816                         nic_skip = 0;
2817                 }
2818
2819                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2820                         q_idx = 4 * dma_id + j + cpu_skip;
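                             /* e.g. DMA5 stream 0: q_idx = 4 * 5 + 0 + 1 = 21 */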
2821                         q = &hdev->kernel_queues[q_idx];
2822                         q->cq_id = cq_id++;
2823                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2824                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2825                                                 q->bus_address);
2826                 }
2827
2828                 gaudi_init_dma_core(hdev, dma_id);
2829
2830                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2831         }
2832
2833         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2834 }
2835
2836 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2837                                         int qman_id, u64 qman_base_addr)
2838 {
2839         struct cpu_dyn_regs *dyn_regs =
2840                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2841         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2842         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2843         u32 dma_qm_err_cfg, irq_handler_offset;
2844         u32 q_off, dma_qm_offset;
2845
2846         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2847
2848         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2849                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2850         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2851                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2852         so_base_en_lo = lower_32_bits(CFG_BASE +
2853                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2854         so_base_en_hi = upper_32_bits(CFG_BASE +
2855                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2856         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2857                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2858         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2859                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2860         so_base_ws_lo = lower_32_bits(CFG_BASE +
2861                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2862         so_base_ws_hi = upper_32_bits(CFG_BASE +
2863                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2864
2865         q_off = dma_qm_offset + qman_id * 4;
2866
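             /* qman_id 0-3 are the upper CPs, qman_id 4 is the lower CP */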
2867         if (qman_id < 4) {
2868                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2869                                         lower_32_bits(qman_base_addr));
2870                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2871                                         upper_32_bits(qman_base_addr));
2872
2873                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2874                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2875                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2876
2877                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2878                                                         QMAN_CPDMA_SIZE_OFFSET);
2879                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2880                                                         QMAN_CPDMA_SRC_OFFSET);
2881                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2882                                                         QMAN_CPDMA_DST_OFFSET);
2883         } else {
2884                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2885                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2886                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2887
2888                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2889                                                         QMAN_LDMA_SIZE_OFFSET);
2890                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2891                                                         QMAN_LDMA_SRC_OFFSET);
2892                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2893                                                         QMAN_LDMA_DST_OFFSET);
2894
2895                 /* Configure RAZWI IRQ */
2896                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2897                 if (hdev->stop_on_err)
2898                         dma_qm_err_cfg |=
2899                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2900
2901                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2902
2903                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2904                         lower_32_bits(CFG_BASE + irq_handler_offset));
2905                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2906                         upper_32_bits(CFG_BASE + irq_handler_offset));
2907
2908                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2909                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2910                                                                         dma_id);
2911
2912                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2913                                 QM_ARB_ERR_MSG_EN_MASK);
2914
2915                 /* Increase ARB WDT to support streams architecture */
2916                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2917                                 GAUDI_ARB_WDT_TIMEOUT);
2918
2919                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2920                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2921                                 QMAN_INTERNAL_MAKE_TRUSTED);
2922         }
2923
2924         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2925         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2926         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2927         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2928
2929         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2930         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2931                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2932                                 mtr_base_ws_lo);
2933                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2934                                 mtr_base_ws_hi);
2935                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2936                                 so_base_ws_lo);
2937                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2938                                 so_base_ws_hi);
2939         }
2940 }
2941
2942 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2943 {
2944         struct gaudi_device *gaudi = hdev->asic_specific;
2945         struct gaudi_internal_qman_info *q;
2946         u64 qman_base_addr;
2947         int i, j, dma_id, internal_q_index;
2948
2949         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2950                 return;
2951
2952         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2953                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2954
2955                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2956                          /*
2957                           * Add the CPU queue in order to get the correct queue
2958                           * number, as all internal queues are placed after it
2959                           */
2960                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2961
2962                         q = &gaudi->internal_qmans[internal_q_index];
2963                         qman_base_addr = (u64) q->pq_dma_addr;
2964                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2965                                                 qman_base_addr);
2966                 }
2967
2968                 /* Initializing lower CP for HBM DMA QMAN */
2969                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2970
2971                 gaudi_init_dma_core(hdev, dma_id);
2972
2973                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2974         }
2975
2976         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2977 }
2978
2979 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2980                                         int qman_id, u64 qman_base_addr)
2981 {
2982         struct cpu_dyn_regs *dyn_regs =
2983                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2984         u32 mtr_base_lo, mtr_base_hi;
2985         u32 so_base_lo, so_base_hi;
2986         u32 irq_handler_offset;
2987         u32 q_off, mme_id;
2988         u32 mme_qm_err_cfg;
2989
2990         mtr_base_lo = lower_32_bits(CFG_BASE +
2991                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2992         mtr_base_hi = upper_32_bits(CFG_BASE +
2993                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2994         so_base_lo = lower_32_bits(CFG_BASE +
2995                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2996         so_base_hi = upper_32_bits(CFG_BASE +
2997                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2998
2999         q_off = mme_offset + qman_id * 4;
3000
3001         if (qman_id < 4) {
3002                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3003                                         lower_32_bits(qman_base_addr));
3004                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3005                                         upper_32_bits(qman_base_addr));
3006
3007                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3008                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3009                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3010
3011                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3012                                                         QMAN_CPDMA_SIZE_OFFSET);
3013                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3014                                                         QMAN_CPDMA_SRC_OFFSET);
3015                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3016                                                         QMAN_CPDMA_DST_OFFSET);
3017         } else {
3018                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3019                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3020                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3021
3022                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3023                                                         QMAN_LDMA_SIZE_OFFSET);
3024                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3025                                                         QMAN_LDMA_SRC_OFFSET);
3026                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3027                                                         QMAN_LDMA_DST_OFFSET);
3028
3029                 /* Configure RAZWI IRQ */
3030                 mme_id = mme_offset /
3031                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
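                     /* Only MME0 and MME2 have a QMAN, hence the division by 2 */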
3032
3033                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3034                 if (hdev->stop_on_err)
3035                         mme_qm_err_cfg |=
3036                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3037
3038                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3039
3040                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3041                         lower_32_bits(CFG_BASE + irq_handler_offset));
3042                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3043                         upper_32_bits(CFG_BASE + irq_handler_offset));
3044
3045                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3046                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3047                                                                         mme_id);
3048
3049                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3050                                 QM_ARB_ERR_MSG_EN_MASK);
3051
3052                 /* Increase ARB WDT to support streams architecture */
3053                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3054                                 GAUDI_ARB_WDT_TIMEOUT);
3055
3056                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3057                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3058                                 QMAN_INTERNAL_MAKE_TRUSTED);
3059         }
3060
3061         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3062         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3063         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3064         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3065 }
3066
3067 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3068 {
3069         struct gaudi_device *gaudi = hdev->asic_specific;
3070         struct gaudi_internal_qman_info *q;
3071         u64 qman_base_addr;
3072         u32 mme_offset;
3073         int i, internal_q_index;
3074
3075         if (gaudi->hw_cap_initialized & HW_CAP_MME)
3076                 return;
3077
3078         /*
3079          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3080          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3081          */
3082
3083         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3084
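             /* The first four queues go to MME2, the remaining ones to MME0 */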
3085         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3086                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3087                 q = &gaudi->internal_qmans[internal_q_index];
3088                 qman_base_addr = (u64) q->pq_dma_addr;
3089                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3090                                         qman_base_addr);
3091                 if (i == 3)
3092                         mme_offset = 0;
3093         }
3094
3095         /* Initializing lower CP for MME QMANs */
3096         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3097         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3098         gaudi_init_mme_qman(hdev, 0, 4, 0);
3099
3100         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3101         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3102
3103         gaudi->hw_cap_initialized |= HW_CAP_MME;
3104 }
3105
3106 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3107                                 int qman_id, u64 qman_base_addr)
3108 {
3109         struct cpu_dyn_regs *dyn_regs =
3110                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3111         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3112         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3113         u32 tpc_qm_err_cfg, irq_handler_offset;
3114         u32 q_off, tpc_id;
3115
3116         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3117                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3118         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3119                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3120         so_base_en_lo = lower_32_bits(CFG_BASE +
3121                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3122         so_base_en_hi = upper_32_bits(CFG_BASE +
3123                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3124         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3125                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3126         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3127                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3128         so_base_ws_lo = lower_32_bits(CFG_BASE +
3129                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3130         so_base_ws_hi = upper_32_bits(CFG_BASE +
3131                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3132
3133         q_off = tpc_offset + qman_id * 4;
3134
3135         tpc_id = tpc_offset /
3136                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3137
3138         if (qman_id < 4) {
3139                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3140                                         lower_32_bits(qman_base_addr));
3141                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3142                                         upper_32_bits(qman_base_addr));
3143
3144                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3145                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3146                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3147
3148                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3149                                                         QMAN_CPDMA_SIZE_OFFSET);
3150                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3151                                                         QMAN_CPDMA_SRC_OFFSET);
3152                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3153                                                         QMAN_CPDMA_DST_OFFSET);
3154         } else {
3155                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3156                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3157                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3158
3159                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3160                                                         QMAN_LDMA_SIZE_OFFSET);
3161                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3162                                                         QMAN_LDMA_SRC_OFFSET);
3163                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3164                                                         QMAN_LDMA_DST_OFFSET);
3165
3166                 /* Configure RAZWI IRQ */
3167                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3168                 if (hdev->stop_on_err)
3169                         tpc_qm_err_cfg |=
3170                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3171
3172                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3173
3174                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3175                         lower_32_bits(CFG_BASE + irq_handler_offset));
3176                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3177                         upper_32_bits(CFG_BASE + irq_handler_offset));
3178
3179                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3180                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3181                                                                         tpc_id);
3182
3183                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3184                                 QM_ARB_ERR_MSG_EN_MASK);
3185
3186                 /* Increase ARB WDT to support streams architecture */
3187                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3188                                 GAUDI_ARB_WDT_TIMEOUT);
3189
3190                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3191                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3192                                 QMAN_INTERNAL_MAKE_TRUSTED);
3193         }
3194
3195         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3196         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3197         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3198         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3199
3200         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3201         if (tpc_id == 6) {
3202                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3203                                 mtr_base_ws_lo);
3204                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3205                                 mtr_base_ws_hi);
3206                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3207                                 so_base_ws_lo);
3208                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3209                                 so_base_ws_hi);
3210         }
3211 }
3212
3213 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3214 {
3215         struct gaudi_device *gaudi = hdev->asic_specific;
3216         struct gaudi_internal_qman_info *q;
3217         u64 qman_base_addr;
3218         u32 so_base_hi, tpc_offset = 0;
3219         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3220                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3221         int i, tpc_id, internal_q_index;
3222
3223         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3224                 return;
3225
3226         so_base_hi = upper_32_bits(CFG_BASE +
3227                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3228
3229         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3230                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3231                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3232                                                 tpc_id * QMAN_STREAMS + i;
3233                         q = &gaudi->internal_qmans[internal_q_index];
3234                         qman_base_addr = (u64) q->pq_dma_addr;
3235                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3236                                                 qman_base_addr);
3237
3238                         if (i == 3) {
3239                                 /* Initializing lower CP for TPC QMAN */
3240                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3241
3242                                 /* Enable the QMAN and TPC channel */
3243                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3244                                                 QMAN_TPC_ENABLE);
3245                         }
3246                 }
3247
3248                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3249                                 so_base_hi);
3250
3251                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3252
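                     /* Mark this TPC in the per-TPC capability mask */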
3253                 gaudi->hw_cap_initialized |=
3254                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3255         }
3256 }
3257
3258 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3259                                 int qman_id, u64 qman_base_addr, int nic_id)
3260 {
3261         struct cpu_dyn_regs *dyn_regs =
3262                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3263         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3264         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3265         u32 nic_qm_err_cfg, irq_handler_offset;
3266         u32 q_off;
3267
3268         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3269                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3270         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3271                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3272         so_base_en_lo = lower_32_bits(CFG_BASE +
3273                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3274         so_base_en_hi = upper_32_bits(CFG_BASE +
3275                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3276         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3277                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3278         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3279                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3280         so_base_ws_lo = lower_32_bits(CFG_BASE +
3281                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3282         so_base_ws_hi = upper_32_bits(CFG_BASE +
3283                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3284
3285         q_off = nic_offset + qman_id * 4;
3286
3287         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3288         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3289
3290         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3291         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3292         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3293
3294         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3295                                                         QMAN_LDMA_SIZE_OFFSET);
3296         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3297                                                         QMAN_LDMA_SRC_OFFSET);
3298         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3299                                                         QMAN_LDMA_DST_OFFSET);
3300
3301         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3302         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3303         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3304         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3305
3306         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3307         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3308         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3309         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3310         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3311
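             /* The following configuration is needed only once per QMAN */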
3312         if (qman_id == 0) {
3313                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3314                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3315                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3316
3317                 /* Configure RAZWI IRQ */
3318                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3319                 if (hdev->stop_on_err)
3320                         nic_qm_err_cfg |=
3321                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3322
3323                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3324
3325                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3326                         lower_32_bits(CFG_BASE + irq_handler_offset));
3327                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3328                         upper_32_bits(CFG_BASE + irq_handler_offset));
3329
3330                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3331                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3332                                                                         nic_id);
3333
3334                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3335                                 QM_ARB_ERR_MSG_EN_MASK);
3336
3337                 /* Increase ARB WDT to support streams architecture */
3338                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3339                                 GAUDI_ARB_WDT_TIMEOUT);
3340
3341                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3342                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3343                                 QMAN_INTERNAL_MAKE_TRUSTED);
3344         }
3345 }
3346
3347 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3348 {
3349         struct gaudi_device *gaudi = hdev->asic_specific;
3350         struct gaudi_internal_qman_info *q;
3351         u64 qman_base_addr;
3352         u32 nic_offset = 0;
3353         u32 nic_delta_between_qmans =
3354                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3355         u32 nic_delta_between_nics =
3356                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3357         int i, nic_id, internal_q_index;
3358
3359         if (!hdev->nic_ports_mask)
3360                 return;
3361
3362         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3363                 return;
3364
3365         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3366
3367         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3368                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3369                         nic_offset += nic_delta_between_qmans;
3370                         if (nic_id & 1) {
3371                                 nic_offset -= (nic_delta_between_qmans * 2);
3372                                 nic_offset += nic_delta_between_nics;
3373                         }
3374                         continue;
3375                 }
3376
3377                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3378                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3379                                                 nic_id * QMAN_STREAMS + i;
3380                         q = &gaudi->internal_qmans[internal_q_index];
3381                         qman_base_addr = (u64) q->pq_dma_addr;
3382                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3383                                                 qman_base_addr, nic_id);
3384                 }
3385
3386                 /* Enable the QMAN */
3387                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3388
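                     /*
                      * Each NIC macro contains two QMANs. After the second
                      * (odd) QMAN, rewind past both and advance to the next
                      * NIC macro block.
                      */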
3389                 nic_offset += nic_delta_between_qmans;
3390                 if (nic_id & 1) {
3391                         nic_offset -= (nic_delta_between_qmans * 2);
3392                         nic_offset += nic_delta_between_nics;
3393                 }
3394
3395                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3396         }
3397 }
3398
3399 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3400 {
3401         struct gaudi_device *gaudi = hdev->asic_specific;
3402
3403         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3404                 return;
3405
3406         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3407         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3408         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3409 }
3410
3411 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3412 {
3413         struct gaudi_device *gaudi = hdev->asic_specific;
3414
3415         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3416                 return;
3417
3418         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3419         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3420         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3421         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3422         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3423 }
3424
3425 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3426 {
3427         struct gaudi_device *gaudi = hdev->asic_specific;
3428
3429         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3430                 return;
3431
3432         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3433         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3434 }
3435
3436 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3437 {
3438         struct gaudi_device *gaudi = hdev->asic_specific;
3439         u32 tpc_offset = 0;
3440         int tpc_id;
3441
3442         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3443                 return;
3444
3445         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3446                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3447                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3448         }
3449 }
3450
3451 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3452 {
3453         struct gaudi_device *gaudi = hdev->asic_specific;
3454         u32 nic_mask, nic_offset = 0;
3455         u32 nic_delta_between_qmans =
3456                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3457         u32 nic_delta_between_nics =
3458                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3459         int nic_id;
3460
3461         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3462                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3463
3464                 if (gaudi->hw_cap_initialized & nic_mask)
3465                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3466
3467                 nic_offset += nic_delta_between_qmans;
3468                 if (nic_id & 1) {
3469                         nic_offset -= (nic_delta_between_qmans * 2);
3470                         nic_offset += nic_delta_between_nics;
3471                 }
3472         }
3473 }
3474
3475 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3476 {
3477         struct gaudi_device *gaudi = hdev->asic_specific;
3478
3479         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3480                 return;
3481
3482         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3483         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3484         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3485         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3486 }
3487
3488 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3489 {
3490         struct gaudi_device *gaudi = hdev->asic_specific;
3491
3492         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3493                 return;
3494
3495         /* Stop CPs of HBM DMA QMANs */
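             /* 0x1F stops all five CPs: the four upper CPs and the lower CP */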
3496
3497         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3498         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3499         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3500         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3501         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3502 }
3503
3504 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3505 {
3506         struct gaudi_device *gaudi = hdev->asic_specific;
3507
3508         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3509                 return;
3510
3511         /* Stop CPs of MME QMANs */
3512         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3513         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3514 }
3515
3516 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3517 {
3518         struct gaudi_device *gaudi = hdev->asic_specific;
3519
3520         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3521                 return;
3522
3523         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3524         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3525         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3526         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3527         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3528         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3529         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3530         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3531 }
3532
3533 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3534 {
3535         struct gaudi_device *gaudi = hdev->asic_specific;
3536
3537         /* Stop upper CPs of QMANs */
3538
3539         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3540                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3541                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3542                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3543                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3544
3545         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3546                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3547                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3548                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3549                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3550
3551         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3552                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3553                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3554                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3555                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3556
3557         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3558                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3559                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3560                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3561                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3562
3563         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3564                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3565                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3566                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3567                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3568
3569         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3570                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3571                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3572                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3573                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3574
3575         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3576                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3577                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3578                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3579                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3580
3581         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3582                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3583                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3584                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3585                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3586
3587         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3588                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3589                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3590                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3591                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3592
3593         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3594                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3595                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3596                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3597                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3598 }
3599
3600 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3601 {
3602         struct gaudi_device *gaudi = hdev->asic_specific;
3603
3604         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3605                 return;
3606
3607         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3608         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3609         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3610 }
3611
3612 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3613 {
3614         struct gaudi_device *gaudi = hdev->asic_specific;
3615
3616         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3617                 return;
3618
3619         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3620         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3621         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3622         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3623         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3624 }
3625
3626 static void gaudi_mme_stall(struct hl_device *hdev)
3627 {
3628         struct gaudi_device *gaudi = hdev->asic_specific;
3629
3630         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3631                 return;
3632
3633         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3634         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3635         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3636         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3637         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3638         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3639         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3640         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3641         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3642         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3643         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3644         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3645         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3646         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3647         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3648         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3649         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3650 }
3651
3652 static void gaudi_tpc_stall(struct hl_device *hdev)
3653 {
3654         struct gaudi_device *gaudi = hdev->asic_specific;
3655
3656         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3657                 return;
3658
3659         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3660         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3661         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3662         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3663         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3664         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3665         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3666         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3667 }
3668
3669 static void gaudi_set_clock_gating(struct hl_device *hdev)
3670 {
3671         struct gaudi_device *gaudi = hdev->asic_specific;
3672         u32 qman_offset;
3673         bool enable;
3674         int i;
3675
3676         /* If we are in a debug session, don't enable clock gating as it
3677          * may interfere
3678          */
3679         if (hdev->in_debug)
3680                 return;
3681
3682         if (hdev->asic_prop.fw_security_enabled)
3683                 return;
3684
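             /* One bit per engine in clock_gating_mask; a set bit enables gating */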
3685         for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3686                 enable = !!(hdev->clock_gating_mask &
3687                                 (BIT_ULL(gaudi_dma_assignment[i])));
3688
3689                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3690                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3691                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3692                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3693                                 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3694         }
3695
3696         for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3697                 enable = !!(hdev->clock_gating_mask &
3698                                 (BIT_ULL(gaudi_dma_assignment[i])));
3699
3700                 /* GC sends work to the DMA engine through the upper CP in
3701                  * DMA5, so we must not enable clock gating for that DMA
3702                  */
3703                 if (i == GAUDI_HBM_DMA_4)
3704                         enable = 0;
3705
3706                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3707                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3708                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3709                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3710                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3711         }
3712
3713         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3714         WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3715         WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3716
3717         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3718         WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3719         WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3720
3721         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3722                 enable = !!(hdev->clock_gating_mask &
3723                                 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3724
3725                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3726                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3727                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3728                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3729
3730                 qman_offset += TPC_QMAN_OFFSET;
3731         }
3732
3733         gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3734 }
3735
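/* Clear the CGM configuration of all DMA, MME and TPC QMANs, effectively
 * turning clock gating off for those engines
 */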
3736 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3737 {
3738         struct gaudi_device *gaudi = hdev->asic_specific;
3739         u32 qman_offset;
3740         int i;
3741
3742         if (hdev->asic_prop.fw_security_enabled)
3743                 return;
3744
3745         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3746                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3747                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3748
3749                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3750         }
3751
3752         WREG32(mmMME0_QM_CGM_CFG, 0);
3753         WREG32(mmMME0_QM_CGM_CFG1, 0);
3754         WREG32(mmMME2_QM_CGM_CFG, 0);
3755         WREG32(mmMME2_QM_CGM_CFG1, 0);
3756
3757         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3758                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3759                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3760
3761                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3762         }
3763
3764         gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3765 }
3766
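/* Restart the PSOC 64-bit timestamp counter from zero */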
3767 static void gaudi_enable_timestamp(struct hl_device *hdev)
3768 {
3769         /* Disable the timestamp counter */
3770         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3771
3772         /* Zero the lower/upper parts of the 64-bit counter */
3773         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3774         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3775
3776         /* Enable the counter */
3777         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3778 }
3779
3780 static void gaudi_disable_timestamp(struct hl_device *hdev)
3781 {
3782         /* Disable the timestamp counter */
3783         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3784 }
3785
3786 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3787 {
3788         u32 wait_timeout_ms;
3789
3790         dev_info(hdev->dev,
3791                 "Halting compute engines and disabling interrupts\n");
3792
3793         if (hdev->pldm)
3794                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3795         else
3796                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3797
3798         gaudi_stop_nic_qmans(hdev);
3799         gaudi_stop_mme_qmans(hdev);
3800         gaudi_stop_tpc_qmans(hdev);
3801         gaudi_stop_hbm_dma_qmans(hdev);
3802         gaudi_stop_pci_dma_qmans(hdev);
3803
3804         hdev->asic_funcs->disable_clock_gating(hdev);
3805
3806         msleep(wait_timeout_ms);
3807
3808         gaudi_pci_dma_stall(hdev);
3809         gaudi_hbm_dma_stall(hdev);
3810         gaudi_tpc_stall(hdev);
3811         gaudi_mme_stall(hdev);
3812
3813         msleep(wait_timeout_ms);
3814
3815         gaudi_disable_nic_qmans(hdev);
3816         gaudi_disable_mme_qmans(hdev);
3817         gaudi_disable_tpc_qmans(hdev);
3818         gaudi_disable_hbm_dma_qmans(hdev);
3819         gaudi_disable_pci_dma_qmans(hdev);
3820
3821         gaudi_disable_timestamp(hdev);
3822
3823         gaudi_disable_msi(hdev);
3824 }
3825
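/* Program the hop0 page-table address for every ASID, configure the STLB
 * cache-management area and enable the MMU
 */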
3826 static int gaudi_mmu_init(struct hl_device *hdev)
3827 {
3828         struct asic_fixed_properties *prop = &hdev->asic_prop;
3829         struct gaudi_device *gaudi = hdev->asic_specific;
3830         u64 hop0_addr;
3831         int rc, i;
3832
3833         if (!hdev->mmu_enable)
3834                 return 0;
3835
3836         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3837                 return 0;
3838
3839         for (i = 0 ; i < prop->max_asid ; i++) {
3840                 hop0_addr = prop->mmu_pgt_addr +
3841                                 (i * prop->mmu_hop_table_size);
3842
3843                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3844                 if (rc) {
3845                         dev_err(hdev->dev,
3846                                 "failed to set hop0 addr for asid %d\n", i);
3847                         goto err;
3848                 }
3849         }
3850
3851         /* Init the MMU cache management page */
3852         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3853         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3854
3855         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3856
3857         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3858         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3859
3860         WREG32(mmSTLB_HOP_CONFIGURATION,
3861                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3862
3863         /*
3864          * The H/W expects the first PI after init to be 1. After wraparound
3865          * we'll write 0.
3866          */
3867         gaudi->mmu_cache_inv_pi = 1;
3868
3869         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3870
3871         return 0;
3872
3873 err:
3874         return rc;
3875 }
3876
3877 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3878 {
3879         void __iomem *dst;
3880
3881         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3882
3883         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3884 }
3885
3886 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3887 {
3888         void __iomem *dst;
3889
3890         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3891
3892         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3893 }
3894
3895 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3896 {
3897         struct dynamic_fw_load_mgr *dynamic_loader;
3898         struct cpu_dyn_regs *dyn_regs;
3899
3900         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3901
3902         /*
3903          * Set initial values for a few specific dynamic regs. Before the
3904          * first descriptor is read from the FW, these values have to be
3905          * hard-coded. In later stages of the protocol they are updated
3906          * automatically by reading the FW descriptor, so the data there
3907          * is always up-to-date.
3908          */
3909         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3910         dyn_regs->kmd_msg_to_cpu =
3911                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3912         dyn_regs->cpu_cmd_status_to_host =
3913                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3914
3915         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3916 }
3917
3918 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3919 {
3920         struct static_fw_load_mgr *static_loader;
3921
3922         static_loader = &hdev->fw_loader.static_loader;
3923
3924         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3925         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3926         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3927         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3928         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3929         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3930         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3931         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3932         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3933         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3934         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3935         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3936         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3937                         GAUDI_PLDM_RESET_WAIT_MSEC :
3938                         GAUDI_CPU_RESET_WAIT_MSEC;
3939 }
3940
3941 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3942 {
3943         struct asic_fixed_properties *prop = &hdev->asic_prop;
3944         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3945
3946         /* fill common fields */
3947         fw_loader->linux_loaded = false;
3948         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3949         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3950         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3951         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3952         fw_loader->skip_bmc = !hdev->bmc_enable;
3953         fw_loader->sram_bar_id = SRAM_BAR_ID;
3954         fw_loader->dram_bar_id = HBM_BAR_ID;
3955
3956         if (prop->dynamic_fw_load)
3957                 gaudi_init_dynamic_firmware_loader(hdev);
3958         else
3959                 gaudi_init_static_firmware_loader(hdev);
3960 }
3961
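/* Extend the device CPU addressing (when the driver owns security) and run
 * the common F/W init sequence. Skipped if the preboot CPU F/W is not in use
 * or the CPU was already initialized.
 */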
3962 static int gaudi_init_cpu(struct hl_device *hdev)
3963 {
3964         struct gaudi_device *gaudi = hdev->asic_specific;
3965         int rc;
3966
3967         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3968                 return 0;
3969
3970         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3971                 return 0;
3972
3973         /*
3974          * The device CPU works with 40-bit addresses.
3975          * This register extends the addressing to 50 bits.
3976          */
3977         if (!hdev->asic_prop.fw_security_enabled)
3978                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3979
3980         rc = hl_fw_init_cpu(hdev);
3981
3982         if (rc)
3983                 return rc;
3984
3985         gaudi->hw_cap_initialized |= HW_CAP_CPU;
3986
3987         return 0;
3988 }
3989
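/* Hand the PQ/EQ/CQ base addresses and sizes to the embedded CPU, signal it
 * via the PI-update interrupt and poll until it reports READY_FOR_HOST
 */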
3990 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3991 {
3992         struct cpu_dyn_regs *dyn_regs =
3993                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3994         struct asic_fixed_properties *prop = &hdev->asic_prop;
3995         struct gaudi_device *gaudi = hdev->asic_specific;
3996         u32 status, irq_handler_offset;
3997         struct hl_eq *eq;
3998         struct hl_hw_queue *cpu_pq =
3999                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4000         int err;
4001
4002         if (!hdev->cpu_queues_enable)
4003                 return 0;
4004
4005         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4006                 return 0;
4007
4008         eq = &hdev->event_queue;
4009
4010         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4011         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4012
4013         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4014         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4015
4016         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4017                         lower_32_bits(hdev->cpu_accessible_dma_address));
4018         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4019                         upper_32_bits(hdev->cpu_accessible_dma_address));
4020
4021         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4022         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4023         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4024
4025         /* Used for EQ CI */
4026         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4027
4028         WREG32(mmCPU_IF_PF_PQ_PI, 0);
4029
4030         if (gaudi->multi_msi_mode)
4031                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4032         else
4033                 WREG32(mmCPU_IF_QUEUE_INIT,
4034                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4035
4036         irq_handler_offset = prop->gic_interrupts_enable ?
4037                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4038                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4039
4040         WREG32(irq_handler_offset,
4041                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4042
4043         err = hl_poll_timeout(
4044                 hdev,
4045                 mmCPU_IF_QUEUE_INIT,
4046                 status,
4047                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4048                 1000,
4049                 cpu_timeout);
4050
4051         if (err) {
4052                 dev_err(hdev->dev,
4053                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4054                 return -EIO;
4055         }
4056
4057         /* update FW application security bits */
4058         if (prop->fw_cpu_boot_dev_sts0_valid)
4059                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4060         if (prop->fw_cpu_boot_dev_sts1_valid)
4061                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4062
4063         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4064         return 0;
4065 }
4066
4067 static void gaudi_pre_hw_init(struct hl_device *hdev)
4068 {
4069         /* Perform read from the device to make sure device is up */
4070         RREG32(mmHW_STATE);
4071
4072         if (!hdev->asic_prop.fw_security_enabled) {
4073                 /* Set the access through PCI bars (Linux driver only) as
4074                  * secured
4075                  */
4076                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4077                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4078                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4079
4080                 /* Perform read to flush the waiting writes to ensure
4081                  * configuration was set in the device
4082                  */
4083                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4084         }
4085
4086         /*
4087          * Let's mark in the H/W that we have reached this point. We check
4088          * this value in the reset_before_init function to understand whether
4089          * we need to reset the chip before doing H/W init. This register is
4090          * cleared by the H/W upon H/W reset
4091          */
4092         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4093 }
4094
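/* Main H/W initialization flow: map the HBM bar, bring up the device CPU,
 * init the scramblers, golden registers, MMU, security and all QMANs, then
 * enable MSI and the CPU queues
 */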
4095 static int gaudi_hw_init(struct hl_device *hdev)
4096 {
4097         struct gaudi_device *gaudi = hdev->asic_specific;
4098         int rc;
4099
4100         gaudi_pre_hw_init(hdev);
4101
4102         /* If the iATU is configured by FW, the HBM bar ALWAYS points to
4103          * DRAM_PHYS_BASE. Set it here so that any later attempt to move it
4104          * to a different address will result in an error
4105          */
4106         if (hdev->asic_prop.iatu_done_by_fw)
4107                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4108
4109         /*
4110          * Before pushing u-boot/linux to the device, we need to set the HBM
4111          * bar to the DRAM base address
4112          */
4113         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4114                 dev_err(hdev->dev,
4115                         "failed to map HBM bar to DRAM base address\n");
4116                 return -EIO;
4117         }
4118
4119         rc = gaudi_init_cpu(hdev);
4120         if (rc) {
4121                 dev_err(hdev->dev, "failed to initialize CPU\n");
4122                 return rc;
4123         }
4124
4125         /* If clock gating was enabled in preboot, we need to disable it here
4126          * before touching the MME/TPC registers.
4127          * There is no need to take the clock gating mutex because no other
4128          * relevant code can run while this function runs
4129          */
4130         hdev->asic_funcs->disable_clock_gating(hdev);
4131
4132         /* SRAM scrambler must be initialized after CPU is running from HBM */
4133         gaudi_init_scrambler_sram(hdev);
4134
4135         /* This is here just in case we are working without CPU */
4136         gaudi_init_scrambler_hbm(hdev);
4137
4138         gaudi_init_golden_registers(hdev);
4139
4140         rc = gaudi_mmu_init(hdev);
4141         if (rc)
4142                 return rc;
4143
4144         gaudi_init_security(hdev);
4145
4146         gaudi_init_pci_dma_qmans(hdev);
4147
4148         gaudi_init_hbm_dma_qmans(hdev);
4149
4150         gaudi_init_mme_qmans(hdev);
4151
4152         gaudi_init_tpc_qmans(hdev);
4153
4154         gaudi_init_nic_qmans(hdev);
4155
4156         hdev->asic_funcs->set_clock_gating(hdev);
4157
4158         gaudi_enable_timestamp(hdev);
4159
4160         /* MSI must be enabled before CPU queues and NIC are initialized */
4161         rc = gaudi_enable_msi(hdev);
4162         if (rc)
4163                 goto disable_queues;
4164
4165         /* must be called after MSI was enabled */
4166         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4167         if (rc) {
4168                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4169                         rc);
4170                 goto disable_msi;
4171         }
4172
4173         /* Perform read from the device to flush all configuration */
4174         RREG32(mmHW_STATE);
4175
4176         return 0;
4177
4178 disable_msi:
4179         gaudi_disable_msi(hdev);
4180 disable_queues:
4181         gaudi_disable_mme_qmans(hdev);
4182         gaudi_disable_pci_dma_qmans(hdev);
4183
4184         return rc;
4185 }
4186
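/* Hard-reset flow: halt the device CPU (via GIC or F/W messages), then either
 * let the F/W perform the reset or program the reset registers and issue
 * SW_ALL_RST from the driver, and finally wait for the reset to complete
 */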
4187 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
4188 {
4189         struct cpu_dyn_regs *dyn_regs =
4190                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4191         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4192         struct gaudi_device *gaudi = hdev->asic_specific;
4193         bool driver_performs_reset;
4194
4195         if (!hard_reset) {
4196                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4197                 return;
4198         }
4199
4200         if (hdev->pldm) {
4201                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4202                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4203         } else {
4204                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4205                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4206         }
4207
4208         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4209                                         !hdev->asic_prop.hard_reset_done_by_fw);
4210
4211         /* Set device to handle FLR by H/W as we will put the device CPU to
4212          * halt mode
4213          */
4214         if (driver_performs_reset)
4215                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4216                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4217
4218         /* If Linux is loaded on the device CPU we need to communicate with
4219          * it via the GIC. Otherwise, we use COMMS or, for old F/Ws, the
4220          * MSG_TO_CPU registers
4221          */
4222         if (hdev->fw_loader.linux_loaded) {
4223                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4224                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4225                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4226
4227                 WREG32(irq_handler_offset,
4228                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4229         } else {
4230                 if (hdev->asic_prop.hard_reset_done_by_fw)
4231                         hl_fw_ask_hard_reset_without_linux(hdev);
4232                 else
4233                         hl_fw_ask_halt_machine_without_linux(hdev);
4234         }
4235
4236         if (driver_performs_reset) {
4237
4238                 /* Configure the reset registers. Must be done as early as
4239                  * possible in case we fail during H/W initialization
4240                  */
4241                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4242                                                 (CFG_RST_H_DMA_MASK |
4243                                                 CFG_RST_H_MME_MASK |
4244                                                 CFG_RST_H_SM_MASK |
4245                                                 CFG_RST_H_TPC_7_MASK));
4246
4247                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4248
4249                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4250                                                 (CFG_RST_H_HBM_MASK |
4251                                                 CFG_RST_H_TPC_7_MASK |
4252                                                 CFG_RST_H_NIC_MASK |
4253                                                 CFG_RST_H_SM_MASK |
4254                                                 CFG_RST_H_DMA_MASK |
4255                                                 CFG_RST_H_MME_MASK |
4256                                                 CFG_RST_H_CPU_MASK |
4257                                                 CFG_RST_H_MMU_MASK));
4258
4259                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4260                                                 (CFG_RST_L_IF_MASK |
4261                                                 CFG_RST_L_PSOC_MASK |
4262                                                 CFG_RST_L_TPC_MASK));
4263
4264                 msleep(cpu_timeout_ms);
4265
4266                 /* Tell ASIC not to re-initialize PCIe */
4267                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4268
4269                 /* Restart BTL/BLR upon hard-reset */
4270                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4271
4272                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4273                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4274
4275                 dev_info(hdev->dev,
4276                         "Issued HARD reset command, going to wait %dms\n",
4277                         reset_timeout_ms);
4278         } else {
4279                 dev_info(hdev->dev,
4280                         "Firmware performs HARD reset, going to wait %dms\n",
4281                         reset_timeout_ms);
4282         }
4283
4284         /*
4285          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4286          * itself is in reset. Need to wait until the reset is deasserted
4287          */
4288         msleep(reset_timeout_ms);
4289
4290         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4291         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4292                 dev_err(hdev->dev,
4293                         "Timeout while waiting for device to reset 0x%x\n",
4294                         status);
4295
4296         if (gaudi) {
4297                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4298                                 HW_CAP_HBM | HW_CAP_PCI_DMA |
4299                                 HW_CAP_MME | HW_CAP_TPC_MASK |
4300                                 HW_CAP_HBM_DMA | HW_CAP_PLL |
4301                                 HW_CAP_NIC_MASK | HW_CAP_MMU |
4302                                 HW_CAP_SRAM_SCRAMBLER |
4303                                 HW_CAP_HBM_SCRAMBLER |
4304                                 HW_CAP_CLK_GATE);
4305
4306                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4307
4308                 hdev->device_cpu_is_halted = false;
4309         }
4310 }
4311
4312 static int gaudi_suspend(struct hl_device *hdev)
4313 {
4314         int rc;
4315
4316         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4317         if (rc)
4318                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4319
4320         return rc;
4321 }
4322
4323 static int gaudi_resume(struct hl_device *hdev)
4324 {
4325         return gaudi_init_iatu(hdev);
4326 }
4327
4328 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4329                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4330 {
4331         int rc;
4332
4333         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4334                         VM_DONTCOPY | VM_NORESERVE;
4335
4336         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4337                                 (dma_addr - HOST_PHYS_BASE), size);
4338         if (rc)
4339                 dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
4340
4341         return rc;
4342 }
4343
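/* Write the new PI to the PQ doorbell register that matches the given H/W
 * queue. For the CPU PQ, also trigger the PI-update interrupt towards the
 * device CPU.
 */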
4344 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4345 {
4346         struct cpu_dyn_regs *dyn_regs =
4347                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4348         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4349         struct gaudi_device *gaudi = hdev->asic_specific;
4350         bool invalid_queue = false;
4351         int dma_id;
4352
4353         switch (hw_queue_id) {
4354         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4355                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4356                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4357                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4358                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4359                 break;
4360
4361         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4362                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4363                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4364                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4365                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4366                 break;
4367
4368         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4369                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4370                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4371                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4372                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4373                 break;
4374
4375         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4376                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4377                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4378                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4379                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4380                 break;
4381
4382         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4383                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4384                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4385                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4386                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4387                 break;
4388
4389         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4390                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4391                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4392                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4393                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4394                 break;
4395
4396         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4397                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4398                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4399                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4400                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4401                 break;
4402
4403         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4404                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4405                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4406                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4407                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4408                 break;
4409
4410         case GAUDI_QUEUE_ID_CPU_PQ:
4411                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4412                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4413                 else
4414                         invalid_queue = true;
4415                 break;
4416
4417         case GAUDI_QUEUE_ID_MME_0_0:
4418                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4419                 break;
4420
4421         case GAUDI_QUEUE_ID_MME_0_1:
4422                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4423                 break;
4424
4425         case GAUDI_QUEUE_ID_MME_0_2:
4426                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4427                 break;
4428
4429         case GAUDI_QUEUE_ID_MME_0_3:
4430                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4431                 break;
4432
4433         case GAUDI_QUEUE_ID_MME_1_0:
4434                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4435                 break;
4436
4437         case GAUDI_QUEUE_ID_MME_1_1:
4438                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4439                 break;
4440
4441         case GAUDI_QUEUE_ID_MME_1_2:
4442                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4443                 break;
4444
4445         case GAUDI_QUEUE_ID_MME_1_3:
4446                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4447                 break;
4448
4449         case GAUDI_QUEUE_ID_TPC_0_0:
4450                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4451                 break;
4452
4453         case GAUDI_QUEUE_ID_TPC_0_1:
4454                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4455                 break;
4456
4457         case GAUDI_QUEUE_ID_TPC_0_2:
4458                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4459                 break;
4460
4461         case GAUDI_QUEUE_ID_TPC_0_3:
4462                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4463                 break;
4464
4465         case GAUDI_QUEUE_ID_TPC_1_0:
4466                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4467                 break;
4468
4469         case GAUDI_QUEUE_ID_TPC_1_1:
4470                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4471                 break;
4472
4473         case GAUDI_QUEUE_ID_TPC_1_2:
4474                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4475                 break;
4476
4477         case GAUDI_QUEUE_ID_TPC_1_3:
4478                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4479                 break;
4480
4481         case GAUDI_QUEUE_ID_TPC_2_0:
4482                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4483                 break;
4484
4485         case GAUDI_QUEUE_ID_TPC_2_1:
4486                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4487                 break;
4488
4489         case GAUDI_QUEUE_ID_TPC_2_2:
4490                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4491                 break;
4492
4493         case GAUDI_QUEUE_ID_TPC_2_3:
4494                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4495                 break;
4496
4497         case GAUDI_QUEUE_ID_TPC_3_0:
4498                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4499                 break;
4500
4501         case GAUDI_QUEUE_ID_TPC_3_1:
4502                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4503                 break;
4504
4505         case GAUDI_QUEUE_ID_TPC_3_2:
4506                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4507                 break;
4508
4509         case GAUDI_QUEUE_ID_TPC_3_3:
4510                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4511                 break;
4512
4513         case GAUDI_QUEUE_ID_TPC_4_0:
4514                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4515                 break;
4516
4517         case GAUDI_QUEUE_ID_TPC_4_1:
4518                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4519                 break;
4520
4521         case GAUDI_QUEUE_ID_TPC_4_2:
4522                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4523                 break;
4524
4525         case GAUDI_QUEUE_ID_TPC_4_3:
4526                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4527                 break;
4528
4529         case GAUDI_QUEUE_ID_TPC_5_0:
4530                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4531                 break;
4532
4533         case GAUDI_QUEUE_ID_TPC_5_1:
4534                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4535                 break;
4536
4537         case GAUDI_QUEUE_ID_TPC_5_2:
4538                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4539                 break;
4540
4541         case GAUDI_QUEUE_ID_TPC_5_3:
4542                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4543                 break;
4544
4545         case GAUDI_QUEUE_ID_TPC_6_0:
4546                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4547                 break;
4548
4549         case GAUDI_QUEUE_ID_TPC_6_1:
4550                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4551                 break;
4552
4553         case GAUDI_QUEUE_ID_TPC_6_2:
4554                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4555                 break;
4556
4557         case GAUDI_QUEUE_ID_TPC_6_3:
4558                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4559                 break;
4560
4561         case GAUDI_QUEUE_ID_TPC_7_0:
4562                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4563                 break;
4564
4565         case GAUDI_QUEUE_ID_TPC_7_1:
4566                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4567                 break;
4568
4569         case GAUDI_QUEUE_ID_TPC_7_2:
4570                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4571                 break;
4572
4573         case GAUDI_QUEUE_ID_TPC_7_3:
4574                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4575                 break;
4576
4577         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4578                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4579                         invalid_queue = true;
4580
4581                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4582                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4583                 break;
4584
4585         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4586                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4587                         invalid_queue = true;
4588
4589                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4590                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4591                 break;
4592
4593         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4594                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4595                         invalid_queue = true;
4596
4597                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4598                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4599                 break;
4600
4601         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4602                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4603                         invalid_queue = true;
4604
4605                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4606                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4607                 break;
4608
4609         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4610                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4611                         invalid_queue = true;
4612
4613                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4614                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4615                 break;
4616
4617         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4618                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4619                         invalid_queue = true;
4620
4621                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4622                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4623                 break;
4624
4625         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4626                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4627                         invalid_queue = true;
4628
4629                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4630                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4631                 break;
4632
4633         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4634                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4635                         invalid_queue = true;
4636
4637                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4638                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4639                 break;
4640
4641         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4642                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4643                         invalid_queue = true;
4644
4645                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4646                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4647                 break;
4648
4649         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4650                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4651                         invalid_queue = true;
4652
4653                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4654                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4655                 break;
4656
4657         default:
4658                 invalid_queue = true;
4659         }
4660
4661         if (invalid_queue) {
4662                 /* Should never get here */
4663                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4664                         hw_queue_id);
4665                 return;
4666         }
4667
4668         db_value = pi;
4669
4670         /* ring the doorbell */
4671         WREG32(db_reg_offset, db_value);
4672
4673         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4674                 /* make sure device CPU will read latest data from host */
4675                 mb();
4676
4677                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4678                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4679                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4680
4681                 WREG32(irq_handler_offset,
4682                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4683         }
4684 }
4685
4686 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4687                                 struct hl_bd *bd)
4688 {
4689         __le64 *pbd = (__le64 *) bd;
4690
4691         /* The QMANs are on the host memory so a simple copy suffice */
4692         pqe[0] = pbd[0];
4693         /* The QMANs are in host memory so a simple copy suffices */
4694 }
4695
4696 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4697                                         dma_addr_t *dma_handle, gfp_t flags)
4698 {
4699         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4700                                                 dma_handle, flags);
4701
4702         /* Shift to the device's base physical address of host memory */
4703         if (kernel_addr)
4704                 *dma_handle += HOST_PHYS_BASE;
4705
4706         return kernel_addr;
4707 }
4708
4709 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4710                 void *cpu_addr, dma_addr_t dma_handle)
4711 {
4712         /* Cancel the device's base physical address of host memory */
4713         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4714
4715         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4716 }
4717
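/* Scrub the user HBM range by issuing memset transactions on all DMA cores
 * in parallel, in chunks of up to 2GB, and waiting for each batch to finish
 */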
4718 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4719 {
4720         struct asic_fixed_properties *prop = &hdev->asic_prop;
4721         u64  cur_addr = DRAM_BASE_ADDR_USER;
4722         u32 val;
4723         u32 chunk_size;
4724         int rc, dma_id;
4725
4726         while (cur_addr < prop->dram_end_address) {
4727                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4728                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4729
4730                         chunk_size =
4731                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4732
4733                         dev_dbg(hdev->dev,
4734                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4735                                 cur_addr, cur_addr + chunk_size);
4736
4737                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4738                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4739                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4740                                                 lower_32_bits(cur_addr));
4741                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4742                                                 upper_32_bits(cur_addr));
4743                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4744                                         chunk_size);
4745                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4746                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4747                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4748
4749                         cur_addr += chunk_size;
4750
4751                         if (cur_addr == prop->dram_end_address)
4752                                 break;
4753                 }
4754
4755                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4756                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4757
4758                         rc = hl_poll_timeout(
4759                                 hdev,
4760                                 mmDMA0_CORE_STS0 + dma_offset,
4761                                 val,
4762                                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4763                                 1000,
4764                                 HBM_SCRUBBING_TIMEOUT_US);
4765
4766                         if (rc) {
4767                                 dev_err(hdev->dev,
4768                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4769                                         dma_id);
4770                                 return -EIO;
4771                         }
4772                 }
4773         }
4774
4775         return 0;
4776 }
4777
4778 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4779 {
4780         struct asic_fixed_properties *prop = &hdev->asic_prop;
4781         struct gaudi_device *gaudi = hdev->asic_specific;
4782         int rc = 0;
4783         u64 val = 0;
4784
4785         if (!hdev->memory_scrub)
4786                 return 0;
4787
4788         if (!addr && !size) {
4789                 /* Wait till device is idle */
4790                 rc = hl_poll_timeout(
4791                                 hdev,
4792                                 mmDMA0_CORE_STS0/* dummy */,
4793                                 val/* dummy */,
4794                                 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4795                                                 0, NULL)),
4796                                                 1000,
4797                                                 HBM_SCRUBBING_TIMEOUT_US);
4798                 if (rc) {
4799                         dev_err(hdev->dev, "waiting for idle timeout\n");
4800                         return -EIO;
4801                 }
4802
4803                 /* Scrub SRAM */
4804                 addr = prop->sram_user_base_address;
4805                 size = hdev->pldm ? 0x10000 :
4806                                 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4807                 val = 0x7777777777777777ull;
4808
4809                 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4810                 if (rc) {
4811                         dev_err(hdev->dev,
4812                                 "Failed to clear SRAM in mem scrub all\n");
4813                         return rc;
4814                 }
4815
4816                 mutex_lock(&gaudi->clk_gate_mutex);
4817                 hdev->asic_funcs->disable_clock_gating(hdev);
4818
4819                 /* Scrub HBM using all DMA channels in parallel */
4820                 rc = gaudi_hbm_scrubbing(hdev);
4821                 if (rc)
4822                         dev_err(hdev->dev,
4823                                 "Failed to clear HBM in mem scrub all\n");
4824
4825                 hdev->asic_funcs->set_clock_gating(hdev);
4826                 mutex_unlock(&gaudi->clk_gate_mutex);
4827         }
4828
4829         return rc;
4830 }
4831
4832 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4833                                 u32 queue_id, dma_addr_t *dma_handle,
4834                                 u16 *queue_len)
4835 {
4836         struct gaudi_device *gaudi = hdev->asic_specific;
4837         struct gaudi_internal_qman_info *q;
4838
4839         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4840                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4841                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4842                 return NULL;
4843         }
4844
4845         q = &gaudi->internal_qmans[queue_id];
4846         *dma_handle = q->pq_dma_addr;
4847         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4848
4849         return q->pq_kernel_addr;
4850 }
4851
4852 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4853                                 u16 len, u32 timeout, u64 *result)
4854 {
4855         struct gaudi_device *gaudi = hdev->asic_specific;
4856
4857         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4858                 if (result)
4859                         *result = 0;
4860                 return 0;
4861         }
4862
4863         if (!timeout)
4864                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4865
4866         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4867                                                 timeout, result);
4868 }
4869
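/* Sanity-test an external H/W queue: submit a MSG_PROT packet that writes a
 * known fence value to host memory and poll that memory until the value
 * arrives or the test times out
 */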
4870 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4871 {
4872         struct packet_msg_prot *fence_pkt;
4873         dma_addr_t pkt_dma_addr;
4874         u32 fence_val, tmp, timeout_usec;
4875         dma_addr_t fence_dma_addr;
4876         u32 *fence_ptr;
4877         int rc;
4878
4879         if (hdev->pldm)
4880                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4881         else
4882                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4883
4884         fence_val = GAUDI_QMAN0_FENCE_VAL;
4885
4886         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4887                                                         &fence_dma_addr);
4888         if (!fence_ptr) {
4889                 dev_err(hdev->dev,
4890                         "Failed to allocate memory for H/W queue %d testing\n",
4891                         hw_queue_id);
4892                 return -ENOMEM;
4893         }
4894
4895         *fence_ptr = 0;
4896
4897         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4898                                         sizeof(struct packet_msg_prot),
4899                                         GFP_KERNEL, &pkt_dma_addr);
4900         if (!fence_pkt) {
4901                 dev_err(hdev->dev,
4902                         "Failed to allocate packet for H/W queue %d testing\n",
4903                         hw_queue_id);
4904                 rc = -ENOMEM;
4905                 goto free_fence_ptr;
4906         }
4907
4908         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4909         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4910         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4911
4912         fence_pkt->ctl = cpu_to_le32(tmp);
4913         fence_pkt->value = cpu_to_le32(fence_val);
4914         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4915
4916         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4917                                         sizeof(struct packet_msg_prot),
4918                                         pkt_dma_addr);
4919         if (rc) {
4920                 dev_err(hdev->dev,
4921                         "Failed to send fence packet to H/W queue %d\n",
4922                         hw_queue_id);
4923                 goto free_pkt;
4924         }
4925
4926         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4927                                         1000, timeout_usec, true);
4928
4929         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4930
4931         if (rc == -ETIMEDOUT) {
4932                 dev_err(hdev->dev,
4933                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4934                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4935                 rc = -EIO;
4936         }
4937
4938 free_pkt:
4939         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4940                                         pkt_dma_addr);
4941 free_fence_ptr:
4942         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4943                                         fence_dma_addr);
4944         return rc;
4945 }
4946
4947 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4948 {
4949         struct gaudi_device *gaudi = hdev->asic_specific;
4950
4951         /*
4952          * Check the capability here because send_cpu_message() won't update
4953          * the result value if the capability is missing
4954          */
4955         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4956                 return 0;
4957
4958         return hl_fw_test_cpu_queue(hdev);
4959 }
4960
4961 static int gaudi_test_queues(struct hl_device *hdev)
4962 {
4963         int i, rc, ret_val = 0;
4964
4965         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4966                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4967                         rc = gaudi_test_queue(hdev, i);
4968                         if (rc)
4969                                 ret_val = -EINVAL;
4970                 }
4971         }
4972
4973         rc = gaudi_test_cpu_queue(hdev);
4974         if (rc)
4975                 ret_val = -EINVAL;
4976
4977         return ret_val;
4978 }
4979
4980 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4981                 gfp_t mem_flags, dma_addr_t *dma_handle)
4982 {
4983         void *kernel_addr;
4984
4985         if (size > GAUDI_DMA_POOL_BLK_SIZE)
4986                 return NULL;
4987
4988         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4989
4990         /* Shift to the device's base physical address of host memory */
4991         if (kernel_addr)
4992                 *dma_handle += HOST_PHYS_BASE;
4993
4994         return kernel_addr;
4995 }
4996
4997 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4998                         dma_addr_t dma_addr)
4999 {
5000         /* Cancel the device's base physical address of host memory */
5001         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5002
5003         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5004 }
5005
5006 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
5007                                         size_t size, dma_addr_t *dma_handle)
5008 {
5009         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5010 }
5011
5012 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
5013                                                 size_t size, void *vaddr)
5014 {
5015         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5016 }
5017
5018 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5019                         int nents, enum dma_data_direction dir)
5020 {
5021         struct scatterlist *sg;
5022         int i;
5023
5024         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5025                 return -ENOMEM;
5026
5027         /* Shift to the device's base physical address of host memory */
5028         for_each_sg(sgl, sg, nents, i)
5029                 sg->dma_address += HOST_PHYS_BASE;
5030
5031         return 0;
5032 }
5033
5034 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5035                         int nents, enum dma_data_direction dir)
5036 {
5037         struct scatterlist *sg;
5038         int i;
5039
5040         /* Cancel the device's base physical address of host memory */
5041         for_each_sg(sgl, sg, nents, i)
5042                 sg->dma_address -= HOST_PHYS_BASE;
5043
5044         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5045 }
5046
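/* Compute the size of the patched CB needed for this SG table: physically
 * contiguous entries are merged up to DMA_MAX_TRANSFER_SIZE, and each
 * resulting chunk costs one LIN_DMA packet
 */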
5047 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5048                                         struct sg_table *sgt)
5049 {
5050         struct scatterlist *sg, *sg_next_iter;
5051         u32 count, dma_desc_cnt;
5052         u64 len, len_next;
5053         dma_addr_t addr, addr_next;
5054
5055         dma_desc_cnt = 0;
5056
5057         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5058
5059                 len = sg_dma_len(sg);
5060                 addr = sg_dma_address(sg);
5061
5062                 if (len == 0)
5063                         break;
5064
5065                 while ((count + 1) < sgt->nents) {
5066                         sg_next_iter = sg_next(sg);
5067                         len_next = sg_dma_len(sg_next_iter);
5068                         addr_next = sg_dma_address(sg_next_iter);
5069
5070                         if (len_next == 0)
5071                                 break;
5072
5073                         if ((addr + len == addr_next) &&
5074                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5075                                 len += len_next;
5076                                 count++;
5077                                 sg = sg_next_iter;
5078                         } else {
5079                                 break;
5080                         }
5081                 }
5082
5083                 dma_desc_cnt++;
5084         }
5085
5086         return dma_desc_cnt * sizeof(struct packet_lin_dma);
5087 }
5088
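/* Pin the user buffer referenced by the DMA packet (unless it is already
 * pinned for this job), DMA-map it and add the resulting descriptor-list
 * size to the patched CB size
 */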
5089 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5090                                 struct hl_cs_parser *parser,
5091                                 struct packet_lin_dma *user_dma_pkt,
5092                                 u64 addr, enum dma_data_direction dir)
5093 {
5094         struct hl_userptr *userptr;
5095         int rc;
5096
5097         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5098                         parser->job_userptr_list, &userptr))
5099                 goto already_pinned;
5100
5101         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5102         if (!userptr)
5103                 return -ENOMEM;
5104
5105         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5106                                 userptr);
5107         if (rc)
5108                 goto free_userptr;
5109
5110         list_add_tail(&userptr->job_node, parser->job_userptr_list);
5111
5112         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5113                                         userptr->sgt->nents, dir);
5114         if (rc) {
5115                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5116                 goto unpin_memory;
5117         }
5118
5119         userptr->dma_mapped = true;
5120         userptr->dir = dir;
5121
5122 already_pinned:
5123         parser->patched_cb_size +=
5124                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5125
5126         return 0;
5127
5128 unpin_memory:
5129         list_del(&userptr->job_node);
5130         hl_unpin_host_memory(hdev, userptr);
5131 free_userptr:
5132         kfree(userptr);
5133         return rc;
5134 }
5135
5136 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5137                                 struct hl_cs_parser *parser,
5138                                 struct packet_lin_dma *user_dma_pkt,
5139                                 bool src_in_host)
5140 {
5141         enum dma_data_direction dir;
5142         bool skip_host_mem_pin = false, user_memset;
5143         u64 addr;
5144         int rc = 0;
5145
5146         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5147                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5148                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5149
5150         if (src_in_host) {
5151                 if (user_memset)
5152                         skip_host_mem_pin = true;
5153
5154                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5155                 dir = DMA_TO_DEVICE;
5156                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5157         } else {
5158                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5159                 dir = DMA_FROM_DEVICE;
5160                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5161                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5162                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5163         }
5164
5165         if (skip_host_mem_pin)
5166                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5167         else
5168                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5169                                                 addr, dir);
5170
5171         return rc;
5172 }
5173
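/*
 * Validate a LIN_DMA packet when the MMU is disabled. Zero-sized transfers
 * bypass validation (only WR_COMP is issued), and the transfer is treated as
 * host-sourced only for queue IDs up to GAUDI_QUEUE_ID_DMA_0_3.
 */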
5174 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5175                                 struct hl_cs_parser *parser,
5176                                 struct packet_lin_dma *user_dma_pkt)
5177 {
5178         bool src_in_host = false;
5179         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5180                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5181                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5182
5183         dev_dbg(hdev->dev, "DMA packet details:\n");
5184         dev_dbg(hdev->dev, "source == 0x%llx\n",
5185                                 le64_to_cpu(user_dma_pkt->src_addr));
5186         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5187         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5188
5189         /*
5190          * Special handling for DMA with size 0. Bypass all validations
5191          * because no transactions will be done except for WR_COMP, which
5192          * is not a security issue
5193          */
5194         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5195                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5196                 return 0;
5197         }
5198
5199         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5200                 src_in_host = true;
5201
5202         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5203                                                 src_in_host);
5204 }
5205
5206 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5207                                         struct hl_cs_parser *parser,
5208                                         struct packet_load_and_exe *user_pkt)
5209 {
5210         u32 cfg;
5211
5212         cfg = le32_to_cpu(user_pkt->cfg);
5213
5214         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5215                 dev_err(hdev->dev,
5216                         "User not allowed to use Load and Execute\n");
5217                 return -EPERM;
5218         }
5219
5220         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5221
5222         return 0;
5223 }
5224
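/*
 * First pass over the user CB: walk it packet by packet, reject packet types
 * the user is not allowed to submit (MSG_PROT, CP_DMA, STOP, WREG_BULK and
 * restricted LOAD_AND_EXE), and compute the size of the patched CB. LIN_DMA
 * packets get extra validation when the MMU is disabled.
 */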
5225 static int gaudi_validate_cb(struct hl_device *hdev,
5226                         struct hl_cs_parser *parser, bool is_mmu)
5227 {
5228         u32 cb_parsed_length = 0;
5229         int rc = 0;
5230
5231         parser->patched_cb_size = 0;
5232
5233         /* user_cb_size is greater than 0 so the loop will always execute */
5234         while (cb_parsed_length < parser->user_cb_size) {
5235                 enum packet_id pkt_id;
5236                 u16 pkt_size;
5237                 struct gaudi_packet *user_pkt;
5238
5239                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5240
5241                 pkt_id = (enum packet_id) (
5242                                 (le64_to_cpu(user_pkt->header) &
5243                                 PACKET_HEADER_PACKET_ID_MASK) >>
5244                                         PACKET_HEADER_PACKET_ID_SHIFT);
5245
5246                 if (!validate_packet_id(pkt_id)) {
5247                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5248                         rc = -EINVAL;
5249                         break;
5250                 }
5251
5252                 pkt_size = gaudi_packet_sizes[pkt_id];
5253                 cb_parsed_length += pkt_size;
5254                 if (cb_parsed_length > parser->user_cb_size) {
5255                         dev_err(hdev->dev,
5256                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5257                         rc = -EINVAL;
5258                         break;
5259                 }
5260
5261                 switch (pkt_id) {
5262                 case PACKET_MSG_PROT:
5263                         dev_err(hdev->dev,
5264                                 "User not allowed to use MSG_PROT\n");
5265                         rc = -EPERM;
5266                         break;
5267
5268                 case PACKET_CP_DMA:
5269                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5270                         rc = -EPERM;
5271                         break;
5272
5273                 case PACKET_STOP:
5274                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5275                         rc = -EPERM;
5276                         break;
5277
5278                 case PACKET_WREG_BULK:
5279                         dev_err(hdev->dev,
5280                                 "User not allowed to use WREG_BULK\n");
5281                         rc = -EPERM;
5282                         break;
5283
5284                 case PACKET_LOAD_AND_EXE:
5285                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5286                                 (struct packet_load_and_exe *) user_pkt);
5287                         break;
5288
5289                 case PACKET_LIN_DMA:
5290                         parser->contains_dma_pkt = true;
5291                         if (is_mmu)
5292                                 parser->patched_cb_size += pkt_size;
5293                         else
5294                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5295                                         (struct packet_lin_dma *) user_pkt);
5296                         break;
5297
5298                 case PACKET_WREG_32:
5299                 case PACKET_MSG_LONG:
5300                 case PACKET_MSG_SHORT:
5301                 case PACKET_REPEAT:
5302                 case PACKET_FENCE:
5303                 case PACKET_NOP:
5304                 case PACKET_ARB_POINT:
5305                         parser->patched_cb_size += pkt_size;
5306                         break;
5307
5308                 default:
5309                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5310                                 pkt_id);
5311                         rc = -EINVAL;
5312                         break;
5313                 }
5314
5315                 if (rc)
5316                         break;
5317         }
5318
5319         /*
5320          * The new CB should have space at the end for two MSG_PROT packets:
5321          * 1. A packet that will act as a completion packet
5322          * 2. A packet that will generate an MSI interrupt
5323          */
5324         if (parser->completion)
5325                 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5326
5327         return rc;
5328 }
5329
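/*
 * Expand a single user LIN_DMA packet into one packet per DMA-mapped SG
 * segment. Contiguous segments are coalesced as long as the combined size
 * does not exceed DMA_MAX_TRANSFER_SIZE. The engine-barrier bit is kept only
 * on the first descriptor, write-completion is cleared on all of them and
 * then restored on the last descriptor according to what the user requested.
 */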
5330 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5331                                 struct hl_cs_parser *parser,
5332                                 struct packet_lin_dma *user_dma_pkt,
5333                                 struct packet_lin_dma *new_dma_pkt,
5334                                 u32 *new_dma_pkt_size)
5335 {
5336         struct hl_userptr *userptr;
5337         struct scatterlist *sg, *sg_next_iter;
5338         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5339         u64 len, len_next;
5340         dma_addr_t dma_addr, dma_addr_next;
5341         u64 device_memory_addr, addr;
5342         enum dma_data_direction dir;
5343         struct sg_table *sgt;
5344         bool src_in_host = false;
5345         bool skip_host_mem_pin = false;
5346         bool user_memset;
5347
5348         ctl = le32_to_cpu(user_dma_pkt->ctl);
5349
5350         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5351                 src_in_host = true;
5352
5353         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5354                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5355
5356         if (src_in_host) {
5357                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5358                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5359                 dir = DMA_TO_DEVICE;
5360                 if (user_memset)
5361                         skip_host_mem_pin = true;
5362         } else {
5363                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5364                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5365                 dir = DMA_FROM_DEVICE;
5366         }
5367
5368         if ((!skip_host_mem_pin) &&
5369                 (!hl_userptr_is_pinned(hdev, addr,
5370                                         le32_to_cpu(user_dma_pkt->tsize),
5371                                         parser->job_userptr_list, &userptr))) {
5372                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5373                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5374                 return -EFAULT;
5375         }
5376
5377         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5378                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5379                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5380                 return 0;
5381         }
5382
5383         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5384
5385         sgt = userptr->sgt;
5386         dma_desc_cnt = 0;
5387
5388         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5389                 len = sg_dma_len(sg);
5390                 dma_addr = sg_dma_address(sg);
5391
5392                 if (len == 0)
5393                         break;
5394
5395                 while ((count + 1) < sgt->nents) {
5396                         sg_next_iter = sg_next(sg);
5397                         len_next = sg_dma_len(sg_next_iter);
5398                         dma_addr_next = sg_dma_address(sg_next_iter);
5399
5400                         if (len_next == 0)
5401                                 break;
5402
5403                         if ((dma_addr + len == dma_addr_next) &&
5404                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5405                                 len += len_next;
5406                                 count++;
5407                                 sg = sg_next_iter;
5408                         } else {
5409                                 break;
5410                         }
5411                 }
5412
5413                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5414                 if (likely(dma_desc_cnt))
5415                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5416                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5417                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5418                 new_dma_pkt->tsize = cpu_to_le32(len);
5419
5420                 if (dir == DMA_TO_DEVICE) {
5421                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5422                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5423                 } else {
5424                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5425                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5426                 }
5427
5428                 if (!user_memset)
5429                         device_memory_addr += len;
5430                 dma_desc_cnt++;
5431                 new_dma_pkt++;
5432         }
5433
5434         if (!dma_desc_cnt) {
5435                 dev_err(hdev->dev,
5436                         "No SG entries found when patching DMA packet\n");
5437                 return -EFAULT;
5438         }
5439
5440         /* Fix the last DMA packet - wrcomp must be as the user set it */
5441         new_dma_pkt--;
5442         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5443
5444         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5445
5446         return 0;
5447 }
5448
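/*
 * Second pass over the user CB: copy packets into the patched (kernel) CB,
 * replacing every LIN_DMA packet with the descriptors built by
 * gaudi_patch_dma_packet(). Disallowed packet types are rejected again as
 * defense in depth.
 */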
5449 static int gaudi_patch_cb(struct hl_device *hdev,
5450                                 struct hl_cs_parser *parser)
5451 {
5452         u32 cb_parsed_length = 0;
5453         u32 cb_patched_cur_length = 0;
5454         int rc = 0;
5455
5456         /* user_cb_size is greater than 0 so the loop will always execute */
5457         while (cb_parsed_length < parser->user_cb_size) {
5458                 enum packet_id pkt_id;
5459                 u16 pkt_size;
5460                 u32 new_pkt_size = 0;
5461                 struct gaudi_packet *user_pkt, *kernel_pkt;
5462
5463                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5464                 kernel_pkt = parser->patched_cb->kernel_address +
5465                                         cb_patched_cur_length;
5466
5467                 pkt_id = (enum packet_id) (
5468                                 (le64_to_cpu(user_pkt->header) &
5469                                 PACKET_HEADER_PACKET_ID_MASK) >>
5470                                         PACKET_HEADER_PACKET_ID_SHIFT);
5471
5472                 if (!validate_packet_id(pkt_id)) {
5473                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5474                         rc = -EINVAL;
5475                         break;
5476                 }
5477
5478                 pkt_size = gaudi_packet_sizes[pkt_id];
5479                 cb_parsed_length += pkt_size;
5480                 if (cb_parsed_length > parser->user_cb_size) {
5481                         dev_err(hdev->dev,
5482                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5483                         rc = -EINVAL;
5484                         break;
5485                 }
5486
5487                 switch (pkt_id) {
5488                 case PACKET_LIN_DMA:
5489                         rc = gaudi_patch_dma_packet(hdev, parser,
5490                                         (struct packet_lin_dma *) user_pkt,
5491                                         (struct packet_lin_dma *) kernel_pkt,
5492                                         &new_pkt_size);
5493                         cb_patched_cur_length += new_pkt_size;
5494                         break;
5495
5496                 case PACKET_MSG_PROT:
5497                         dev_err(hdev->dev,
5498                                 "User not allowed to use MSG_PROT\n");
5499                         rc = -EPERM;
5500                         break;
5501
5502                 case PACKET_CP_DMA:
5503                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5504                         rc = -EPERM;
5505                         break;
5506
5507                 case PACKET_STOP:
5508                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5509                         rc = -EPERM;
5510                         break;
5511
5512                 case PACKET_WREG_32:
5513                 case PACKET_WREG_BULK:
5514                 case PACKET_MSG_LONG:
5515                 case PACKET_MSG_SHORT:
5516                 case PACKET_REPEAT:
5517                 case PACKET_FENCE:
5518                 case PACKET_NOP:
5519                 case PACKET_ARB_POINT:
5520                 case PACKET_LOAD_AND_EXE:
5521                         memcpy(kernel_pkt, user_pkt, pkt_size);
5522                         cb_patched_cur_length += pkt_size;
5523                         break;
5524
5525                 default:
5526                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5527                                 pkt_id);
5528                         rc = -EINVAL;
5529                         break;
5530                 }
5531
5532                 if (rc)
5533                         break;
5534         }
5535
5536         return rc;
5537 }
5538
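/*
 * CS parsing when the MMU is enabled: the user CB is copied as-is into a
 * kernel-allocated patched CB (with room for the two trailing MSG_PROT
 * packets when a completion is required) and then validated in place, so no
 * per-packet patching of DMA addresses is needed.
 */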
5539 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5540                 struct hl_cs_parser *parser)
5541 {
5542         u64 patched_cb_handle;
5543         u32 patched_cb_size;
5544         struct hl_cb *user_cb;
5545         int rc;
5546
5547         /*
5548          * The new CB should have space at the end for two MSG_PROT packets:
5549          * 1. A packet that will act as a completion packet
5550          * 2. A packet that will generate an MSI interrupt
5551          */
5552         if (parser->completion)
5553                 parser->patched_cb_size = parser->user_cb_size +
5554                                 sizeof(struct packet_msg_prot) * 2;
5555         else
5556                 parser->patched_cb_size = parser->user_cb_size;
5557
5558         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5559                                 parser->patched_cb_size, false, false,
5560                                 &patched_cb_handle);
5561
5562         if (rc) {
5563                 dev_err(hdev->dev,
5564                         "Failed to allocate patched CB for DMA CS %d\n",
5565                         rc);
5566                 return rc;
5567         }
5568
5569         patched_cb_handle >>= PAGE_SHIFT;
5570         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5571                                 (u32) patched_cb_handle);
5572         /* hl_cb_get should never fail */
5573         if (!parser->patched_cb) {
5574                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5575                         (u32) patched_cb_handle);
5576                 rc = -EFAULT;
5577                 goto out;
5578         }
5579
5580         /*
5581          * The check that parser->user_cb_size <= parser->user_cb->size was done
5582          * in validate_queue_index().
5583          */
5584         memcpy(parser->patched_cb->kernel_address,
5585                 parser->user_cb->kernel_address,
5586                 parser->user_cb_size);
5587
5588         patched_cb_size = parser->patched_cb_size;
5589
5590         /* Validate patched CB instead of user CB */
5591         user_cb = parser->user_cb;
5592         parser->user_cb = parser->patched_cb;
5593         rc = gaudi_validate_cb(hdev, parser, true);
5594         parser->user_cb = user_cb;
5595
5596         if (rc) {
5597                 hl_cb_put(parser->patched_cb);
5598                 goto out;
5599         }
5600
5601         if (patched_cb_size != parser->patched_cb_size) {
5602                 dev_err(hdev->dev, "user CB size mismatch\n");
5603                 hl_cb_put(parser->patched_cb);
5604                 rc = -EINVAL;
5605                 goto out;
5606         }
5607
5608 out:
5609         /*
5610          * Always call cb destroy here because we still have 1 reference
5611          * to it from the earlier cb_get call. After the job is completed,
5612          * cb_put will release it, but here we want to remove it from the
5613          * IDR
5614          */
5615         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5616                                         patched_cb_handle << PAGE_SHIFT);
5617
5618         return rc;
5619 }
5620
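/*
 * CS parsing when the MMU is disabled: validate the user CB to compute the
 * patched CB size, allocate the patched CB and fill it via gaudi_patch_cb().
 * On failure, any userptrs pinned during validation are released.
 */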
5621 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5622                 struct hl_cs_parser *parser)
5623 {
5624         u64 patched_cb_handle;
5625         int rc;
5626
5627         rc = gaudi_validate_cb(hdev, parser, false);
5628
5629         if (rc)
5630                 goto free_userptr;
5631
5632         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5633                                 parser->patched_cb_size, false, false,
5634                                 &patched_cb_handle);
5635         if (rc) {
5636                 dev_err(hdev->dev,
5637                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5638                 goto free_userptr;
5639         }
5640
5641         patched_cb_handle >>= PAGE_SHIFT;
5642         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5643                                 (u32) patched_cb_handle);
5644         /* hl_cb_get should never fail here */
5645         if (!parser->patched_cb) {
5646                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5647                                 (u32) patched_cb_handle);
5648                 rc = -EFAULT;
5649                 goto out;
5650         }
5651
5652         rc = gaudi_patch_cb(hdev, parser);
5653
5654         if (rc)
5655                 hl_cb_put(parser->patched_cb);
5656
5657 out:
5658         /*
5659          * Always call cb destroy here because we still have 1 reference
5660          * to it from the earlier cb_get call. After the job is completed,
5661          * cb_put will release it, but here we want to remove it from the
5662          * IDR
5663          */
5664         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5665                                 patched_cb_handle << PAGE_SHIFT);
5666
5667 free_userptr:
5668         if (rc)
5669                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5670         return rc;
5671 }
5672
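/*
 * Jobs for internal queues are not patched; only verify that the queue is
 * enabled (relevant for NIC queues) and that the CB address range falls
 * entirely inside SRAM, DRAM or the PMMU virtual address range.
 */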
5673 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5674                                         struct hl_cs_parser *parser)
5675 {
5676         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5677         struct gaudi_device *gaudi = hdev->asic_specific;
5678         u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5679                 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5680
5681         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5682                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5683                         (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5684                 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5685                                 parser->hw_queue_id);
5686                 return -EINVAL;
5687         }
5688
5689         /* For internal queue jobs just check if CB address is valid */
5690         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5691                                         parser->user_cb_size,
5692                                         asic_prop->sram_user_base_address,
5693                                         asic_prop->sram_end_address))
5694                 return 0;
5695
5696         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5697                                         parser->user_cb_size,
5698                                         asic_prop->dram_user_base_address,
5699                                         asic_prop->dram_end_address))
5700                 return 0;
5701
5702         /* PMMU and HPMMU addresses are equal, check only one of them */
5703         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5704                                         parser->user_cb_size,
5705                                         asic_prop->pmmu.start_addr,
5706                                         asic_prop->pmmu.end_addr))
5707                 return 0;
5708
5709         dev_err(hdev->dev,
5710                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5711                 parser->user_cb, parser->user_cb_size);
5712
5713         return -EFAULT;
5714 }
5715
5716 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5717 {
5718         struct gaudi_device *gaudi = hdev->asic_specific;
5719
5720         if (parser->queue_type == QUEUE_TYPE_INT)
5721                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5722
5723         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5724                 return gaudi_parse_cb_mmu(hdev, parser);
5725         else
5726                 return gaudi_parse_cb_no_mmu(hdev, parser);
5727 }
5728
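/*
 * Append the two trailing MSG_PROT packets to a CB: the first updates the
 * completion queue entry and the second triggers the MSI interrupt (always
 * vector 0 unless multi-MSI mode is enabled).
 */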
5729 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5730                                         void *kernel_address, u32 len,
5731                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
5732                                         bool eb)
5733 {
5734         struct gaudi_device *gaudi = hdev->asic_specific;
5735         struct packet_msg_prot *cq_pkt;
5736         u32 tmp;
5737
5738         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5739
5740         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5741         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5742
5743         if (eb)
5744                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5745
5746         cq_pkt->ctl = cpu_to_le32(tmp);
5747         cq_pkt->value = cpu_to_le32(cq_val);
5748         cq_pkt->addr = cpu_to_le64(cq_addr);
5749
5750         cq_pkt++;
5751
5752         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5753         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5754         cq_pkt->ctl = cpu_to_le32(tmp);
5755         cq_pkt->value = cpu_to_le32(1);
5756
5757         if (!gaudi->multi_msi_mode)
5758                 msi_vec = 0;
5759
5760         cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5761 }
5762
5763 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5764 {
5765         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5766 }
5767
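/*
 * Fill a device memory range with a value by building a single LIN_DMA
 * packet with the memset bit set and sending it on QMAN0 of DMA channel 0.
 * DMA engine errors are checked before and after the transfer, and cleared
 * while the device is still initializing.
 */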
5768 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5769                                         u32 size, u64 val)
5770 {
5771         struct packet_lin_dma *lin_dma_pkt;
5772         struct hl_cs_job *job;
5773         u32 cb_size, ctl, err_cause;
5774         struct hl_cb *cb;
5775         u64 id;
5776         int rc;
5777
5778         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5779         if (!cb)
5780                 return -EFAULT;
5781
5782         lin_dma_pkt = cb->kernel_address;
5783         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5784         cb_size = sizeof(*lin_dma_pkt);
5785
5786         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5787         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5788         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5789         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5790         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5791
5792         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5793         lin_dma_pkt->src_addr = cpu_to_le64(val);
5794         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5795         lin_dma_pkt->tsize = cpu_to_le32(size);
5796
5797         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5798         if (!job) {
5799                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5800                 rc = -ENOMEM;
5801                 goto release_cb;
5802         }
5803
5804         /* Verify DMA is OK */
5805         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5806         if (err_cause && !hdev->init_done) {
5807                 dev_dbg(hdev->dev,
5808                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5809                         err_cause);
5810                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5811         }
5812
5813         job->id = 0;
5814         job->user_cb = cb;
5815         atomic_inc(&job->user_cb->cs_cnt);
5816         job->user_cb_size = cb_size;
5817         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5818         job->patched_cb = job->user_cb;
5819         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5820
5821         hl_debugfs_add_job(hdev, job);
5822
5823         rc = gaudi_send_job_on_qman0(hdev, job);
5824         hl_debugfs_remove_job(hdev, job);
5825         kfree(job);
5826         atomic_dec(&cb->cs_cnt);
5827
5828         /* Verify DMA is OK */
5829         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5830         if (err_cause) {
5831                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5832                 rc = -EIO;
5833                 if (!hdev->init_done) {
5834                         dev_dbg(hdev->dev,
5835                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5836                                 err_cause);
5837                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5838                 }
5839         }
5840
5841 release_cb:
5842         id = cb->id;
5843         hl_cb_put(cb);
5844         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5845
5846         return rc;
5847 }
5848
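/*
 * Write the same value to a consecutive block of registers by building one
 * MSG_LONG packet per register and sending the resulting CB on QMAN0.
 */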
5849 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5850                                         u32 num_regs, u32 val)
5851 {
5852         struct packet_msg_long *pkt;
5853         struct hl_cs_job *job;
5854         u32 cb_size, ctl;
5855         struct hl_cb *cb;
5856         int i, rc;
5857
5858         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5859
5860         if (cb_size > SZ_2M) {
5861                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M / SZ_1M);
5862                 return -ENOMEM;
5863         }
5864
5865         cb = hl_cb_kernel_create(hdev, cb_size, false);
5866         if (!cb)
5867                 return -EFAULT;
5868
5869         pkt = cb->kernel_address;
5870
5871         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5872         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5873         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5874         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5875         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5876
5877         for (i = 0; i < num_regs ; i++, pkt++) {
5878                 pkt->ctl = cpu_to_le32(ctl);
5879                 pkt->value = cpu_to_le32(val);
5880                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5881         }
5882
5883         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5884         if (!job) {
5885                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5886                 rc = -ENOMEM;
5887                 goto release_cb;
5888         }
5889
5890         job->id = 0;
5891         job->user_cb = cb;
5892         atomic_inc(&job->user_cb->cs_cnt);
5893         job->user_cb_size = cb_size;
5894         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5895         job->patched_cb = job->user_cb;
5896         job->job_cb_size = cb_size;
5897
5898         hl_debugfs_add_job(hdev, job);
5899
5900         rc = gaudi_send_job_on_qman0(hdev, job);
5901         hl_debugfs_remove_job(hdev, job);
5902         kfree(job);
5903         atomic_dec(&cb->cs_cnt);
5904
5905 release_cb:
5906         hl_cb_put(cb);
5907         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5908
5909         return rc;
5910 }
5911
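/*
 * Like gaudi_memset_registers(), but when a compute context exists the
 * MSG_LONG packets are placed on a pending CB that will be submitted on the
 * given (external) queue on behalf of that context, instead of being
 * executed immediately on QMAN0.
 */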
5912 static int gaudi_schedule_register_memset(struct hl_device *hdev,
5913                 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
5914 {
5915         struct hl_ctx *ctx;
5916         struct hl_pending_cb *pending_cb;
5917         struct packet_msg_long *pkt;
5918         u32 cb_size, ctl;
5919         struct hl_cb *cb;
5920         int i, rc;
5921
5922         mutex_lock(&hdev->fpriv_list_lock);
5923         ctx = hdev->compute_ctx;
5924
5925         /* If no compute context is available, or the context is going
5926          * down, memset the registers directly
5927          */
5928         if (!ctx || kref_read(&ctx->refcount) == 0) {
5929                 rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
5930                 mutex_unlock(&hdev->fpriv_list_lock);
5931                 return rc;
5932         }
5933
5934         mutex_unlock(&hdev->fpriv_list_lock);
5935
5936         cb_size = (sizeof(*pkt) * num_regs) +
5937                         sizeof(struct packet_msg_prot) * 2;
5938
5939         if (cb_size > SZ_2M) {
5940                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M / SZ_1M);
5941                 return -ENOMEM;
5942         }
5943
5944         pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
5945         if (!pending_cb)
5946                 return -ENOMEM;
5947
5948         cb = hl_cb_kernel_create(hdev, cb_size, false);
5949         if (!cb) {
5950                 kfree(pending_cb);
5951                 return -EFAULT;
5952         }
5953
5954         pkt = cb->kernel_address;
5955
5956         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5957         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5958         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5959         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5960         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5961
5962         for (i = 0; i < num_regs ; i++, pkt++) {
5963                 pkt->ctl = cpu_to_le32(ctl);
5964                 pkt->value = cpu_to_le32(val);
5965                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5966         }
5967
5968         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5969
5970         pending_cb->cb = cb;
5971         pending_cb->cb_size = cb_size;
5972         /* The queue ID MUST be an external queue ID. Otherwise, we will
5973          * have undefined behavior
5974          */
5975         pending_cb->hw_queue_id = hw_queue_id;
5976
5977         spin_lock(&ctx->pending_cb_lock);
5978         list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
5979         spin_unlock(&ctx->pending_cb_lock);
5980
5981         return 0;
5982 }
5983
5984 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5985 {
5986         u64 base_addr;
5987         u32 num_regs;
5988         int rc;
5989
5990         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5991         num_regs = NUM_OF_SOB_IN_BLOCK;
5992         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5993         if (rc) {
5994                 dev_err(hdev->dev, "failed resetting SM registers\n");
5995                 return -ENOMEM;
5996         }
5997
5998         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5999         num_regs = NUM_OF_SOB_IN_BLOCK;
6000         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6001         if (rc) {
6002                 dev_err(hdev->dev, "failed resetting SM registers\n");
6003                 return -ENOMEM;
6004         }
6005
6006         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6007         num_regs = NUM_OF_SOB_IN_BLOCK;
6008         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6009         if (rc) {
6010                 dev_err(hdev->dev, "failed resetting SM registers\n");
6011                 return -ENOMEM;
6012         }
6013
6014         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6015         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6016         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6017         if (rc) {
6018                 dev_err(hdev->dev, "failed resetting SM registers\n");
6019                 return -ENOMEM;
6020         }
6021
6022         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
6023         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6024         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6025         if (rc) {
6026                 dev_err(hdev->dev, "failed resetting SM registers\n");
6027                 return -ENOMEM;
6028         }
6029
6030         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6031         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6032         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6033         if (rc) {
6034                 dev_err(hdev->dev, "failed resetting SM registers\n");
6035                 return -ENOMEM;
6036         }
6037
6038         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6039                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6040         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6041         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6042         if (rc) {
6043                 dev_err(hdev->dev, "failed resetting SM registers\n");
6044                 return -ENOMEM;
6045         }
6046
6047         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6048                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6049         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6050         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6051         if (rc) {
6052                 dev_err(hdev->dev, "failed resetting SM registers\n");
6053                 return -ENOMEM;
6054         }
6055
6056         return 0;
6057 }
6058
6059 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6060 {
6061         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6062                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6063         int i;
6064
6065         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6066                 u64 sob_addr = CFG_BASE +
6067                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6068                                 (i * sob_delta);
6069                 u32 dma_offset = i * DMA_CORE_OFFSET;
6070
6071                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6072                                 lower_32_bits(sob_addr));
6073                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6074                                 upper_32_bits(sob_addr));
6075                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6076
6077                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6078                  * modified by the user for SRAM reduction
6079                  */
6080                 if (i > 1)
6081                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6082                                                                 0x00000001);
6083         }
6084 }
6085
6086 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6087 {
6088         u32 qman_offset;
6089         int i;
6090
6091         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6092                 qman_offset = i * DMA_QMAN_OFFSET;
6093                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6094         }
6095
6096         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6097                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6098                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6099         }
6100
6101         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6102                 qman_offset = i * TPC_QMAN_OFFSET;
6103                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6104         }
6105
6106         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6107                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6108                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6109                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6110         }
6111 }
6112
6113 static int gaudi_restore_user_registers(struct hl_device *hdev)
6114 {
6115         int rc;
6116
6117         rc = gaudi_restore_sm_registers(hdev);
6118         if (rc)
6119                 return rc;
6120
6121         gaudi_restore_dma_registers(hdev);
6122         gaudi_restore_qm_registers(hdev);
6123
6124         return 0;
6125 }
6126
6127 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6128 {
6129         return gaudi_restore_user_registers(hdev);
6130 }
6131
6132 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6133 {
6134         struct asic_fixed_properties *prop = &hdev->asic_prop;
6135         struct gaudi_device *gaudi = hdev->asic_specific;
6136         u64 addr = prop->mmu_pgt_addr;
6137         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6138
6139         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6140                 return 0;
6141
6142         return gaudi_memset_device_memory(hdev, addr, size, 0);
6143 }
6144
6145 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6146 {
6147
6148 }
6149
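/*
 * debugfs access helpers: the address is dispatched by range - configuration
 * space goes through register reads/writes (unless blocked by clock gating),
 * SRAM through its PCI BAR, HBM through the re-based HBM BAR, and host
 * physical addresses are accessed directly when no IOMMU is present.
 */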
6150 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6151                         bool user_address, u32 *val)
6152 {
6153         struct asic_fixed_properties *prop = &hdev->asic_prop;
6154         struct gaudi_device *gaudi = hdev->asic_specific;
6155         u64 hbm_bar_addr, host_phys_end;
6156         int rc = 0;
6157
6158         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6159
6160         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6161
6162                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6163                                 (hdev->clock_gating_mask &
6164                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6165
6166                         dev_err_ratelimited(hdev->dev,
6167                                 "Can't read register - clock gating is enabled!\n");
6168                         rc = -EFAULT;
6169                 } else {
6170                         *val = RREG32(addr - CFG_BASE);
6171                 }
6172
6173         } else if ((addr >= SRAM_BASE_ADDR) &&
6174                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6175                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6176                                 (addr - SRAM_BASE_ADDR));
6177         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6178                 u64 bar_base_addr = DRAM_PHYS_BASE +
6179                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6180
6181                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6182                 if (hbm_bar_addr != U64_MAX) {
6183                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6184                                                 (addr - bar_base_addr));
6185
6186                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6187                                                 hbm_bar_addr);
6188                 }
6189                 if (hbm_bar_addr == U64_MAX)
6190                         rc = -EIO;
6191         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6192                         user_address && !iommu_present(&pci_bus_type)) {
6193                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6194         } else {
6195                 rc = -EFAULT;
6196         }
6197
6198         return rc;
6199 }
6200
6201 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6202                         bool user_address, u32 val)
6203 {
6204         struct asic_fixed_properties *prop = &hdev->asic_prop;
6205         struct gaudi_device *gaudi = hdev->asic_specific;
6206         u64 hbm_bar_addr, host_phys_end;
6207         int rc = 0;
6208
6209         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6210
6211         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6212
6213                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6214                                 (hdev->clock_gating_mask &
6215                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6216
6217                         dev_err_ratelimited(hdev->dev,
6218                                 "Can't write register - clock gating is enabled!\n");
6219                         rc = -EFAULT;
6220                 } else {
6221                         WREG32(addr - CFG_BASE, val);
6222                 }
6223
6224         } else if ((addr >= SRAM_BASE_ADDR) &&
6225                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6226                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6227                                         (addr - SRAM_BASE_ADDR));
6228         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6229                 u64 bar_base_addr = DRAM_PHYS_BASE +
6230                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6231
6232                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6233                 if (hbm_bar_addr != U64_MAX) {
6234                         writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6235                                                 (addr - bar_base_addr));
6236
6237                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6238                                                 hbm_bar_addr);
6239                 }
6240                 if (hbm_bar_addr == U64_MAX)
6241                         rc = -EIO;
6242         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6243                         user_address && !iommu_present(&pci_bus_type)) {
6244                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6245         } else {
6246                 rc = -EFAULT;
6247         }
6248
6249         return rc;
6250 }
6251
6252 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6253                                 bool user_address, u64 *val)
6254 {
6255         struct asic_fixed_properties *prop = &hdev->asic_prop;
6256         struct gaudi_device *gaudi = hdev->asic_specific;
6257         u64 hbm_bar_addr, host_phys_end;
6258         int rc = 0;
6259
6260         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6261
6262         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6263
6264                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6265                                 (hdev->clock_gating_mask &
6266                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6267
6268                         dev_err_ratelimited(hdev->dev,
6269                                 "Can't read register - clock gating is enabled!\n");
6270                         rc = -EFAULT;
6271                 } else {
6272                         u32 val_l = RREG32(addr - CFG_BASE);
6273                         u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6274
6275                         *val = (((u64) val_h) << 32) | val_l;
6276                 }
6277
6278         } else if ((addr >= SRAM_BASE_ADDR) &&
6279                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6280                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6281                                 (addr - SRAM_BASE_ADDR));
6282         } else if (addr <=
6283                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6284                 u64 bar_base_addr = DRAM_PHYS_BASE +
6285                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6286
6287                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6288                 if (hbm_bar_addr != U64_MAX) {
6289                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6290                                                 (addr - bar_base_addr));
6291
6292                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6293                                                 hbm_bar_addr);
6294                 }
6295                 if (hbm_bar_addr == U64_MAX)
6296                         rc = -EIO;
6297         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6298                         user_address && !iommu_present(&pci_bus_type)) {
6299                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6300         } else {
6301                 rc = -EFAULT;
6302         }
6303
6304         return rc;
6305 }
6306
6307 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6308                                 bool user_address, u64 val)
6309 {
6310         struct asic_fixed_properties *prop = &hdev->asic_prop;
6311         struct gaudi_device *gaudi = hdev->asic_specific;
6312         u64 hbm_bar_addr, host_phys_end;
6313         int rc = 0;
6314
6315         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6316
6317         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6318
6319                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6320                                 (hdev->clock_gating_mask &
6321                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6322
6323                         dev_err_ratelimited(hdev->dev,
6324                                 "Can't write register - clock gating is enabled!\n");
6325                         rc = -EFAULT;
6326                 } else {
6327                         WREG32(addr - CFG_BASE, lower_32_bits(val));
6328                         WREG32(addr + sizeof(u32) - CFG_BASE,
6329                                 upper_32_bits(val));
6330                 }
6331
6332         } else if ((addr >= SRAM_BASE_ADDR) &&
6333                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6334                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6335                                         (addr - SRAM_BASE_ADDR));
6336         } else if (addr <=
6337                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6338                 u64 bar_base_addr = DRAM_PHYS_BASE +
6339                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6340
6341                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6342                 if (hbm_bar_addr != U64_MAX) {
6343                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6344                                                 (addr - bar_base_addr));
6345
6346                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6347                                                 hbm_bar_addr);
6348                 }
6349                 if (hbm_bar_addr == U64_MAX)
6350                         rc = -EIO;
6351         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6352                         user_address && !iommu_present(&pci_bus_type)) {
6353                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6354         } else {
6355                 rc = -EFAULT;
6356         }
6357
6358         return rc;
6359 }
6360
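/*
 * Perform a single transfer on a DMA core by programming its source,
 * destination and size registers directly, committing it in linear mode and
 * polling the status register until the engine is no longer busy.
 */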
6361 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6362                                         u32 size_to_dma, dma_addr_t dma_addr)
6363 {
6364         u32 err_cause, val;
6365         u64 dma_offset;
6366         int rc;
6367
6368         dma_offset = dma_id * DMA_CORE_OFFSET;
6369
6370         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6371         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6372         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6373         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6374         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6375         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6376                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6377
6378         rc = hl_poll_timeout(
6379                 hdev,
6380                 mmDMA0_CORE_STS0 + dma_offset,
6381                 val,
6382                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6383                 0,
6384                 1000000);
6385
6386         if (rc) {
6387                 dev_err(hdev->dev,
6388                         "DMA %d timed out while reading 0x%llx\n",
6389                         dma_id, addr);
6390                 return -EIO;
6391         }
6392
6393         /* Verify DMA is OK */
6394         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6395         if (err_cause) {
6396                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6397                 dev_dbg(hdev->dev,
6398                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6399                         err_cause);
6400                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6401
6402                 return -EIO;
6403         }
6404
6405         return 0;
6406 }
6407
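/*
 * Read a device memory range for debugfs using an idle PCI DMA engine: data
 * is transferred in chunks of up to 2MB into a temporary coherent buffer and
 * copied out to the caller's blob. The engine's QMAN CPs are stopped and its
 * protection bit is set for the duration of the transfer.
 */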
6408 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6409                                 void *blob_addr)
6410 {
6411         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6412         struct gaudi_device *gaudi = hdev->asic_specific;
6413         u64 dma_offset, qm_offset;
6414         dma_addr_t dma_addr;
6415         void *kernel_addr;
6416         bool is_eng_idle;
6417         int rc = 0, dma_id;
6418
6419         kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6420                                                 hdev, SZ_2M,
6421                                                 &dma_addr,
6422                                                 GFP_KERNEL | __GFP_ZERO);
6423
6424         if (!kernel_addr)
6425                 return -ENOMEM;
6426
6427         mutex_lock(&gaudi->clk_gate_mutex);
6428
6429         hdev->asic_funcs->disable_clock_gating(hdev);
6430
6431         hdev->asic_funcs->hw_queues_lock(hdev);
6432
6433         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6434         dma_offset = dma_id * DMA_CORE_OFFSET;
6435         qm_offset = dma_id * DMA_QMAN_OFFSET;
6436         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6437         is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6438
6439         if (!is_eng_idle) {
6440                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6441                 dma_offset = dma_id * DMA_CORE_OFFSET;
6442                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6443                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6444                 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6445
6446                 if (!is_eng_idle) {
6447                         dev_err_ratelimited(hdev->dev,
6448                                 "Can't read via DMA because it is BUSY\n");
6449                         rc = -EAGAIN;
6450                         goto out;
6451                 }
6452         }
6453
6454         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6455         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6456                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6457
6458         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6459          * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6460          * ASID
6461          */
6462         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6463
6464         /* Verify DMA is OK */
6465         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6466         if (err_cause) {
6467                 dev_dbg(hdev->dev,
6468                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6469                         err_cause);
6470                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6471         }
6472
6473         pos = 0;
6474         size_left = size;
6475         size_to_dma = SZ_2M;
6476
6477         while (size_left > 0) {
6478
6479                 if (size_left < SZ_2M)
6480                         size_to_dma = size_left;
6481
6482                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6483                                                 dma_addr);
6484                 if (rc)
6485                         break;
6486
6487                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6488
6489                 if (size_left <= SZ_2M)
6490                         break;
6491
6492                 pos += SZ_2M;
6493                 addr += SZ_2M;
6494                 size_left -= SZ_2M;
6495         }
6496
6497         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6498          * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6499          * ASID
6500          */
6501         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6502                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6503
6504         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6505
6506 out:
6507         hdev->asic_funcs->hw_queues_unlock(hdev);
6508
6509         hdev->asic_funcs->set_clock_gating(hdev);
6510
6511         mutex_unlock(&gaudi->clk_gate_mutex);
6512
6513         hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6514                                                 dma_addr);
6515
6516         return rc;
6517 }
6518
6519 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6520 {
6521         struct gaudi_device *gaudi = hdev->asic_specific;
6522
6523         if (hdev->hard_reset_pending)
6524                 return U64_MAX;
6525
6526         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6527                         (addr - gaudi->hbm_bar_cur_addr));
6528 }
6529
6530 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6531 {
6532         struct gaudi_device *gaudi = hdev->asic_specific;
6533
6534         if (hdev->hard_reset_pending)
6535                 return;
6536
6537         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6538                         (addr - gaudi->hbm_bar_cur_addr));
6539 }
6540
6541 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6542 {
6543         /* mask to zero the MMBP and ASID bits */
6544         WREG32_AND(reg, ~0x7FF);
6545         WREG32_OR(reg, asid);
6546 }
6547
6548 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6549 {
6550         struct gaudi_device *gaudi = hdev->asic_specific;
6551
6552         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6553                 return;
6554
6555         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6556                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6557                 return;
6558         }
6559
6560         mutex_lock(&gaudi->clk_gate_mutex);
6561
6562         hdev->asic_funcs->disable_clock_gating(hdev);
6563
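	/* Program the ASID into the non-secure props of every engine's QMAN
	 * and into the engines' AXI user registers, so that the MMU
	 * translates their transactions in this context.
	 */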
6564         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6565         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6566         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6567         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6568         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6569
6570         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6571         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6572         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6573         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6574         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6575
6576         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6577         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6578         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6579         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6580         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6581
6582         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6583         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6584         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6585         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6586         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6587
6588         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6589         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6590         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6591         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6592         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6593
6594         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6595         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6596         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6597         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6598         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6599
6600         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6601         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6602         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6603         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6604         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6605
6606         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6607         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6608         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6609         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6610         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6611
6612         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6613         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6614         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6615         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6616         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6617         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6618         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6619         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6620
6621         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6622         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6623         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6624         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6625         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6626         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6627         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6628
6629         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6630         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6631         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6632         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6633         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6634         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6635         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6636
6637         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6638         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6639         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6640         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6641         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6642         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6643         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6644
6645         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6646         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6647         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6648         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6649         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6650         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6651         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6652
6653         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6654         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6655         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6656         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6657         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6658         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6659         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6660
6661         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6662         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6663         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6664         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6665         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6666         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6667         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6668
6669         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6670         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6671         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6672         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6673         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6674         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6675         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6676
6677         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6678         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6679         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6680         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6681         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6682         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6683         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6684
6685         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6686         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6687         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6688         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6689         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6690         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6691         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6692         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6693         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6694         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6695
6696         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6697         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6698         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6699         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6700         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6701         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6702         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6703         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6704         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6705         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6706         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6707         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6708
6709         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6710                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6711                                 asid);
6712                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6713                                 asid);
6714                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6715                                 asid);
6716                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6717                                 asid);
6718                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6719                                 asid);
6720         }
6721
6722         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6723                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6724                                 asid);
6725                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6726                                 asid);
6727                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6728                                 asid);
6729                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6730                                 asid);
6731                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6732                                 asid);
6733         }
6734
6735         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6736                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6737                                 asid);
6738                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6739                                 asid);
6740                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6741                                 asid);
6742                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6743                                 asid);
6744                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6745                                 asid);
6746         }
6747
6748         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6749                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6750                                 asid);
6751                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6752                                 asid);
6753                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6754                                 asid);
6755                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6756                                 asid);
6757                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6758                                 asid);
6759         }
6760
6761         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6762                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6763                                 asid);
6764                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6765                                 asid);
6766                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6767                                 asid);
6768                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6769                                 asid);
6770                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6771                                 asid);
6772         }
6773
6774         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6775                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6776                                 asid);
6777                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6778                                 asid);
6779                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6780                                 asid);
6781                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6782                                 asid);
6783                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6784                                 asid);
6785         }
6786
6787         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6788                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6789                                 asid);
6790                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6791                                 asid);
6792                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6793                                 asid);
6794                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6795                                 asid);
6796                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6797                                 asid);
6798         }
6799
6800         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6801                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6802                                 asid);
6803                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6804                                 asid);
6805                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6806                                 asid);
6807                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6808                                 asid);
6809                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6810                                 asid);
6811         }
6812
6813         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6814                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6815                                 asid);
6816                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6817                                 asid);
6818                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6819                                 asid);
6820                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6821                                 asid);
6822                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6823                                 asid);
6824         }
6825
6826         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6827                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6828                                 asid);
6829                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6830                                 asid);
6831                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6832                                 asid);
6833                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6834                                 asid);
6835                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6836                                 asid);
6837         }
6838
6839         hdev->asic_funcs->set_clock_gating(hdev);
6840
6841         mutex_unlock(&gaudi->clk_gate_mutex);
6842 }
6843
6844 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6845                 struct hl_cs_job *job)
6846 {
6847         struct packet_msg_prot *fence_pkt;
6848         u32 *fence_ptr;
6849         dma_addr_t fence_dma_addr;
6850         struct hl_cb *cb;
6851         u32 tmp, timeout, dma_offset;
6852         int rc;
6853
6854         if (hdev->pldm)
6855                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6856         else
6857                 timeout = HL_DEVICE_TIMEOUT_USEC;
6858
6859         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6860                 dev_err_ratelimited(hdev->dev,
6861                         "Can't send driver job on QMAN0 because the device is not idle\n");
6862                 return -EBUSY;
6863         }
6864
6865         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6866                                                         &fence_dma_addr);
6867         if (!fence_ptr) {
6868                 dev_err(hdev->dev,
6869                         "Failed to allocate fence memory for QMAN0\n");
6870                 return -ENOMEM;
6871         }
6872
6873         cb = job->patched_cb;
6874
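	/* The patched CB ends with a MSG_PROT packet; fill it so that QMAN0
	 * writes the fence value to the host buffer, which is polled below
	 * to detect job completion.
	 */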
6875         fence_pkt = cb->kernel_address +
6876                         job->job_cb_size - sizeof(struct packet_msg_prot);
6877
6878         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6879         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6880         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6881
6882         fence_pkt->ctl = cpu_to_le32(tmp);
6883         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6884         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6885
6886         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6887
6888         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6889
6890         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6891                                         job->job_cb_size, cb->bus_address);
6892         if (rc) {
6893                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6894                 goto free_fence_ptr;
6895         }
6896
6897         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6898                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6899                                 timeout, true);
6900
6901         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6902
6903         if (rc == -ETIMEDOUT) {
6904                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6905                 goto free_fence_ptr;
6906         }
6907
6908 free_fence_ptr:
6909         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6910                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6911
6912         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6913                                         fence_dma_addr);
6914         return rc;
6915 }
6916
6917 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6918 {
6919         if (event_type >= GAUDI_EVENT_SIZE)
6920                 goto event_not_supported;
6921
6922         if (!gaudi_irq_map_table[event_type].valid)
6923                 goto event_not_supported;
6924
6925         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6926
6927         return;
6928
6929 event_not_supported:
6930         snprintf(desc, size, "N/A");
6931 }
6932
6933 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6934                                                         u32 x_y, bool is_write)
6935 {
6936         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6937
6938         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6939                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6940
6941         switch (x_y) {
6942         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6943         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6944                 dma_id[0] = 0;
6945                 dma_id[1] = 2;
6946                 break;
6947         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6948         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6949                 dma_id[0] = 1;
6950                 dma_id[1] = 3;
6951                 break;
6952         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6953         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6954                 dma_id[0] = 4;
6955                 dma_id[1] = 6;
6956                 break;
6957         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6958         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6959                 dma_id[0] = 5;
6960                 dma_id[1] = 7;
6961                 break;
6962         default:
6963                 goto unknown_initiator;
6964         }
6965
6966         for (i = 0 ; i < 2 ; i++) {
6967                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6968                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6969         }
6970
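	/* Each DMA_IF is shared by two DMA cores; use their error cause
	 * registers to tell which of the two issued the RAZWI. If both or
	 * neither are flagged, report the ambiguity.
	 */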
6971         switch (x_y) {
6972         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6973         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6974                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6975                         return "DMA0";
6976                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6977                         return "DMA2";
6978                 else
6979                         return "DMA0 or DMA2";
6980         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6981         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6982                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6983                         return "DMA1";
6984                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6985                         return "DMA3";
6986                 else
6987                         return "DMA1 or DMA3";
6988         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6989         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6990                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6991                         return "DMA4";
6992                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6993                         return "DMA6";
6994                 else
6995                         return "DMA4 or DMA6";
6996         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6997         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6998                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6999                         return "DMA5";
7000                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
7001                         return "DMA7";
7002                 else
7003                         return "DMA5 or DMA7";
7004         }
7005
7006 unknown_initiator:
7007         return "unknown initiator";
7008 }
7009
7010 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
7011                                                         bool is_write)
7012 {
7013         u32 val, x_y, axi_id;
7014
7015         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
7016                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
7017         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
7018                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
7019         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
7020                         RAZWI_INITIATOR_AXI_ID_SHIFT);
7021
7022         switch (x_y) {
7023         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
7024                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7025                         return "TPC0";
7026                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7027                         return "NIC0";
7028                 break;
7029         case RAZWI_INITIATOR_ID_X_Y_TPC1:
7030                 return "TPC1";
7031         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
7032         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
7033                 return "MME0";
7034         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
7035         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
7036                 return "MME1";
7037         case RAZWI_INITIATOR_ID_X_Y_TPC2:
7038                 return "TPC2";
7039         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
7040                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7041                         return "TPC3";
7042                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
7043                         return "PCI";
7044                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
7045                         return "CPU";
7046                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
7047                         return "PSOC";
7048                 break;
7049         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7050         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7051         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7052         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7053         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7054         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7055         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7056         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7057                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
7058         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7059                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7060                         return "TPC4";
7061                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7062                         return "NIC1";
7063                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7064                         return "NIC2";
7065                 break;
7066         case RAZWI_INITIATOR_ID_X_Y_TPC5:
7067                 return "TPC5";
7068         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7069         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7070                 return "MME2";
7071         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7072         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7073                 return "MME3";
7074         case RAZWI_INITIATOR_ID_X_Y_TPC6:
7075                 return "TPC6";
7076         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7077                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7078                         return "TPC7";
7079                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7080                         return "NIC4";
7081                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7082                         return "NIC5";
7083                 break;
7084         default:
7085                 break;
7086         }
7087
7088         dev_err(hdev->dev,
7089                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7090                 val,
7091                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7092                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7093                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7094                         RAZWI_INITIATOR_AXI_ID_MASK);
7095
7096         return "unknown initiator";
7097 }
7098
7099 static void gaudi_print_razwi_info(struct hl_device *hdev)
7100 {
7101         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7102                 dev_err_ratelimited(hdev->dev,
7103                         "RAZWI event caused by illegal write of %s\n",
7104                         gaudi_get_razwi_initiator_name(hdev, true));
7105                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7106         }
7107
7108         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7109                 dev_err_ratelimited(hdev->dev,
7110                         "RAZWI event caused by illegal read of %s\n",
7111                         gaudi_get_razwi_initiator_name(hdev, false));
7112                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7113         }
7114 }
7115
7116 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7117 {
7118         struct gaudi_device *gaudi = hdev->asic_specific;
7119         u64 addr;
7120         u32 val;
7121
7122         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7123                 return;
7124
7125         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7126         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7127                 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7128                 addr <<= 32;
7129                 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7130
7131                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7132                                         addr);
7133
7134                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7135         }
7136
7137         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7138         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7139                 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7140                 addr <<= 32;
7141                 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7142
7143                 dev_err_ratelimited(hdev->dev,
7144                                 "MMU access error on va 0x%llx\n", addr);
7145
7146                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7147         }
7148 }
7149
7150 /*
7151  *  +-------------------+------------------------------------------------------+
7152  *  | Configuration Reg |                     Description                      |
7153  *  |      Address      |                                                      |
7154  *  +-------------------+------------------------------------------------------+
7155  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7156  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7157  *  |                   |0xF34 memory wrappers 63:32                           |
7158  *  |                   |0xF38 memory wrappers 95:64                           |
7159  *  |                   |0xF3C memory wrappers 127:96                          |
7160  *  +-------------------+------------------------------------------------------+
7161  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7162  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7163  *  |                   |0xF44 memory wrappers 63:32                           |
7164  *  |                   |0xF48 memory wrappers 95:64                           |
7165  *  |                   |0xF4C memory wrappers 127:96                          |
7166  *  +-------------------+------------------------------------------------------+
7167  */
7168 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7169                 struct ecc_info_extract_params *params, u64 *ecc_address,
7170                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7171 {
7172         struct gaudi_device *gaudi = hdev->asic_specific;
7173         u32 i, num_mem_regs, reg, err_bit;
7174         u64 err_addr, err_word = 0;
7175         int rc = 0;
7176
7177         num_mem_regs = params->num_memories / 32 +
7178                         ((params->num_memories % 32) ? 1 : 0);
7179
7180         if (params->block_address >= CFG_BASE)
7181                 params->block_address -= CFG_BASE;
7182
7183         if (params->derr)
7184                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7185         else
7186                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7187
7188         if (params->disable_clock_gating) {
7189                 mutex_lock(&gaudi->clk_gate_mutex);
7190                 hdev->asic_funcs->disable_clock_gating(hdev);
7191         }
7192
7193         /* Set invalid wrapper index */
7194         *memory_wrapper_idx = 0xFF;
7195
7196         /* Iterate through memory wrappers, a single bit must be set */
7197         for (i = 0 ; i < num_mem_regs ; i++) {
7198                 err_addr += i * 4;
7199                 err_word = RREG32(err_addr);
7200                 if (err_word) {
7201                         err_bit = __ffs(err_word);
7202                         *memory_wrapper_idx = err_bit + (32 * i);
7203                         break;
7204                 }
7205         }
7206
7207         if (*memory_wrapper_idx == 0xFF) {
7208                 dev_err(hdev->dev, "ECC error information cannot be found\n");
7209                 rc = -EINVAL;
7210                 goto enable_clk_gate;
7211         }
7212
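	/* Select the failing memory wrapper and read back the captured ECC
	 * error address and syndrome.
	 */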
7213         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7214                         *memory_wrapper_idx);
7215
7216         *ecc_address =
7217                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7218         *ecc_syndrom =
7219                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7220
7221         /* Clear error indication */
7222         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7223         if (params->derr)
7224                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7225         else
7226                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7227
7228         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7229
7230 enable_clk_gate:
7231         if (params->disable_clock_gating) {
7232                 hdev->asic_funcs->set_clock_gating(hdev);
7233
7234                 mutex_unlock(&gaudi->clk_gate_mutex);
7235         }
7236
7237         return rc;
7238 }
7239
7240 /*
7241  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7242  *
7243  * @idx: the current pi/ci value
7244  * @q_len: the queue length (power of 2)
7245  *
7246  * @return the cyclically decremented index
7247  */
7248 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7249 {
7250         u32 mask = q_len - 1;
7251
7252         /*
7253          * modular decrement is equivalent to adding (q_len - 1);
7254          * later we take the LSBs to make sure the value is in the
7255          * range [0, q_len - 1]
7256          */
7257         return (idx + q_len - 1) & mask;
7258 }
7259
7260 /**
7261  * gaudi_print_sw_config_stream_data - print SW config stream data
7262  *
7263  * @hdev: pointer to the habanalabs device structure
7264  * @stream: the QMAN's stream
7265  * @qman_base: base address of QMAN registers block
7266  */
7267 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7268                                                 u64 qman_base)
7269 {
7270         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7271         u32 cq_ptr_lo_off, size;
7272
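	/* The per-stream CQ PTR/TSIZE register addresses are derived from the
	 * TPC0 QMAN offsets; the same layout is assumed for the given QMAN's
	 * register block.
	 */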
7273         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7274
7275         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7276                                                 stream * cq_ptr_lo_off;
7277         cq_ptr_hi = cq_ptr_lo +
7278                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7279         cq_tsize = cq_ptr_lo +
7280                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7281
7282         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7283         size = RREG32(cq_tsize);
7284         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
7285                                                         stream, cq_ptr, size);
7286 }
7287
7288 /**
7289  * gaudi_print_last_pqes_on_err - print last PQEs on error
7290  *
7291  * @hdev: pointer to the habanalabs device structure
7292  * @qid_base: first QID of the QMAN (out of 4 streams)
7293  * @stream: the QMAN's stream
7294  * @qman_base: base address of QMAN registers block
7295  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7296  */
7297 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7298                                                 u32 stream, u64 qman_base,
7299                                                 bool pr_sw_conf)
7300 {
7301         u32 ci, qm_ci_stream_off, queue_len;
7302         struct hl_hw_queue *q;
7303         u64 pq_ci;
7304         int i;
7305
7306         q = &hdev->kernel_queues[qid_base + stream];
7307
7308         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7309         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7310                                                 stream * qm_ci_stream_off;
7311
7312         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7313                                         q->int_queue_len : HL_QUEUE_LENGTH;
7314
7315         hdev->asic_funcs->hw_queues_lock(hdev);
7316
7317         if (pr_sw_conf)
7318                 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7319
7320         ci = RREG32(pq_ci);
7321
7322         /* we should start printing from ci - 1 */
7323         ci = gaudi_queue_idx_dec(ci, queue_len);
7324
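	/* Walk the PQ backwards from the last fetched entry and dump up to
	 * the PQ fetcher cache depth worth of PQEs.
	 */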
7325         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7326                 struct hl_bd *bd;
7327                 u64 addr;
7328                 u32 len;
7329
7330                 bd = q->kernel_address;
7331                 bd += ci;
7332
7333                 len = le32_to_cpu(bd->len);
7334                 /* len 0 means uninitialized entry - break */
7335                 if (!len)
7336                         break;
7337
7338                 addr = le64_to_cpu(bd->ptr);
7339
7340                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7341                                                         stream, ci, addr, len);
7342
7343                 /* get previous ci, wrap if needed */
7344                 ci = gaudi_queue_idx_dec(ci, queue_len);
7345         }
7346
7347         hdev->asic_funcs->hw_queues_unlock(hdev);
7348 }
7349
7350 /**
7351  * print_qman_data_on_err - extract QMAN data on error
7352  *
7353  * @hdev: pointer to the habanalabs device structure
7354  * @qid_base: first QID of the QMAN (out of 4 streams)
7355  * @stream: the QMAN's stream
7356  * @qman_base: base address of QMAN registers block
7357  *
7358  * This function attempts to extract as much data as possible on a QMAN error.
7359  * For an upper CP, print the SW config stream data and the last 8 PQEs.
7360  * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7361  */
7362 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7363                                                 u32 stream, u64 qman_base)
7364 {
7365         u32 i;
7366
7367         if (stream != QMAN_STREAMS) {
7368                 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7369                                                                         true);
7370                 return;
7371         }
7372
7373         gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7374
7375         for (i = 0; i < QMAN_STREAMS; i++)
7376                 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7377                                                                         false);
7378 }
7379
7380 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7381                                           const char *qm_name,
7382                                           u64 qman_base,
7383                                           u32 qid_base)
7384 {
7385         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7386         u64 glbl_sts_addr, arb_err_addr;
7387         char reg_desc[32];
7388
7389         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7390         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7391
7392         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7393         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7394                 glbl_sts_clr_val = 0;
7395                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7396
7397                 if (!glbl_sts_val)
7398                         continue;
7399
7400                 if (i == QMAN_STREAMS)
7401                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7402                 else
7403                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7404
7405                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7406                         if (glbl_sts_val & BIT(j)) {
7407                                 dev_err_ratelimited(hdev->dev,
7408                                                 "%s %s. err cause: %s\n",
7409                                                 qm_name, reg_desc,
7410                                                 gaudi_qman_error_cause[j]);
7411                                 glbl_sts_clr_val |= BIT(j);
7412                         }
7413                 }
7414
7415                 /* Write 1 to clear errors */
7416                 if (!hdev->stop_on_err)
7417                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7418                 else
7419                         print_qman_data_on_err(hdev, qid_base, i, qman_base);
7420         }
7421
7422         arb_err_val = RREG32(arb_err_addr);
7423
7424         if (!arb_err_val)
7425                 return;
7426
7427         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7428                 if (arb_err_val & BIT(j)) {
7429                         dev_err_ratelimited(hdev->dev,
7430                                         "%s ARB_ERR. err cause: %s\n",
7431                                         qm_name,
7432                                         gaudi_qman_arb_error_cause[j]);
7433                 }
7434         }
7435 }
7436
7437 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7438                 struct hl_eq_sm_sei_data *sei_data)
7439 {
7440         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7441
7442         /* Flip the bits as the enum is ordered in the opposite way */
7443         index = (index ^ 0x3) & 0x3;
7444
7445         switch (sei_data->sei_cause) {
7446         case SM_SEI_SO_OVERFLOW:
7447                 dev_err_ratelimited(hdev->dev,
7448                         "%s SEI Error: SOB Group %u overflow/underflow",
7449                         gaudi_sync_manager_names[index],
7450                         le32_to_cpu(sei_data->sei_log));
7451                 break;
7452         case SM_SEI_LBW_4B_UNALIGNED:
7453                 dev_err_ratelimited(hdev->dev,
7454                         "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7455                         gaudi_sync_manager_names[index],
7456                         le32_to_cpu(sei_data->sei_log));
7457                 break;
7458         case SM_SEI_AXI_RESPONSE_ERR:
7459                 dev_err_ratelimited(hdev->dev,
7460                         "%s SEI Error: AXI ID %u response error",
7461                         gaudi_sync_manager_names[index],
7462                         le32_to_cpu(sei_data->sei_log));
7463                 break;
7464         default:
7465                 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7466                                 le32_to_cpu(sei_data->sei_log));
7467                 break;
7468         }
7469 }
7470
7471 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7472                 struct hl_eq_ecc_data *ecc_data)
7473 {
7474         struct ecc_info_extract_params params;
7475         u64 ecc_address = 0, ecc_syndrom = 0;
7476         u8 index, memory_wrapper_idx = 0;
7477         bool extract_info_from_fw;
7478         int rc;
7479
7480         switch (event_type) {
7481         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7482         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7483                 extract_info_from_fw = true;
7484                 break;
7485         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7486                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7487                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7488                 params.num_memories = 90;
7489                 params.derr = false;
7490                 params.disable_clock_gating = true;
7491                 extract_info_from_fw = false;
7492                 break;
7493         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7494                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7495                 params.block_address =
7496                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7497                 params.num_memories = 90;
7498                 params.derr = true;
7499                 params.disable_clock_gating = true;
7500                 extract_info_from_fw = false;
7501                 break;
7502         case GAUDI_EVENT_MME0_ACC_SERR:
7503         case GAUDI_EVENT_MME1_ACC_SERR:
7504         case GAUDI_EVENT_MME2_ACC_SERR:
7505         case GAUDI_EVENT_MME3_ACC_SERR:
7506                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7507                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7508                 params.num_memories = 128;
7509                 params.derr = false;
7510                 params.disable_clock_gating = true;
7511                 extract_info_from_fw = false;
7512                 break;
7513         case GAUDI_EVENT_MME0_ACC_DERR:
7514         case GAUDI_EVENT_MME1_ACC_DERR:
7515         case GAUDI_EVENT_MME2_ACC_DERR:
7516         case GAUDI_EVENT_MME3_ACC_DERR:
7517                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7518                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7519                 params.num_memories = 128;
7520                 params.derr = true;
7521                 params.disable_clock_gating = true;
7522                 extract_info_from_fw = false;
7523                 break;
7524         case GAUDI_EVENT_MME0_SBAB_SERR:
7525         case GAUDI_EVENT_MME1_SBAB_SERR:
7526         case GAUDI_EVENT_MME2_SBAB_SERR:
7527         case GAUDI_EVENT_MME3_SBAB_SERR:
7528                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7529                 params.block_address =
7530                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7531                 params.num_memories = 33;
7532                 params.derr = false;
7533                 params.disable_clock_gating = true;
7534                 extract_info_from_fw = false;
7535                 break;
7536         case GAUDI_EVENT_MME0_SBAB_DERR:
7537         case GAUDI_EVENT_MME1_SBAB_DERR:
7538         case GAUDI_EVENT_MME2_SBAB_DERR:
7539         case GAUDI_EVENT_MME3_SBAB_DERR:
7540                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7541                 params.block_address =
7542                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7543                 params.num_memories = 33;
7544                 params.derr = true;
7545                 params.disable_clock_gating = true;
7546                 extract_info_from_fw = false;
7547                 break;
7548         default:
7549                 return;
7550         }
7551
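	/* For blocks whose ECC data is reported by the firmware, take the
	 * details from the event queue entry; otherwise extract them from
	 * the block's ECC capture registers.
	 */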
7552         if (extract_info_from_fw) {
7553                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7554                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7555                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7556         } else {
7557                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7558                                 &ecc_syndrom, &memory_wrapper_idx);
7559                 if (rc)
7560                         return;
7561         }
7562
7563         dev_err(hdev->dev,
7564                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7565                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7566 }
7567
7568 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7569 {
7570         u64 qman_base;
7571         char desc[32];
7572         u32 qid_base;
7573         u8 index;
7574
7575         switch (event_type) {
7576         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7577                 index = event_type - GAUDI_EVENT_TPC0_QM;
7578                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7579                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7580                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7581                 break;
7582         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7583                 index = event_type - GAUDI_EVENT_MME0_QM;
7584                 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7585                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7586                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7587                 break;
7588         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7589                 index = event_type - GAUDI_EVENT_DMA0_QM;
7590                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7591                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7592                 if (index > 1)
7593                         qid_base++;
7594                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7595                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7596                 break;
7597         case GAUDI_EVENT_NIC0_QM0:
7598                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7599                 qman_base = mmNIC0_QM0_BASE;
7600                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7601                 break;
7602         case GAUDI_EVENT_NIC0_QM1:
7603                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7604                 qman_base = mmNIC0_QM1_BASE;
7605                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7606                 break;
7607         case GAUDI_EVENT_NIC1_QM0:
7608                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7609                 qman_base = mmNIC1_QM0_BASE;
7610                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7611                 break;
7612         case GAUDI_EVENT_NIC1_QM1:
7613                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7614                 qman_base = mmNIC1_QM1_BASE;
7615                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7616                 break;
7617         case GAUDI_EVENT_NIC2_QM0:
7618                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7619                 qman_base = mmNIC2_QM0_BASE;
7620                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7621                 break;
7622         case GAUDI_EVENT_NIC2_QM1:
7623                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7624                 qman_base = mmNIC2_QM1_BASE;
7625                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7626                 break;
7627         case GAUDI_EVENT_NIC3_QM0:
7628                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7629                 qman_base = mmNIC3_QM0_BASE;
7630                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7631                 break;
7632         case GAUDI_EVENT_NIC3_QM1:
7633                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7634                 qman_base = mmNIC3_QM1_BASE;
7635                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7636                 break;
7637         case GAUDI_EVENT_NIC4_QM0:
7638                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7639                 qman_base = mmNIC4_QM0_BASE;
7640                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7641                 break;
7642         case GAUDI_EVENT_NIC4_QM1:
7643                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7644                 qman_base = mmNIC4_QM1_BASE;
7645                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7646                 break;
7647         default:
7648                 return;
7649         }
7650
7651         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7652 }
7653
7654 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7655                                         bool razwi)
7656 {
7657         char desc[64] = "";
7658
7659         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7660         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7661                 event_type, desc);
7662
7663         if (razwi) {
7664                 gaudi_print_razwi_info(hdev);
7665                 gaudi_print_mmu_error_info(hdev);
7666         }
7667 }
7668
7669 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7670                                         struct cpucp_pkt_sync_err *sync_err)
7671 {
7672         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7673
7674         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7675                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7676 }
7677
7678 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7679                                         struct hl_eq_fw_alive *fw_alive)
7680 {
7681         dev_err(hdev->dev,
7682                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7683                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7684                 "Minor" : "Critical", fw_alive->process_id,
7685                 fw_alive->thread_id, fw_alive->uptime_seconds);
7686 }
7687
7688 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7689 {
7690         struct gaudi_device *gaudi = hdev->asic_specific;
7691
7692         /* Unmask all IRQs since some could have been received
7693          * during the soft reset
7694          */
7695         return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7696 }
7697
7698 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7699                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7700 {
7701         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7702         int rc = 0;
7703
7704         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7705                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7706                 if (!hbm_ecc_data) {
7707                         dev_err(hdev->dev, "No FW ECC data\n");
7708                         return 0;
7709                 }
7710
7711                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7712                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7713                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7714                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7715                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7716                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7717                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7718                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7719                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7720                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7721                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7722                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7723                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7724                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7725
7726                 dev_err(hdev->dev,
7727                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7728                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7729                 dev_err(hdev->dev,
7730                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7731                         device, ch, hbm_ecc_data->first_addr, type,
7732                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7733                         hbm_ecc_data->dec_cnt);
7734                 return 0;
7735         }
7736
7737         if (hdev->asic_prop.fw_security_enabled) {
7738                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7739                 return 0;
7740         }
7741
7742         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7743         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7744                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7745                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7746                 if (val) {
7747                         rc = -EIO;
7748                         dev_err(hdev->dev,
7749                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7750                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7751                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7752                                 (val >> 4) & 0x1);
7753
7754                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7755                         dev_err(hdev->dev,
7756                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7757                                 device, ch * 2,
7758                                 RREG32(base + ch * 0x1000 + 0x064),
7759                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7760                                 (val2 & 0xFF0000) >> 16,
7761                                 (val2 & 0xFF000000) >> 24);
7762                 }
7763
7764                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7765                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7766                 if (val) {
7767                         rc = -EIO;
7768                         dev_err(hdev->dev,
7769                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7770                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7771                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7772                                 (val >> 4) & 0x1);
7773
7774                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7775                         dev_err(hdev->dev,
7776                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7777                                 device, ch * 2 + 1,
7778                                 RREG32(base + ch * 0x1000 + 0x074),
7779                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7780                                 (val2 & 0xFF0000) >> 16,
7781                                 (val2 & 0xFF000000) >> 24);
7782                 }
7783
7784                 /* Clear interrupts */
7785                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7786                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7787                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7788                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7789                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7790                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7791         }
7792
7793         val  = RREG32(base + 0x8F30);
7794         val2 = RREG32(base + 0x8F34);
7795         if (val | val2) {
7796                 rc = -EIO;
7797                 dev_err(hdev->dev,
7798                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7799                         device, val, val2);
7800         }
7801         val  = RREG32(base + 0x8F40);
7802         val2 = RREG32(base + 0x8F44);
7803         if (val | val2) {
7804                 rc = -EIO;
7805                 dev_err(hdev->dev,
7806                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7807                         device, val, val2);
7808         }
7809
7810         return rc;
7811 }
7812
7813 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7814 {
7815         switch (hbm_event_type) {
7816         case GAUDI_EVENT_HBM0_SPI_0:
7817         case GAUDI_EVENT_HBM0_SPI_1:
7818                 return 0;
7819         case GAUDI_EVENT_HBM1_SPI_0:
7820         case GAUDI_EVENT_HBM1_SPI_1:
7821                 return 1;
7822         case GAUDI_EVENT_HBM2_SPI_0:
7823         case GAUDI_EVENT_HBM2_SPI_1:
7824                 return 2;
7825         case GAUDI_EVENT_HBM3_SPI_0:
7826         case GAUDI_EVENT_HBM3_SPI_1:
7827                 return 3;
7828         default:
7829                 break;
7830         }
7831
7832         /* Should never happen */
7833         return 0;
7834 }
7835
7836 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7837                                         char *interrupt_name)
7838 {
7839         struct gaudi_device *gaudi = hdev->asic_specific;
7840         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7841         bool soft_reset_required = false;
7842
7843         /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
7844          * gating. This cannot be done by the CPU-CP, so the driver handles it
7845          * here instead.
7846          */
7847
7848         mutex_lock(&gaudi->clk_gate_mutex);
7849
7850         hdev->asic_funcs->disable_clock_gating(hdev);
7851
7852         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7853                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7854
7855         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7856                 if (tpc_interrupts_cause & BIT(i)) {
7857                         dev_err_ratelimited(hdev->dev,
7858                                         "TPC%d_%s interrupt cause: %s\n",
7859                                         tpc_id, interrupt_name,
7860                                         gaudi_tpc_interrupts_cause[i]);
7861                         /* If this is QM error, we need to soft-reset */
7862                         if (i == 15)
7863                                 soft_reset_required = true;
7864                 }
7865
7866         /* Clear interrupts */
7867         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7868
7869         hdev->asic_funcs->set_clock_gating(hdev);
7870
7871         mutex_unlock(&gaudi->clk_gate_mutex);
7872
7873         return soft_reset_required;
7874 }
7875
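/*
 * Map a TPC event ID to the TPC engine index. The arithmetic below assumes
 * consecutive TPCs have their DEC events spaced two IDs apart and their
 * KRN_ERR events spaced six IDs apart in the event table.
 */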
7876 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7877 {
7878         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7879 }
7880
7881 static int tpc_krn_event_to_tpc_id(u16 tpc_krn_event_type)
7882 {
7883         return (tpc_krn_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7884 }
7885
7886 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7887                                         u16 event_type)
7888 {
7889         switch (event_type) {
7890         case GAUDI_EVENT_FIX_POWER_ENV_S:
7891                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7892                 dev_info_ratelimited(hdev->dev,
7893                         "Clock throttling due to power consumption\n");
7894                 break;
7895
7896         case GAUDI_EVENT_FIX_POWER_ENV_E:
7897                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7898                 dev_info_ratelimited(hdev->dev,
7899                         "Power envelope is safe, back to optimal clock\n");
7900                 break;
7901
7902         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7903                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7904                 dev_info_ratelimited(hdev->dev,
7905                         "Clock throttling due to overheating\n");
7906                 break;
7907
7908         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7909                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7910                 dev_info_ratelimited(hdev->dev,
7911                         "Thermal envelope is safe, back to optimal clock\n");
7912                 break;
7913
7914         default:
7915                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7916                         event_type);
7917                 break;
7918         }
7919 }
7920
7921 static void gaudi_handle_eqe(struct hl_device *hdev,
7922                                 struct hl_eq_entry *eq_entry)
7923 {
7924         struct gaudi_device *gaudi = hdev->asic_specific;
7925         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7926         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7927                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7928         bool reset_required;
7929         u8 cause;
7930         int rc;
7931
7932         gaudi->events_stat[event_type]++;
7933         gaudi->events_stat_aggregate[event_type]++;
7934
7935         switch (event_type) {
7936         case GAUDI_EVENT_PCIE_CORE_DERR:
7937         case GAUDI_EVENT_PCIE_IF_DERR:
7938         case GAUDI_EVENT_PCIE_PHY_DERR:
7939         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7940         case GAUDI_EVENT_MME0_ACC_DERR:
7941         case GAUDI_EVENT_MME0_SBAB_DERR:
7942         case GAUDI_EVENT_MME1_ACC_DERR:
7943         case GAUDI_EVENT_MME1_SBAB_DERR:
7944         case GAUDI_EVENT_MME2_ACC_DERR:
7945         case GAUDI_EVENT_MME2_SBAB_DERR:
7946         case GAUDI_EVENT_MME3_ACC_DERR:
7947         case GAUDI_EVENT_MME3_SBAB_DERR:
7948         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7949                 fallthrough;
7950         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7951         case GAUDI_EVENT_PSOC_MEM_DERR:
7952         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7953         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7954         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7955         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7956         case GAUDI_EVENT_MMU_DERR:
7957         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7958                 gaudi_print_irq_info(hdev, event_type, true);
7959                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7960                 goto reset_device;
7961
7962         case GAUDI_EVENT_GIC500:
7963         case GAUDI_EVENT_AXI_ECC:
7964         case GAUDI_EVENT_L2_RAM_ECC:
7965         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7966                 gaudi_print_irq_info(hdev, event_type, false);
7967                 goto reset_device;
7968
7969         case GAUDI_EVENT_HBM0_SPI_0:
7970         case GAUDI_EVENT_HBM1_SPI_0:
7971         case GAUDI_EVENT_HBM2_SPI_0:
7972         case GAUDI_EVENT_HBM3_SPI_0:
7973                 gaudi_print_irq_info(hdev, event_type, false);
7974                 gaudi_hbm_read_interrupts(hdev,
7975                                 gaudi_hbm_event_to_dev(event_type),
7976                                 &eq_entry->hbm_ecc_data);
7977                 goto reset_device;
7978
7979         case GAUDI_EVENT_HBM0_SPI_1:
7980         case GAUDI_EVENT_HBM1_SPI_1:
7981         case GAUDI_EVENT_HBM2_SPI_1:
7982         case GAUDI_EVENT_HBM3_SPI_1:
7983                 gaudi_print_irq_info(hdev, event_type, false);
7984                 gaudi_hbm_read_interrupts(hdev,
7985                                 gaudi_hbm_event_to_dev(event_type),
7986                                 &eq_entry->hbm_ecc_data);
7987                 hl_fw_unmask_irq(hdev, event_type);
7988                 break;
7989
7990         case GAUDI_EVENT_TPC0_DEC:
7991         case GAUDI_EVENT_TPC1_DEC:
7992         case GAUDI_EVENT_TPC2_DEC:
7993         case GAUDI_EVENT_TPC3_DEC:
7994         case GAUDI_EVENT_TPC4_DEC:
7995         case GAUDI_EVENT_TPC5_DEC:
7996         case GAUDI_EVENT_TPC6_DEC:
7997         case GAUDI_EVENT_TPC7_DEC:
7998                 gaudi_print_irq_info(hdev, event_type, true);
7999                 reset_required = gaudi_tpc_read_interrupts(hdev,
8000                                         tpc_dec_event_to_tpc_id(event_type),
8001                                         "AXI_SLV_DEC_Error");
8002                 if (reset_required) {
8003                         dev_err(hdev->dev, "hard reset required due to %s\n",
8004                                 gaudi_irq_map_table[event_type].name);
8005
8006                         goto reset_device;
8007                 } else {
8008                         hl_fw_unmask_irq(hdev, event_type);
8009                 }
8010                 break;
8011
8012         case GAUDI_EVENT_TPC0_KRN_ERR:
8013         case GAUDI_EVENT_TPC1_KRN_ERR:
8014         case GAUDI_EVENT_TPC2_KRN_ERR:
8015         case GAUDI_EVENT_TPC3_KRN_ERR:
8016         case GAUDI_EVENT_TPC4_KRN_ERR:
8017         case GAUDI_EVENT_TPC5_KRN_ERR:
8018         case GAUDI_EVENT_TPC6_KRN_ERR:
8019         case GAUDI_EVENT_TPC7_KRN_ERR:
8020                 gaudi_print_irq_info(hdev, event_type, true);
8021                 reset_required = gaudi_tpc_read_interrupts(hdev,
8022                                         tpc_krn_event_to_tpc_id(event_type),
8023                                         "KRN_ERR");
8024                 if (reset_required) {
8025                         dev_err(hdev->dev, "hard reset required due to %s\n",
8026                                 gaudi_irq_map_table[event_type].name);
8027
8028                         goto reset_device;
8029                 } else {
8030                         hl_fw_unmask_irq(hdev, event_type);
8031                 }
8032                 break;
8033
8034         case GAUDI_EVENT_PCIE_CORE_SERR:
8035         case GAUDI_EVENT_PCIE_IF_SERR:
8036         case GAUDI_EVENT_PCIE_PHY_SERR:
8037         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8038         case GAUDI_EVENT_MME0_ACC_SERR:
8039         case GAUDI_EVENT_MME0_SBAB_SERR:
8040         case GAUDI_EVENT_MME1_ACC_SERR:
8041         case GAUDI_EVENT_MME1_SBAB_SERR:
8042         case GAUDI_EVENT_MME2_ACC_SERR:
8043         case GAUDI_EVENT_MME2_SBAB_SERR:
8044         case GAUDI_EVENT_MME3_ACC_SERR:
8045         case GAUDI_EVENT_MME3_SBAB_SERR:
8046         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8047         case GAUDI_EVENT_CPU_IF_ECC_SERR:
8048         case GAUDI_EVENT_PSOC_MEM_SERR:
8049         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8050         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8051         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8052         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8053                 fallthrough;
8054         case GAUDI_EVENT_MMU_SERR:
8055                 gaudi_print_irq_info(hdev, event_type, true);
8056                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8057                 hl_fw_unmask_irq(hdev, event_type);
8058                 break;
8059
8060         case GAUDI_EVENT_PCIE_DEC:
8061         case GAUDI_EVENT_MME0_WBC_RSP:
8062         case GAUDI_EVENT_MME0_SBAB0_RSP:
8063         case GAUDI_EVENT_MME1_WBC_RSP:
8064         case GAUDI_EVENT_MME1_SBAB0_RSP:
8065         case GAUDI_EVENT_MME2_WBC_RSP:
8066         case GAUDI_EVENT_MME2_SBAB0_RSP:
8067         case GAUDI_EVENT_MME3_WBC_RSP:
8068         case GAUDI_EVENT_MME3_SBAB0_RSP:
8069         case GAUDI_EVENT_CPU_AXI_SPLITTER:
8070         case GAUDI_EVENT_PSOC_AXI_DEC:
8071         case GAUDI_EVENT_PSOC_PRSTN_FALL:
8072         case GAUDI_EVENT_MMU_PAGE_FAULT:
8073         case GAUDI_EVENT_MMU_WR_PERM:
8074         case GAUDI_EVENT_RAZWI_OR_ADC:
8075         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8076         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8077         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8078                 fallthrough;
8079         case GAUDI_EVENT_NIC0_QM0:
8080         case GAUDI_EVENT_NIC0_QM1:
8081         case GAUDI_EVENT_NIC1_QM0:
8082         case GAUDI_EVENT_NIC1_QM1:
8083         case GAUDI_EVENT_NIC2_QM0:
8084         case GAUDI_EVENT_NIC2_QM1:
8085         case GAUDI_EVENT_NIC3_QM0:
8086         case GAUDI_EVENT_NIC3_QM1:
8087         case GAUDI_EVENT_NIC4_QM0:
8088         case GAUDI_EVENT_NIC4_QM1:
8089         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8090                 gaudi_print_irq_info(hdev, event_type, true);
8091                 gaudi_handle_qman_err(hdev, event_type);
8092                 hl_fw_unmask_irq(hdev, event_type);
8093                 break;
8094
8095         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8096                 gaudi_print_irq_info(hdev, event_type, true);
8097                 goto reset_device;
8098
8099         case GAUDI_EVENT_TPC0_BMON_SPMU:
8100         case GAUDI_EVENT_TPC1_BMON_SPMU:
8101         case GAUDI_EVENT_TPC2_BMON_SPMU:
8102         case GAUDI_EVENT_TPC3_BMON_SPMU:
8103         case GAUDI_EVENT_TPC4_BMON_SPMU:
8104         case GAUDI_EVENT_TPC5_BMON_SPMU:
8105         case GAUDI_EVENT_TPC6_BMON_SPMU:
8106         case GAUDI_EVENT_TPC7_BMON_SPMU:
8107         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8108                 gaudi_print_irq_info(hdev, event_type, false);
8109                 hl_fw_unmask_irq(hdev, event_type);
8110                 break;
8111
8112         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8113                 gaudi_print_irq_info(hdev, event_type, false);
8114                 gaudi_print_sm_sei_info(hdev, event_type,
8115                                         &eq_entry->sm_sei_data);
8116                 rc = hl_state_dump(hdev);
8117                 if (rc)
8118                         dev_err(hdev->dev,
8119                                 "Error during system state dump %d\n", rc);
8120                 hl_fw_unmask_irq(hdev, event_type);
8121                 break;
8122
8123         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8124                 gaudi_print_clk_change_info(hdev, event_type);
8125                 hl_fw_unmask_irq(hdev, event_type);
8126                 break;
8127
8128         case GAUDI_EVENT_PSOC_GPIO_U16_0:
8129                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8130                 dev_err(hdev->dev,
8131                         "Received high temp H/W interrupt %d (cause %d)\n",
8132                         event_type, cause);
8133                 break;
8134
8135         case GAUDI_EVENT_DEV_RESET_REQ:
8136                 gaudi_print_irq_info(hdev, event_type, false);
8137                 goto reset_device;
8138
8139         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8140                 gaudi_print_irq_info(hdev, event_type, false);
8141                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8142                 goto reset_device;
8143
8144         case GAUDI_EVENT_FW_ALIVE_S:
8145                 gaudi_print_irq_info(hdev, event_type, false);
8146                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8147                 goto reset_device;
8148
8149         default:
8150                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8151                                 event_type);
8152                 break;
8153         }
8154
8155         return;
8156
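/*
 * Events that require a reset end up here: either hard-reset the device
 * (when hard reset on FW events is enabled) or just unmask the interrupt
 * so the firmware can keep reporting it.
 */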
8157 reset_device:
8158         if (hdev->hard_reset_on_fw_events)
8159                 hl_device_reset(hdev, HL_RESET_HARD);
8160         else
8161                 hl_fw_unmask_irq(hdev, event_type);
8162 }
8163
8164 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8165                                         u32 *size)
8166 {
8167         struct gaudi_device *gaudi = hdev->asic_specific;
8168
8169         if (aggregate) {
8170                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8171                 return gaudi->events_stat_aggregate;
8172         }
8173
8174         *size = (u32) sizeof(gaudi->events_stat);
8175         return gaudi->events_stat;
8176 }
8177
8178 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8179                                         u32 flags)
8180 {
8181         struct gaudi_device *gaudi = hdev->asic_specific;
8182         u32 status, timeout_usec;
8183         int rc;
8184
8185         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8186                 hdev->hard_reset_pending)
8187                 return 0;
8188
8189         if (hdev->pldm)
8190                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8191         else
8192                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8193
8194         /* L0 & L1 invalidation */
8195         WREG32(mmSTLB_INV_PS, 3);
8196         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8197         WREG32(mmSTLB_INV_PS, 2);
8198
8199         rc = hl_poll_timeout(
8200                 hdev,
8201                 mmSTLB_INV_PS,
8202                 status,
8203                 !status,
8204                 1000,
8205                 timeout_usec);
8206
8207         WREG32(mmSTLB_INV_SET, 0);
8208
8209         if (rc) {
8210                 dev_err_ratelimited(hdev->dev,
8211                                         "MMU cache invalidation timeout\n");
8212                 hl_device_reset(hdev, HL_RESET_HARD);
8213         }
8214
8215         return rc;
8216 }
8217
8218 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8219                                                 bool is_hard, u32 flags,
8220                                                 u32 asid, u64 va, u64 size)
8221 {
8222         /* Treat as invalidate all because there is no range invalidation
8223          * in Gaudi
8224          */
8225         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8226 }
8227
8228 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8229                                         u32 asid, u64 phys_addr)
8230 {
8231         u32 status, timeout_usec;
8232         int rc;
8233
8234         if (hdev->pldm)
8235                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8236         else
8237                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8238
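        /* Program the hop0 physical address for this ASID and kick the MMU;
         * the driver then polls until the busy bit (bit 31) clears.
         */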
8239         WREG32(MMU_ASID, asid);
8240         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8241         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8242         WREG32(MMU_BUSY, 0x80000000);
8243
8244         rc = hl_poll_timeout(
8245                 hdev,
8246                 MMU_BUSY,
8247                 status,
8248                 !(status & 0x80000000),
8249                 1000,
8250                 timeout_usec);
8251
8252         if (rc) {
8253                 dev_err(hdev->dev,
8254                         "Timeout during MMU hop0 config of asid %d\n", asid);
8255                 return rc;
8256         }
8257
8258         return 0;
8259 }
8260
8261 static int gaudi_send_heartbeat(struct hl_device *hdev)
8262 {
8263         struct gaudi_device *gaudi = hdev->asic_specific;
8264
8265         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8266                 return 0;
8267
8268         return hl_fw_send_heartbeat(hdev);
8269 }
8270
8271 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8272 {
8273         struct gaudi_device *gaudi = hdev->asic_specific;
8274         struct asic_fixed_properties *prop = &hdev->asic_prop;
8275         int rc;
8276
8277         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8278                 return 0;
8279
8280         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8281                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8282                                         mmCPU_BOOT_ERR1);
8283         if (rc)
8284                 return rc;
8285
8286         if (!strlen(prop->cpucp_info.card_name))
8287                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8288                                 CARD_NAME_MAX_LEN);
8289
8290         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8291
8292         set_default_power_values(hdev);
8293
8294         hdev->max_power = prop->max_power_default;
8295
8296         return 0;
8297 }
8298
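/*
 * Check whether all DMA, TPC, MME and NIC engines are idle. A per-engine
 * busy bit is set in mask_arr for engines that are not idle, and when a
 * seq_file is supplied a human-readable status table is printed into it.
 */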
8299 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8300                                         u8 mask_len, struct seq_file *s)
8301 {
8302         struct gaudi_device *gaudi = hdev->asic_specific;
8303         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8304         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8305         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8306         unsigned long *mask = (unsigned long *)mask_arr;
8307         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8308         bool is_idle = true, is_eng_idle, is_slave;
8309         u64 offset;
8310         int i, dma_id, port;
8311
8312         mutex_lock(&gaudi->clk_gate_mutex);
8313
8314         hdev->asic_funcs->disable_clock_gating(hdev);
8315
8316         if (s)
8317                 seq_puts(s,
8318                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8319                         "---  -------  ------------  ----------  -------------\n");
8320
8321         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8322                 dma_id = gaudi_dma_assignment[i];
8323                 offset = dma_id * DMA_QMAN_OFFSET;
8324
8325                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8326                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8327                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8328                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8329                                 IS_DMA_IDLE(dma_core_sts0);
8330                 is_idle &= is_eng_idle;
8331
8332                 if (mask && !is_eng_idle)
8333                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8334                 if (s)
8335                         seq_printf(s, fmt, dma_id,
8336                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8337                                 qm_cgm_sts, dma_core_sts0);
8338         }
8339
8340         if (s)
8341                 seq_puts(s,
8342                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8343                         "---  -------  ------------  ----------  ----------\n");
8344
8345         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8346                 offset = i * TPC_QMAN_OFFSET;
8347                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8348                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8349                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8350                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8351                                 IS_TPC_IDLE(tpc_cfg_sts);
8352                 is_idle &= is_eng_idle;
8353
8354                 if (mask && !is_eng_idle)
8355                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8356                 if (s)
8357                         seq_printf(s, fmt, i,
8358                                 is_eng_idle ? "Y" : "N",
8359                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8360         }
8361
8362         if (s)
8363                 seq_puts(s,
8364                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8365                         "---  -------  ------------  ----------  -----------\n");
8366
8367         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8368                 offset = i * MME_QMAN_OFFSET;
8369                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8370                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8371
8372                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8373                 is_slave = i % 2;
8374                 if (!is_slave) {
8375                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8376                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8377                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8378                 }
8379
8380                 is_idle &= is_eng_idle;
8381
8382                 if (mask && !is_eng_idle)
8383                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8384                 if (s) {
8385                         if (!is_slave)
8386                                 seq_printf(s, fmt, i,
8387                                         is_eng_idle ? "Y" : "N",
8388                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8389                         else
8390                                 seq_printf(s, mme_slave_fmt, i,
8391                                         is_eng_idle ? "Y" : "N", "-",
8392                                         "-", mme_arch_sts);
8393                 }
8394         }
8395
8396         if (s)
8397                 seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8398                                 "---  -------  ------------  ----------\n");
8399
8400         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8401                 offset = i * NIC_MACRO_QMAN_OFFSET;
8402                 port = 2 * i;
8403                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8404                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8405                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8406                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8407                         is_idle &= is_eng_idle;
8408
8409                         if (mask && !is_eng_idle)
8410                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8411                         if (s)
8412                                 seq_printf(s, nic_fmt, port,
8413                                                 is_eng_idle ? "Y" : "N",
8414                                                 qm_glbl_sts0, qm_cgm_sts);
8415                 }
8416
8417                 port = 2 * i + 1;
8418                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8419                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8420                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8421                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8422                         is_idle &= is_eng_idle;
8423
8424                         if (mask && !is_eng_idle)
8425                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8426                         if (s)
8427                                 seq_printf(s, nic_fmt, port,
8428                                                 is_eng_idle ? "Y" : "N",
8429                                                 qm_glbl_sts0, qm_cgm_sts);
8430                 }
8431         }
8432
8433         if (s)
8434                 seq_puts(s, "\n");
8435
8436         hdev->asic_funcs->set_clock_gating(hdev);
8437
8438         mutex_unlock(&gaudi->clk_gate_mutex);
8439
8440         return is_idle;
8441 }
8442
8443 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8444         __acquires(&gaudi->hw_queues_lock)
8445 {
8446         struct gaudi_device *gaudi = hdev->asic_specific;
8447
8448         spin_lock(&gaudi->hw_queues_lock);
8449 }
8450
8451 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8452         __releases(&gaudi->hw_queues_lock)
8453 {
8454         struct gaudi_device *gaudi = hdev->asic_specific;
8455
8456         spin_unlock(&gaudi->hw_queues_lock);
8457 }
8458
8459 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8460 {
8461         return hdev->pdev->device;
8462 }
8463
8464 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8465                                 size_t max_size)
8466 {
8467         struct gaudi_device *gaudi = hdev->asic_specific;
8468
8469         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8470                 return 0;
8471
8472         return hl_fw_get_eeprom_data(hdev, data, max_size);
8473 }
8474
8475 /*
8476  * This function should be used only during initialization and/or after reset,
8477  * when there are no active users.
8478  */
8479 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8480                                 u32 tpc_id)
8481 {
8482         struct gaudi_device *gaudi = hdev->asic_specific;
8483         u64 kernel_timeout;
8484         u32 status, offset;
8485         int rc;
8486
8487         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8488
8489         if (hdev->pldm)
8490                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8491         else
8492                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8493
8494         mutex_lock(&gaudi->clk_gate_mutex);
8495
8496         hdev->asic_funcs->disable_clock_gating(hdev);
8497
8498         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8499                         lower_32_bits(tpc_kernel));
8500         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8501                         upper_32_bits(tpc_kernel));
8502
8503         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8504                         lower_32_bits(tpc_kernel));
8505         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8506                         upper_32_bits(tpc_kernel));
8507         /* set a valid LUT pointer, content is of no significance */
8508         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8509                         lower_32_bits(tpc_kernel));
8510         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8511                         upper_32_bits(tpc_kernel));
8512
8513         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8514                         lower_32_bits(CFG_BASE +
8515                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8516
8517         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8518                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8519                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8520         /* wait a bit for the engine to start executing */
8521         usleep_range(1000, 1500);
8522
8523         /* wait until engine has finished executing */
8524         rc = hl_poll_timeout(
8525                 hdev,
8526                 mmTPC0_CFG_STATUS + offset,
8527                 status,
8528                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8529                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8530                 1000,
8531                 kernel_timeout);
8532
8533         if (rc) {
8534                 dev_err(hdev->dev,
8535                         "Timeout while waiting for TPC%d icache prefetch\n",
8536                         tpc_id);
8537                 hdev->asic_funcs->set_clock_gating(hdev);
8538                 mutex_unlock(&gaudi->clk_gate_mutex);
8539                 return -EIO;
8540         }
8541
8542         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8543                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8544
8545         /* wait a bit for the engine to start executing */
8546         usleep_range(1000, 1500);
8547
8548         /* wait until engine has finished executing */
8549         rc = hl_poll_timeout(
8550                 hdev,
8551                 mmTPC0_CFG_STATUS + offset,
8552                 status,
8553                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8554                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8555                 1000,
8556                 kernel_timeout);
8557
8558         if (rc) {
8559                 dev_err(hdev->dev,
8560                         "Timeout while waiting for TPC%d vector pipe\n",
8561                         tpc_id);
8562                 hdev->asic_funcs->set_clock_gating(hdev);
8563                 mutex_unlock(&gaudi->clk_gate_mutex);
8564                 return -EIO;
8565         }
8566
8567         rc = hl_poll_timeout(
8568                 hdev,
8569                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8570                 status,
8571                 (status == 0),
8572                 1000,
8573                 kernel_timeout);
8574
8575         hdev->asic_funcs->set_clock_gating(hdev);
8576         mutex_unlock(&gaudi->clk_gate_mutex);
8577
8578         if (rc) {
8579                 dev_err(hdev->dev,
8580                         "Timeout while waiting for TPC%d kernel to execute\n",
8581                         tpc_id);
8582                 return -EIO;
8583         }
8584
8585         return 0;
8586 }
8587
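/*
 * Allocate a host-resident pool for internal command buffers (the
 * collective CB size sets the pool's minimum allocation order): a coherent
 * DMA buffer is carved up by a gen_pool, a host VA block is reserved and
 * the buffer is mapped through the device MMU so engines can fetch CBs
 * from it.
 */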
8588 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8589                 struct hl_ctx *ctx)
8590 {
8591         struct gaudi_device *gaudi = hdev->asic_specific;
8592         int min_alloc_order, rc, collective_cb_size;
8593
8594         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8595                 return 0;
8596
8597         hdev->internal_cb_pool_virt_addr =
8598                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8599                                         HOST_SPACE_INTERNAL_CB_SZ,
8600                                         &hdev->internal_cb_pool_dma_addr,
8601                                         GFP_KERNEL | __GFP_ZERO);
8602
8603         if (!hdev->internal_cb_pool_virt_addr)
8604                 return -ENOMEM;
8605
8606         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8607                         sizeof(struct packet_fence);
8608         min_alloc_order = ilog2(collective_cb_size);
8609
8610         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8611         if (!hdev->internal_cb_pool) {
8612                 dev_err(hdev->dev,
8613                         "Failed to create internal CB pool\n");
8614                 rc = -ENOMEM;
8615                 goto free_internal_cb_pool;
8616         }
8617
8618         rc = gen_pool_add(hdev->internal_cb_pool,
8619                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8620                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8621         if (rc) {
8622                 dev_err(hdev->dev,
8623                         "Failed to add memory to internal CB pool\n");
8624                 rc = -EFAULT;
8625                 goto destroy_internal_cb_pool;
8626         }
8627
8628         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8629                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8630                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8631
8632         if (!hdev->internal_cb_va_base) {
8633                 rc = -ENOMEM;
8634                 goto destroy_internal_cb_pool;
8635         }
8636
8637         mutex_lock(&ctx->mmu_lock);
8638         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8639                         hdev->internal_cb_pool_dma_addr,
8640                         HOST_SPACE_INTERNAL_CB_SZ);
8641
8642         hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
8643         mutex_unlock(&ctx->mmu_lock);
8644
8645         if (rc)
8646                 goto unreserve_internal_cb_pool;
8647
8648         return 0;
8649
8650 unreserve_internal_cb_pool:
8651         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8652                         HOST_SPACE_INTERNAL_CB_SZ);
8653 destroy_internal_cb_pool:
8654         gen_pool_destroy(hdev->internal_cb_pool);
8655 free_internal_cb_pool:
8656         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8657                         HOST_SPACE_INTERNAL_CB_SZ,
8658                         hdev->internal_cb_pool_virt_addr,
8659                         hdev->internal_cb_pool_dma_addr);
8660
8661         return rc;
8662 }
8663
8664 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8665                 struct hl_ctx *ctx)
8666 {
8667         struct gaudi_device *gaudi = hdev->asic_specific;
8668
8669         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8670                 return;
8671
8672         mutex_lock(&ctx->mmu_lock);
8673         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8674                         HOST_SPACE_INTERNAL_CB_SZ);
8675         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8676                         HOST_SPACE_INTERNAL_CB_SZ);
8677         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
8678         mutex_unlock(&ctx->mmu_lock);
8679
8680         gen_pool_destroy(hdev->internal_cb_pool);
8681
8682         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8683                         HOST_SPACE_INTERNAL_CB_SZ,
8684                         hdev->internal_cb_pool_virt_addr,
8685                         hdev->internal_cb_pool_dma_addr);
8686 }
8687
8688 static int gaudi_ctx_init(struct hl_ctx *ctx)
8689 {
8690         if (ctx->asid == HL_KERNEL_ASID_ID)
8691                 return 0;
8692
8693         gaudi_mmu_prepare(ctx->hdev, ctx->asid);
8694         return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8695 }
8696
8697 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8698 {
8699         if (ctx->asid == HL_KERNEL_ASID_ID)
8700                 return;
8701
8702         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8703 }
8704
8705 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8706 {
8707         return gaudi_cq_assignment[cq_idx];
8708 }
8709
8710 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8711 {
8712         return sizeof(struct packet_msg_short) +
8713                         sizeof(struct packet_msg_prot) * 2;
8714 }
8715
8716 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8717 {
8718         return sizeof(struct packet_msg_short) * 4 +
8719                         sizeof(struct packet_fence) +
8720                         sizeof(struct packet_msg_prot) * 2;
8721 }
8722
8723 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8724 {
8725         return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8726 }
8727
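/*
 * Build a signal CB: append a single MSG_SHORT packet that adds 1 to the
 * given sync object (W_S SOB base). Returns the updated CB size.
 */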
8728 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8729                                 u32 size, bool eb)
8730 {
8731         struct hl_cb *cb = (struct hl_cb *) data;
8732         struct packet_msg_short *pkt;
8733         u32 value, ctl, pkt_size = sizeof(*pkt);
8734
8735         pkt = cb->kernel_address + size;
8736         memset(pkt, 0, pkt_size);
8737
8738         /* Inc by 1, Mode ADD */
8739         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8740         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8741
8742         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8743         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8744         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8745         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8746         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8747         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8748         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8749
8750         pkt->value = cpu_to_le32(value);
8751         pkt->ctl = cpu_to_le32(ctl);
8752
8753         return size + pkt_size;
8754 }
8755
8756 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8757                                         u16 addr)
8758 {
8759         u32 ctl, pkt_size = sizeof(*pkt);
8760
8761         memset(pkt, 0, pkt_size);
8762
8763         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8764         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8765         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8766         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8767         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8768         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8769
8770         pkt->value = cpu_to_le32(value);
8771         pkt->ctl = cpu_to_le32(ctl);
8772
8773         return pkt_size;
8774 }
8775
8776 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8777                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8778                 u16 sob_val, u16 mon_id)
8779 {
8780         u64 monitor_base;
8781         u32 ctl, value, pkt_size = sizeof(*pkt);
8782         u16 msg_addr_offset;
8783         u8 mask;
8784
8785         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8786                 dev_err(hdev->dev,
8787                         "sob_base %u (mask %#x) is not valid\n",
8788                         sob_base, sob_mask);
8789                 return 0;
8790         }
8791
8792         /*
8793          * monitor_base should be the content of the base0 address registers,
8794          * so it will be added to the msg short offsets
8795          */
8796         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8797
8798         msg_addr_offset =
8799                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8800                                 monitor_base;
8801
8802         memset(pkt, 0, pkt_size);
8803
8804         /* Monitor config packet: bind the monitor to a sync object */
8805         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8806         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8807         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8808                         0); /* GREATER OR EQUAL */
8809         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8810
8811         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8812         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8813         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8814         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8815         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8816         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8817         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8818
8819         pkt->value = cpu_to_le32(value);
8820         pkt->ctl = cpu_to_le32(ctl);
8821
8822         return pkt_size;
8823 }
8824
8825 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8826 {
8827         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8828
8829         memset(pkt, 0, pkt_size);
8830
8831         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8832         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8833         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8834
8835         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8836         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8837         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8838         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8839
8840         pkt->cfg = cpu_to_le32(cfg);
8841         pkt->ctl = cpu_to_le32(ctl);
8842
8843         return pkt_size;
8844 }
8845
8846 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8847 {
8848         u32 offset, nic_index;
8849
8850         switch (queue_id) {
8851         case GAUDI_QUEUE_ID_DMA_0_0:
8852                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8853                 break;
8854         case GAUDI_QUEUE_ID_DMA_0_1:
8855                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8856                 break;
8857         case GAUDI_QUEUE_ID_DMA_0_2:
8858                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8859                 break;
8860         case GAUDI_QUEUE_ID_DMA_0_3:
8861                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8862                 break;
8863         case GAUDI_QUEUE_ID_DMA_1_0:
8864                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8865                 break;
8866         case GAUDI_QUEUE_ID_DMA_1_1:
8867                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8868                 break;
8869         case GAUDI_QUEUE_ID_DMA_1_2:
8870                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8871                 break;
8872         case GAUDI_QUEUE_ID_DMA_1_3:
8873                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8874                 break;
8875         case GAUDI_QUEUE_ID_DMA_5_0:
8876                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8877                 break;
8878         case GAUDI_QUEUE_ID_DMA_5_1:
8879                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8880                 break;
8881         case GAUDI_QUEUE_ID_DMA_5_2:
8882                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8883                 break;
8884         case GAUDI_QUEUE_ID_DMA_5_3:
8885                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8886                 break;
8887         case GAUDI_QUEUE_ID_TPC_7_0:
8888                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8889                 break;
8890         case GAUDI_QUEUE_ID_TPC_7_1:
8891                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8892                 break;
8893         case GAUDI_QUEUE_ID_TPC_7_2:
8894                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8895                 break;
8896         case GAUDI_QUEUE_ID_TPC_7_3:
8897                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8898                 break;
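        /*
         * NIC queues: each NIC engine exposes 4 queues, so the engine index
         * is queue_id / 4 relative to NIC_0_0. Engines are paired into NIC
         * macros; even engines use QM0 and odd engines use QM1 within the
         * macro, hence the macro/engine offset arithmetic below.
         */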
8899         case GAUDI_QUEUE_ID_NIC_0_0:
8900         case GAUDI_QUEUE_ID_NIC_1_0:
8901         case GAUDI_QUEUE_ID_NIC_2_0:
8902         case GAUDI_QUEUE_ID_NIC_3_0:
8903         case GAUDI_QUEUE_ID_NIC_4_0:
8904         case GAUDI_QUEUE_ID_NIC_5_0:
8905         case GAUDI_QUEUE_ID_NIC_6_0:
8906         case GAUDI_QUEUE_ID_NIC_7_0:
8907         case GAUDI_QUEUE_ID_NIC_8_0:
8908         case GAUDI_QUEUE_ID_NIC_9_0:
8909                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8910                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8911                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8912                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8913                 break;
8914         case GAUDI_QUEUE_ID_NIC_0_1:
8915         case GAUDI_QUEUE_ID_NIC_1_1:
8916         case GAUDI_QUEUE_ID_NIC_2_1:
8917         case GAUDI_QUEUE_ID_NIC_3_1:
8918         case GAUDI_QUEUE_ID_NIC_4_1:
8919         case GAUDI_QUEUE_ID_NIC_5_1:
8920         case GAUDI_QUEUE_ID_NIC_6_1:
8921         case GAUDI_QUEUE_ID_NIC_7_1:
8922         case GAUDI_QUEUE_ID_NIC_8_1:
8923         case GAUDI_QUEUE_ID_NIC_9_1:
8924                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8925                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8926                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8927                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8928                 break;
8929         case GAUDI_QUEUE_ID_NIC_0_2:
8930         case GAUDI_QUEUE_ID_NIC_1_2:
8931         case GAUDI_QUEUE_ID_NIC_2_2:
8932         case GAUDI_QUEUE_ID_NIC_3_2:
8933         case GAUDI_QUEUE_ID_NIC_4_2:
8934         case GAUDI_QUEUE_ID_NIC_5_2:
8935         case GAUDI_QUEUE_ID_NIC_6_2:
8936         case GAUDI_QUEUE_ID_NIC_7_2:
8937         case GAUDI_QUEUE_ID_NIC_8_2:
8938         case GAUDI_QUEUE_ID_NIC_9_2:
8939                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8940                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8941                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8942                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8943                 break;
8944         case GAUDI_QUEUE_ID_NIC_0_3:
8945         case GAUDI_QUEUE_ID_NIC_1_3:
8946         case GAUDI_QUEUE_ID_NIC_2_3:
8947         case GAUDI_QUEUE_ID_NIC_3_3:
8948         case GAUDI_QUEUE_ID_NIC_4_3:
8949         case GAUDI_QUEUE_ID_NIC_5_3:
8950         case GAUDI_QUEUE_ID_NIC_6_3:
8951         case GAUDI_QUEUE_ID_NIC_7_3:
8952         case GAUDI_QUEUE_ID_NIC_8_3:
8953         case GAUDI_QUEUE_ID_NIC_9_3:
8954                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8955                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8956                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8957                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8958                 break;
8959         default:
8960                 return -EINVAL;
8961         }
8962
8963         *addr = CFG_BASE + offset;
8964
8965         return 0;
8966 }
8967
8968 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8969 {
8970         u64 monitor_base;
8971         u32 size = 0;
8972         u16 msg_addr_offset;
8973
8974         /*
8975          * monitor_base should be the content of the base0 address registers,
8976          * so it will be added to the msg short offsets
8977          */
8978         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8979
8980         /* First monitor config packet: low address of the sync */
8981         msg_addr_offset =
8982                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8983                                 monitor_base;
8984
8985         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8986                                         msg_addr_offset);
8987
8988         /* Second monitor config packet: high address of the sync */
8989         msg_addr_offset =
8990                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8991                                 monitor_base;
8992
8993         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8994                                         msg_addr_offset);
8995
8996         /*
8997          * Third monitor config packet: the payload, i.e. what to write when the
8998          * sync triggers
8999          */
9000         msg_addr_offset =
9001                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
9002                                 monitor_base;
9003
9004         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
9005
9006         return size;
9007 }
9008
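/*
 * Build a wait CB: three MSG_SHORT packets configure the monitor payload
 * address (low/high) and payload data, a fourth arms the monitor on the
 * requested SOB range/value, and a final FENCE packet makes the queue's
 * CP wait until the monitor fires.
 */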
9009 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
9010                                 struct hl_gen_wait_properties *prop)
9011 {
9012         struct hl_cb *cb = (struct hl_cb *) prop->data;
9013         void *buf = cb->kernel_address;
9014         u64 fence_addr = 0;
9015         u32 size = prop->size;
9016
9017         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
9018                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
9019                                 prop->q_idx);
9020                 return 0;
9021         }
9022
9023         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
9024         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
9025                         prop->sob_mask, prop->sob_val, prop->mon_id);
9026         size += gaudi_add_fence_pkt(buf + size);
9027
9028         return size;
9029 }
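
/*
 * Illustration (hypothetical caller, not part of the driver): the common code
 * reaches this function through the ASIC ops with a kernel CB it allocated
 * and a properties struct describing the queue, SOB group and monitor to use.
 * The generated CB then contains, in order: three MSG_SHORT packets that
 * program the monitor payload (gaudi_add_mon_pkts() above), one MSG_SHORT
 * packet that arms the monitor on the SOB group, and a FENCE packet on which
 * the queue blocks until the monitor fires. All values below are assumptions
 * chosen only for this example.
 */
static inline u32 gaudi_gen_wait_cb_usage_sketch(struct hl_device *hdev,
                                                struct hl_cb *cb)
{
        struct hl_gen_wait_properties prop = {
                .data = cb,                     /* CB receiving the packets */
                .q_idx = GAUDI_QUEUE_ID_DMA_0_0,/* assumed destination queue */
                .size = 0,                      /* start writing at CB offset 0 */
                .sob_base = 0,                  /* assumed SOB group base */
                .sob_mask = 0x1,                /* wait on the first SOB only */
                .sob_val = 1,                   /* signal count to wait for */
                .mon_id = 0,                    /* assumed monitor id */
        };

        /* returns the number of bytes written into the CB, 0 on error */
        return hdev->asic_funcs->gen_wait_cb(hdev, &prop);
}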
9030
9031 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
9032 {
9033         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
9034         int rc;
9035
9036         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
9037                 hw_sob->sob_id);
9038
9039         rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
9040                         CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9041                         hw_sob->sob_id * 4, 1, 0);
9042         if (rc)
9043                 dev_err(hdev->dev, "failed resetting sob %u\n", hw_sob->sob_id);
9044
9045         kref_init(&hw_sob->kref);
9046 }
9047
9048 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9049 {
9050         if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
9051                                                         HL_POWER9_HOST_MAGIC) {
9052                 hdev->power9_64bit_dma_enable = 1;
9053                 hdev->dma_mask = 64;
9054         } else {
9055                 hdev->power9_64bit_dma_enable = 0;
9056                 hdev->dma_mask = 48;
9057         }
9058 }
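
/*
 * Illustration (hypothetical, not part of the driver): hdev->dma_mask set
 * above is consumed later by the common PCI initialization, which typically
 * turns it into a device DMA mask, e.g. via dma_set_mask_and_coherent()
 * (assumes <linux/dma-mapping.h> is available):
 */
static inline int gaudi_apply_dma_mask_sketch(struct hl_device *hdev,
                                                struct pci_dev *pdev)
{
        /* DMA_BIT_MASK(48) normally, DMA_BIT_MASK(64) on POWER9 hosts */
        return dma_set_mask_and_coherent(&pdev->dev,
                                        DMA_BIT_MASK(hdev->dma_mask));
}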
9059
9060 static u64 gaudi_get_device_time(struct hl_device *hdev)
9061 {
9062         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9063
9064         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9065 }
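
/*
 * Illustration (hypothetical, not the driver's method): reading a 64-bit
 * free-running counter through two 32-bit registers can tear if the low word
 * wraps between the two reads. A common wrap-safe pattern re-reads the high
 * word and retries when it has changed:
 */
static inline u64 gaudi_get_device_time_wrap_safe_sketch(struct hl_device *hdev)
{
        u32 hi, lo;

        do {
                hi = RREG32(mmPSOC_TIMESTAMP_CNTCVU);
                lo = RREG32(mmPSOC_TIMESTAMP_CNTCVL);
        } while (RREG32(mmPSOC_TIMESTAMP_CNTCVU) != hi);

        return ((u64) hi << 32) | lo;
}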
9066
9067 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9068                                 u32 *block_size, u32 *block_id)
9069 {
9070         return -EPERM;
9071 }
9072
9073 static int gaudi_block_mmap(struct hl_device *hdev,
9074                                 struct vm_area_struct *vma,
9075                                 u32 block_id, u32 block_size)
9076 {
9077         return -EPERM;
9078 }
9079
9080 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9081 {
9082         struct cpu_dyn_regs *dyn_regs =
9083                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9084         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9085                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
9086                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
9087
9088         WREG32(irq_handler_offset,
9089                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
9090 }
9091
9092 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9093 {
9094         switch (pll_idx) {
9095         case HL_GAUDI_CPU_PLL: return CPU_PLL;
9096         case HL_GAUDI_PCI_PLL: return PCI_PLL;
9097         case HL_GAUDI_NIC_PLL: return NIC_PLL;
9098         case HL_GAUDI_DMA_PLL: return DMA_PLL;
9099         case HL_GAUDI_MESH_PLL: return MESH_PLL;
9100         case HL_GAUDI_MME_PLL: return MME_PLL;
9101         case HL_GAUDI_TPC_PLL: return TPC_PLL;
9102         case HL_GAUDI_IF_PLL: return IF_PLL;
9103         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9104         case HL_GAUDI_HBM_PLL: return HBM_PLL;
9105         default: return -EINVAL;
9106         }
9107 }
9108
9109 static int gaudi_add_sync_to_engine_map_entry(
9110         struct hl_sync_to_engine_map *map, u32 reg_value,
9111         enum hl_sync_engine_type engine_type, u32 engine_id)
9112 {
9113         struct hl_sync_to_engine_map_entry *entry;
9114
9115         /* The reg value represents a partial address of the sync object
9116          * and is used as a unique identifier. For this we need to
9117          * subtract the (truncated) CFG_BASE from the value.
9118          */
9119         if (reg_value == 0 || reg_value == 0xffffffff)
9120                 return 0;
9121         reg_value -= (u32)CFG_BASE;
9122
9123         /* create a new hash entry */
9124         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9125         if (!entry)
9126                 return -ENOMEM;
9127         entry->engine_type = engine_type;
9128         entry->engine_id = engine_id;
9129         entry->sync_id = reg_value;
9130         hash_add(map->tb, &entry->node, reg_value);
9131
9132         return 0;
9133 }
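
/*
 * Illustration (hypothetical lookup helper, not part of the driver): entries
 * added by gaudi_add_sync_to_engine_map_entry() are keyed by the CFG-relative
 * sync object address, so a lookup walks the matching hash bucket and
 * compares sync_id:
 */
static inline struct hl_sync_to_engine_map_entry *
gaudi_sync_map_lookup_sketch(struct hl_sync_to_engine_map *map, u32 sync_id)
{
        struct hl_sync_to_engine_map_entry *entry;

        /* same key that was passed to hash_add() above */
        hash_for_each_possible(map->tb, entry, node, sync_id)
                if (entry->sync_id == sync_id)
                        return entry;

        return NULL;
}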
9134
9135 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9136                                 struct hl_sync_to_engine_map *map)
9137 {
9138         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9139         struct gaudi_device *gaudi = hdev->asic_specific;
9140         int i, j, rc;
9141         u32 reg_value;
9142
9143         /* Iterate over TPC engines */
9144         for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9145                 /* TPC registers must be accessed with clock gating disabled */
9146                 mutex_lock(&gaudi->clk_gate_mutex);
9147                 hdev->asic_funcs->disable_clock_gating(hdev);
9148
9149                 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9150                                         sds->props[SP_NEXT_TPC] * i);
9151
9152                 /* We can re-enable clock gating now */
9153                 hdev->asic_funcs->set_clock_gating(hdev);
9154                 mutex_unlock(&gaudi->clk_gate_mutex);
9155
9156                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9157                                                         ENGINE_TPC, i);
9158                 if (rc)
9159                         goto free_sync_to_engine_map;
9160         }
9161
9162         /* Iterate over MME engines */
9163         for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9164                 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9165                         /* MME registers must be accessed with clock
9166                          * gating disabled
9167                          */
9168                         mutex_lock(&gaudi->clk_gate_mutex);
9169                         hdev->asic_funcs->disable_clock_gating(hdev);
9170
9171                         reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9172                                                 sds->props[SP_NEXT_MME] * i +
9173                                                 j * sizeof(u32));
9174
9175                         /* We can re-enable clock gating now */
9176                         hdev->asic_funcs->set_clock_gating(hdev);
9177                         mutex_unlock(&gaudi->clk_gate_mutex);
9178
9179                         rc = gaudi_add_sync_to_engine_map_entry(
9180                                 map, reg_value, ENGINE_MME,
9181                                 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9182                         if (rc)
9183                                 goto free_sync_to_engine_map;
9184                 }
9185         }
9186
9187         /* Iterate over DMA engines */
9188         for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9189                 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9190                                         sds->props[SP_DMA_QUEUES_OFFSET] * i);
9191                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9192                                                         ENGINE_DMA, i);
9193                 if (rc)
9194                         goto free_sync_to_engine_map;
9195         }
9196
9197         return 0;
9198
9199 free_sync_to_engine_map:
9200         hl_state_dump_free_sync_to_engine_map(map);
9201
9202         return rc;
9203 }
9204
9205 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9206 {
9207         return FIELD_GET(
9208                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9209                 mon->status);
9210 }
9211
9212 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9213                                 struct hl_device *hdev,
9214                                 struct hl_mon_state_dump *mon)
9215 {
9216         const char *name;
9217         char scratch_buf1[BIN_REG_STRING_SIZE],
9218                 scratch_buf2[BIN_REG_STRING_SIZE];
9219
9220         name = hl_state_dump_get_monitor_name(hdev, mon);
9221         if (!name)
9222                 name = "";
9223
9224         return hl_snprintf_resize(
9225                 buf, size, offset,
9226                 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s",
9227                 mon->id, name,
9228                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9229                                 mon->arm_data),
9230                 hl_format_as_binary(
9231                         scratch_buf1, sizeof(scratch_buf1),
9232                         FIELD_GET(
9233                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9234                                 mon->arm_data)),
9235                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9236                                 mon->arm_data),
9237                 mon->wr_data,
9238                 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9239                 hl_format_as_binary(
9240                         scratch_buf2, sizeof(scratch_buf2),
9241                         FIELD_GET(
9242                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9243                                 mon->status)));
9244 }
9245
9246
9247 static int gaudi_print_fences_single_engine(
9248         struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9249         enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9250         size_t *size, size_t *offset)
9251 {
9252         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9253         int rc = -ENOMEM, i;
9254         u32 *statuses, *fences;
9255
9256         statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9257                         sizeof(*statuses), GFP_KERNEL);
9258         if (!statuses)
9259                 goto out;
9260
9261         fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9262                                 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9263                          sizeof(*fences), GFP_KERNEL);
9264         if (!fences)
9265                 goto free_status;
9266
9267         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9268                 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9269
9270         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9271                                 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9272                 fences[i] = RREG32(base_offset + i * sizeof(u32));
9273
9274         /* The actual print */
9275         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9276                 u32 fence_id;
9277                 u64 fence_cnt, fence_rdata;
9278                 const char *engine_name;
9279
9280                 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9281                         statuses[i]))
9282                         continue;
9283
9284                 fence_id =
9285                         FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9286                 fence_cnt = base_offset + CFG_BASE +
9287                         sizeof(u32) *
9288                         (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9289                 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9290                                 sds->props[SP_FENCE0_RDATA_OFFSET];
9291                 engine_name = hl_sync_engine_to_string(engine_type);
9292
9293                 rc = hl_snprintf_resize(
9294                         buf, size, offset,
9295                         "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9296                         engine_name, engine_id,
9297                         i, fence_id,
9298                         fence_cnt, engine_name, engine_id, fence_id, i,
9299                         fence_rdata, engine_name, engine_id, fence_id, i,
9300                         fences[fence_id],
9301                         statuses[i]);
9302                 if (rc)
9303                         goto free_fences;
9304         }
9305
9306         rc = 0;
9307
9308 free_fences:
9309         kfree(fences);
9310 free_status:
9311         kfree(statuses);
9312 out:
9313         return rc;
9314 }
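
/*
 * Note on the address math above (illustration only): the CP_FENCEn_CNT
 * registers of a QMAN form an array of u32s indexed by stream and fence id,
 * so the counter for fence "fence_id" of stream "i" is the
 * (i + fence_id * SP_ENGINE_NUM_OF_QUEUES)'th u32 past base_offset. The
 * matching CP_FENCEn_RDATA register sits at the same relative position inside
 * the RDATA bank, which is why fence_rdata is derived by rebasing fence_cnt
 * from the SP_FENCE0_CNT_OFFSET bank to the SP_FENCE0_RDATA_OFFSET bank.
 */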
9315
9316
9317 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9318         .monitor_valid = gaudi_monitor_valid,
9319         .print_single_monitor = gaudi_print_single_monitor,
9320         .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9321         .print_fences_single_engine = gaudi_print_fences_single_engine,
9322 };
9323
9324 static void gaudi_state_dump_init(struct hl_device *hdev)
9325 {
9326         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9327         int i;
9328
9329         for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9330                 hash_add(sds->so_id_to_str_tb,
9331                         &gaudi_so_id_to_str[i].node,
9332                         gaudi_so_id_to_str[i].id);
9333
9334         for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9335                 hash_add(sds->monitor_id_to_str_tb,
9336                         &gaudi_monitor_id_to_str[i].node,
9337                         gaudi_monitor_id_to_str[i].id);
9338
9339         sds->props = gaudi_state_dump_specs_props;
9340
9341         sds->sync_namager_names = gaudi_sync_manager_names;
9342
9343         sds->funcs = gaudi_state_dump_funcs;
9344 }
9345
9346 static const struct hl_asic_funcs gaudi_funcs = {
9347         .early_init = gaudi_early_init,
9348         .early_fini = gaudi_early_fini,
9349         .late_init = gaudi_late_init,
9350         .late_fini = gaudi_late_fini,
9351         .sw_init = gaudi_sw_init,
9352         .sw_fini = gaudi_sw_fini,
9353         .hw_init = gaudi_hw_init,
9354         .hw_fini = gaudi_hw_fini,
9355         .halt_engines = gaudi_halt_engines,
9356         .suspend = gaudi_suspend,
9357         .resume = gaudi_resume,
9358         .mmap = gaudi_mmap,
9359         .ring_doorbell = gaudi_ring_doorbell,
9360         .pqe_write = gaudi_pqe_write,
9361         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9362         .asic_dma_free_coherent = gaudi_dma_free_coherent,
9363         .scrub_device_mem = gaudi_scrub_device_mem,
9364         .get_int_queue_base = gaudi_get_int_queue_base,
9365         .test_queues = gaudi_test_queues,
9366         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9367         .asic_dma_pool_free = gaudi_dma_pool_free,
9368         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9369         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9370         .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9371         .cs_parser = gaudi_cs_parser,
9372         .asic_dma_map_sg = gaudi_dma_map_sg,
9373         .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9374         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9375         .update_eq_ci = gaudi_update_eq_ci,
9376         .context_switch = gaudi_context_switch,
9377         .restore_phase_topology = gaudi_restore_phase_topology,
9378         .debugfs_read32 = gaudi_debugfs_read32,
9379         .debugfs_write32 = gaudi_debugfs_write32,
9380         .debugfs_read64 = gaudi_debugfs_read64,
9381         .debugfs_write64 = gaudi_debugfs_write64,
9382         .debugfs_read_dma = gaudi_debugfs_read_dma,
9383         .add_device_attr = gaudi_add_device_attr,
9384         .handle_eqe = gaudi_handle_eqe,
9385         .set_pll_profile = gaudi_set_pll_profile,
9386         .get_events_stat = gaudi_get_events_stat,
9387         .read_pte = gaudi_read_pte,
9388         .write_pte = gaudi_write_pte,
9389         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9390         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9391         .send_heartbeat = gaudi_send_heartbeat,
9392         .set_clock_gating = gaudi_set_clock_gating,
9393         .disable_clock_gating = gaudi_disable_clock_gating,
9394         .debug_coresight = gaudi_debug_coresight,
9395         .is_device_idle = gaudi_is_device_idle,
9396         .soft_reset_late_init = gaudi_soft_reset_late_init,
9397         .hw_queues_lock = gaudi_hw_queues_lock,
9398         .hw_queues_unlock = gaudi_hw_queues_unlock,
9399         .get_pci_id = gaudi_get_pci_id,
9400         .get_eeprom_data = gaudi_get_eeprom_data,
9401         .send_cpu_message = gaudi_send_cpu_message,
9402         .pci_bars_map = gaudi_pci_bars_map,
9403         .init_iatu = gaudi_init_iatu,
9404         .rreg = hl_rreg,
9405         .wreg = hl_wreg,
9406         .halt_coresight = gaudi_halt_coresight,
9407         .ctx_init = gaudi_ctx_init,
9408         .ctx_fini = gaudi_ctx_fini,
9409         .get_clk_rate = gaudi_get_clk_rate,
9410         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9411         .load_firmware_to_device = gaudi_load_firmware_to_device,
9412         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9413         .get_signal_cb_size = gaudi_get_signal_cb_size,
9414         .get_wait_cb_size = gaudi_get_wait_cb_size,
9415         .gen_signal_cb = gaudi_gen_signal_cb,
9416         .gen_wait_cb = gaudi_gen_wait_cb,
9417         .reset_sob = gaudi_reset_sob,
9418         .reset_sob_group = gaudi_reset_sob_group,
9419         .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9420         .get_device_time = gaudi_get_device_time,
9421         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9422         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9423         .scramble_addr = hl_mmu_scramble_addr,
9424         .descramble_addr = hl_mmu_descramble_addr,
9425         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9426         .get_hw_block_id = gaudi_get_hw_block_id,
9427         .hw_block_mmap = gaudi_block_mmap,
9428         .enable_events_from_fw = gaudi_enable_events_from_fw,
9429         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9430         .init_firmware_loader = gaudi_init_firmware_loader,
9431         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9432         .state_dump_init = gaudi_state_dump_init,
9433         .get_sob_addr = gaudi_get_sob_addr
9434 };
9435
9436 /**
9437  * gaudi_set_asic_funcs - set GAUDI function pointers
9438  *
9439  * @hdev: pointer to hl_device structure
9440  *
9441  */
9442 void gaudi_set_asic_funcs(struct hl_device *hdev)
9443 {
9444         hdev->asic_funcs = &gaudi_funcs;
9445 }
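
/*
 * Illustration (hypothetical, not part of the driver): once
 * gaudi_set_asic_funcs() has run, the ASIC-agnostic core never calls the
 * gaudi_* implementations directly; every hardware operation is dispatched
 * through the ops table, e.g.:
 */
static inline u64 hl_read_device_time_sketch(struct hl_device *hdev)
{
        /* resolves to gaudi_get_device_time() on Gaudi devices */
        return hdev->asic_funcs->get_device_time(hdev);
}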