habanalabs/gaudi: fix information printed on SM event
drivers/misc/habanalabs/gaudi/gaudi.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2020 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse the CB, but WREG should be
43  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
44  *                      never secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to non-secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000         /* 1s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN            20
86
87 #define GAUDI_CB_POOL_CB_CNT            512
88 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
97
98 #define GAUDI_ARB_WDT_TIMEOUT           0x1000000
99
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
101                 BIT(GAUDI_ENGINE_ID_MME_0) |\
102                 BIT(GAUDI_ENGINE_ID_MME_2) |\
103                 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
104
105 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
106
107 #define GAUDI_PLL_MAX 10
108
109 #define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")
110
111 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
112                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
113                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
114                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
115                 "gaudi cpu eq"
116 };
117
118 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
119         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
120         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
121         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
122         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
123         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
124         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
125         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
126         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
127 };
128
129 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
130         [0] = GAUDI_QUEUE_ID_DMA_0_0,
131         [1] = GAUDI_QUEUE_ID_DMA_0_1,
132         [2] = GAUDI_QUEUE_ID_DMA_0_2,
133         [3] = GAUDI_QUEUE_ID_DMA_0_3,
134         [4] = GAUDI_QUEUE_ID_DMA_1_0,
135         [5] = GAUDI_QUEUE_ID_DMA_1_1,
136         [6] = GAUDI_QUEUE_ID_DMA_1_2,
137         [7] = GAUDI_QUEUE_ID_DMA_1_3,
138 };
139
140 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
141         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
142         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
143         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
144         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
145         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
146         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
147         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
148         [PACKET_FENCE]          = sizeof(struct packet_fence),
149         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
150         [PACKET_NOP]            = sizeof(struct packet_nop),
151         [PACKET_STOP]           = sizeof(struct packet_stop),
152         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
153         [PACKET_WAIT]           = sizeof(struct packet_wait),
154         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
155 };
156
157 static inline bool validate_packet_id(enum packet_id id)
158 {
159         switch (id) {
160         case PACKET_WREG_32:
161         case PACKET_WREG_BULK:
162         case PACKET_MSG_LONG:
163         case PACKET_MSG_SHORT:
164         case PACKET_CP_DMA:
165         case PACKET_REPEAT:
166         case PACKET_MSG_PROT:
167         case PACKET_FENCE:
168         case PACKET_LIN_DMA:
169         case PACKET_NOP:
170         case PACKET_STOP:
171         case PACKET_ARB_POINT:
172         case PACKET_WAIT:
173         case PACKET_LOAD_AND_EXE:
174                 return true;
175         default:
176                 return false;
177         }
178 }
179
180 static const char * const
181 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
182         "tpc_address_exceed_slm",
183         "tpc_div_by_0",
184         "tpc_spu_mac_overflow",
185         "tpc_spu_addsub_overflow",
186         "tpc_spu_abs_overflow",
187         "tpc_spu_fp_dst_nan_inf",
188         "tpc_spu_fp_dst_denorm",
189         "tpc_vpu_mac_overflow",
190         "tpc_vpu_addsub_overflow",
191         "tpc_vpu_abs_overflow",
192         "tpc_vpu_fp_dst_nan_inf",
193         "tpc_vpu_fp_dst_denorm",
194         "tpc_assertions",
195         "tpc_illegal_instruction",
196         "tpc_pc_wrap_around",
197         "tpc_qm_sw_err",
198         "tpc_hbw_rresp_err",
199         "tpc_hbw_bresp_err",
200         "tpc_lbw_rresp_err",
201         "tpc_lbw_bresp_err"
202 };
203
204 static const char * const
205 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
206         "PQ AXI HBW error",
207         "CQ AXI HBW error",
208         "CP AXI HBW error",
209         "CP error due to undefined OPCODE",
210         "CP encountered STOP OPCODE",
211         "CP AXI LBW error",
212         "CP WRREG32 or WRBULK returned error",
213         "N/A",
214         "FENCE 0 inc over max value and clipped",
215         "FENCE 1 inc over max value and clipped",
216         "FENCE 2 inc over max value and clipped",
217         "FENCE 3 inc over max value and clipped",
218         "FENCE 0 dec under min value and clipped",
219         "FENCE 1 dec under min value and clipped",
220         "FENCE 2 dec under min value and clipped",
221         "FENCE 3 dec under min value and clipped"
222 };
223
224 static const char * const
225 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
226         "Choice push while full error",
227         "Choice Q watchdog error",
228         "MSG AXI LBW returned with error"
229 };
230
231 enum gaudi_sm_sei_cause {
232         GAUDI_SM_SEI_SO_OVERFLOW,
233         GAUDI_SM_SEI_LBW_4B_UNALIGNED,
234         GAUDI_SM_SEI_AXI_RESPONSE_ERR
235 };
236
237 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
238         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
239         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
240         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
241         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
242         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
243         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
244         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
245         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
246         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
247         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
248         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
249         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
250         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
251         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
252         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
253         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
254         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
255         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
256         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
257         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
258         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
259         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
349         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
350         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
351 };
352
353 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
354         { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
355         { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
356         { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
357         { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
358         { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
359         { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
360         { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
361         { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
362         { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
363         { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
364         { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
365         { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
366         { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
367         { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
368         { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
369         { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
370         { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
371         { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
372         { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
373         { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
374         { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
375         { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
376         { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
377         { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
378         { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
379         { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
380         { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
381 };
382
383 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
384         { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
385         { .id = 201, .name = "MON_OBJ_DMA_UP_FEADBACK_RESET" },
386         { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
387         { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
388         { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
389         { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
390         { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
391         { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
392         { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
393         { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
394         { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
395 };
396
397 static s64 gaudi_state_dump_specs_props[] = {
398         [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
399         [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
400         [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
401         [SP_MON_OBJ_WR_ADDR_LOW] =
402                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
403         [SP_MON_OBJ_WR_ADDR_HIGH] =
404                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
405         [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
406         [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
407         [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
408         [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
409         [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
410         [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
411         [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
412         [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
413         [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
414         [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
415         [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
416         [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
417         [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
418         [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
419         [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
420         [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
421         [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
422         [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
423         [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
424         [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
425         [SP_FENCE0_CNT_OFFSET] =
426                 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
427         [SP_FENCE0_RDATA_OFFSET] =
428                 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
429         [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
430         [SP_NUM_CORES] = 1,
431 };
432
433 /* The order here is opposite to the order of the indexing in the h/w.
434  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
435  */
436 static const char * const gaudi_sync_manager_names[] = {
437         "SYNC_MGR_E_N",
438         "SYNC_MGR_W_N",
439         "SYNC_MGR_E_S",
440         "SYNC_MGR_W_S",
441         NULL
442 };
443
444 struct ecc_info_extract_params {
445         u64 block_address;
446         u32 num_memories;
447         bool derr;
448         bool disable_clock_gating;
449 };
450
451 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
452                                                                 u64 phys_addr);
453 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
454                                         struct hl_cs_job *job);
455 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
456                                         u32 size, u64 val);
457 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
458                                         u32 num_regs, u32 val);
459 static int gaudi_schedule_register_memset(struct hl_device *hdev,
460                 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
461 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
462                                 u32 tpc_id);
463 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
464 static int gaudi_cpucp_info_get(struct hl_device *hdev);
465 static void gaudi_disable_clock_gating(struct hl_device *hdev);
466 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
467 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
468                                 u32 size, bool eb);
469 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
470                                 struct hl_gen_wait_properties *prop);
471
472 static inline enum hl_collective_mode
473 get_collective_mode(struct hl_device *hdev, u32 queue_id)
474 {
475         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
476                 return HL_COLLECTIVE_MASTER;
477
478         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
479                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
480                 return HL_COLLECTIVE_SLAVE;
481
482         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
483                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
484                 return HL_COLLECTIVE_SLAVE;
485
486         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
487                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
488                 return HL_COLLECTIVE_SLAVE;
489
490         return HL_COLLECTIVE_NOT_SUPPORTED;
491 }
492
493 static inline void set_default_power_values(struct hl_device *hdev)
494 {
495         struct asic_fixed_properties *prop = &hdev->asic_prop;
496
497         if (hdev->card_type == cpucp_card_type_pmc) {
498                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
499                 prop->dc_power_default = DC_POWER_DEFAULT_PMC;
500         } else {
501                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
502                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
503         }
504 }
505
506 static int gaudi_set_fixed_properties(struct hl_device *hdev)
507 {
508         struct asic_fixed_properties *prop = &hdev->asic_prop;
509         u32 num_sync_stream_queues = 0;
510         int i;
511
512         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
513         prop->hw_queues_props = kcalloc(prop->max_queues,
514                         sizeof(struct hw_queue_properties),
515                         GFP_KERNEL);
516
517         if (!prop->hw_queues_props)
518                 return -ENOMEM;
519
520         for (i = 0 ; i < prop->max_queues ; i++) {
521                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
522                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
523                         prop->hw_queues_props[i].driver_only = 0;
524                         prop->hw_queues_props[i].supports_sync_stream = 1;
525                         prop->hw_queues_props[i].cb_alloc_flags =
526                                 CB_ALLOC_KERNEL;
527                         num_sync_stream_queues++;
528                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
529                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
530                         prop->hw_queues_props[i].driver_only = 1;
531                         prop->hw_queues_props[i].supports_sync_stream = 0;
532                         prop->hw_queues_props[i].cb_alloc_flags =
533                                 CB_ALLOC_KERNEL;
534                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
535                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
536                         prop->hw_queues_props[i].driver_only = 0;
537                         prop->hw_queues_props[i].supports_sync_stream = 0;
538                         prop->hw_queues_props[i].cb_alloc_flags =
539                                 CB_ALLOC_USER;
540
541                 }
542                 prop->hw_queues_props[i].collective_mode =
543                                                 get_collective_mode(hdev, i);
544         }
545
546         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
547         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
548         prop->collective_first_sob = 0;
549         prop->collective_first_mon = 0;
550
551         /* 2 SOBs per internal queue stream are reserved for collective */
552         prop->sync_stream_first_sob =
553                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
554                         * QMAN_STREAMS * HL_RSVD_SOBS;
555
556          /* 1 monitor per internal queue stream is reserved for collective
557          * 2 monitors per external queue stream are reserved for collective
558          */
559         prop->sync_stream_first_mon =
560                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
561                         (NUMBER_OF_EXT_HW_QUEUES * 2);
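        /* Worked example for the two reservations above (illustrative values
         * only, not necessarily the real macro values from the driver
         * headers): assuming NUMBER_OF_SOBS_IN_GRP = 11 and
         * HL_MAX_SOBS_PER_MONITOR = 8, ALIGN(11, 8) = 16; with
         * QMAN_STREAMS = 4 and HL_RSVD_SOBS = 2 the first sync-stream SOB
         * would then be 16 * 4 * 2 = 128, and all SOBs below that index
         * remain reserved for collective operations (collective_first_sob
         * is 0).
         */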
562
563         prop->dram_base_address = DRAM_PHYS_BASE;
564         prop->dram_size = GAUDI_HBM_SIZE_32GB;
565         prop->dram_end_address = prop->dram_base_address +
566                                         prop->dram_size;
567         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
568
569         prop->sram_base_address = SRAM_BASE_ADDR;
570         prop->sram_size = SRAM_SIZE;
571         prop->sram_end_address = prop->sram_base_address +
572                                         prop->sram_size;
573         prop->sram_user_base_address = prop->sram_base_address +
574                                         SRAM_USER_BASE_OFFSET;
575
576         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
577         if (hdev->pldm)
578                 prop->mmu_pgt_size = 0x800000; /* 8MB */
579         else
580                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
581         prop->mmu_pte_size = HL_PTE_SIZE;
582         prop->mmu_hop_table_size = HOP_TABLE_SIZE;
583         prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
584         prop->dram_page_size = PAGE_SIZE_2MB;
585         prop->dram_supports_virtual_memory = false;
586
587         prop->pmmu.hop0_shift = HOP0_SHIFT;
588         prop->pmmu.hop1_shift = HOP1_SHIFT;
589         prop->pmmu.hop2_shift = HOP2_SHIFT;
590         prop->pmmu.hop3_shift = HOP3_SHIFT;
591         prop->pmmu.hop4_shift = HOP4_SHIFT;
592         prop->pmmu.hop0_mask = HOP0_MASK;
593         prop->pmmu.hop1_mask = HOP1_MASK;
594         prop->pmmu.hop2_mask = HOP2_MASK;
595         prop->pmmu.hop3_mask = HOP3_MASK;
596         prop->pmmu.hop4_mask = HOP4_MASK;
597         prop->pmmu.start_addr = VA_HOST_SPACE_START;
598         prop->pmmu.end_addr =
599                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
600         prop->pmmu.page_size = PAGE_SIZE_4KB;
601         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
602
603          /* PMMU and HPMMU are the same except for the page size */
604         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
605         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
606
607         /* shifts and masks are the same in PMMU and DMMU */
608         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
609         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
610         prop->dmmu.end_addr = VA_HOST_SPACE_END;
611         prop->dmmu.page_size = PAGE_SIZE_2MB;
612
613         prop->cfg_size = CFG_SIZE;
614         prop->max_asid = MAX_ASID;
615         prop->num_of_events = GAUDI_EVENT_SIZE;
616         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
617
618         set_default_power_values(hdev);
619
620         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
621         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
622
623         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
624         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
625
626         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
627                                         CARD_NAME_MAX_LEN);
628
629         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
630
631         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
632                         prop->sync_stream_first_sob +
633                         (num_sync_stream_queues * HL_RSVD_SOBS);
634         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
635                         prop->sync_stream_first_mon +
636                         (num_sync_stream_queues * HL_RSVD_MONS);
637
638         prop->first_available_user_msix_interrupt = USHRT_MAX;
639
640         for (i = 0 ; i < HL_MAX_DCORES ; i++)
641                 prop->first_available_cq[i] = USHRT_MAX;
642
643         prop->fw_cpu_boot_dev_sts0_valid = false;
644         prop->fw_cpu_boot_dev_sts1_valid = false;
645         prop->hard_reset_done_by_fw = false;
646         prop->gic_interrupts_enable = true;
647
648         return 0;
649 }
650
651 static int gaudi_pci_bars_map(struct hl_device *hdev)
652 {
653         static const char * const name[] = {"SRAM", "CFG", "HBM"};
654         bool is_wc[3] = {false, false, true};
655         int rc;
656
657         rc = hl_pci_bars_map(hdev, name, is_wc);
658         if (rc)
659                 return rc;
660
661         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
662                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
663
664         return 0;
665 }
666
667 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
668 {
669         struct gaudi_device *gaudi = hdev->asic_specific;
670         struct hl_inbound_pci_region pci_region;
671         u64 old_addr = addr;
672         int rc;
673
674         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
675                 return old_addr;
676
677         if (hdev->asic_prop.iatu_done_by_fw)
678                 return U64_MAX;
679
680         /* Inbound Region 2 - Bar 4 - Point to HBM */
681         pci_region.mode = PCI_BAR_MATCH_MODE;
682         pci_region.bar = HBM_BAR_ID;
683         pci_region.addr = addr;
684         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
685         if (rc)
686                 return U64_MAX;
687
688         if (gaudi) {
689                 old_addr = gaudi->hbm_bar_cur_addr;
690                 gaudi->hbm_bar_cur_addr = addr;
691         }
692
693         return old_addr;
694 }
695
696 static int gaudi_init_iatu(struct hl_device *hdev)
697 {
698         struct hl_inbound_pci_region inbound_region;
699         struct hl_outbound_pci_region outbound_region;
700         int rc;
701
702         if (hdev->asic_prop.iatu_done_by_fw)
703                 return 0;
704
705         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
706         inbound_region.mode = PCI_BAR_MATCH_MODE;
707         inbound_region.bar = SRAM_BAR_ID;
708         inbound_region.addr = SRAM_BASE_ADDR;
709         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
710         if (rc)
711                 goto done;
712
713         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
714         inbound_region.mode = PCI_BAR_MATCH_MODE;
715         inbound_region.bar = CFG_BAR_ID;
716         inbound_region.addr = SPI_FLASH_BASE_ADDR;
717         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
718         if (rc)
719                 goto done;
720
721         /* Inbound Region 2 - Bar 4 - Point to HBM */
722         inbound_region.mode = PCI_BAR_MATCH_MODE;
723         inbound_region.bar = HBM_BAR_ID;
724         inbound_region.addr = DRAM_PHYS_BASE;
725         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
726         if (rc)
727                 goto done;
728
729         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
730
731         /* Outbound Region 0 - Point to Host */
732         outbound_region.addr = HOST_PHYS_BASE;
733         outbound_region.size = HOST_PHYS_SIZE;
734         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
735
736 done:
737         return rc;
738 }
739
740 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
741 {
742         return RREG32(mmHW_STATE);
743 }
744
745 static int gaudi_early_init(struct hl_device *hdev)
746 {
747         struct asic_fixed_properties *prop = &hdev->asic_prop;
748         struct pci_dev *pdev = hdev->pdev;
749         u32 fw_boot_status;
750         int rc;
751
752         rc = gaudi_set_fixed_properties(hdev);
753         if (rc) {
754                 dev_err(hdev->dev, "Failed setting fixed properties\n");
755                 return rc;
756         }
757
758         /* Check BAR sizes */
759         if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
760                 dev_err(hdev->dev,
761                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
762                         SRAM_BAR_ID,
763                         (unsigned long long) pci_resource_len(pdev,
764                                                         SRAM_BAR_ID),
765                         SRAM_BAR_SIZE);
766                 rc = -ENODEV;
767                 goto free_queue_props;
768         }
769
770         if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
771                 dev_err(hdev->dev,
772                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
773                         CFG_BAR_ID,
774                         (unsigned long long) pci_resource_len(pdev,
775                                                                 CFG_BAR_ID),
776                         CFG_BAR_SIZE);
777                 rc = -ENODEV;
778                 goto free_queue_props;
779         }
780
781         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
782
783         /* If FW security is enabled at this point it means no access to ELBI */
784         if (hdev->asic_prop.fw_security_enabled) {
785                 hdev->asic_prop.iatu_done_by_fw = true;
786
787                 /*
788                  * The GIC security bit can ONLY be set by CPUCP, so at this stage
789                  * the decision can only be based on PCI ID security.
790                  */
791                 hdev->asic_prop.gic_interrupts_enable = false;
792                 goto pci_init;
793         }
794
795         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
796                                 &fw_boot_status);
797         if (rc)
798                 goto free_queue_props;
799
800         /* Check whether FW is configuring iATU */
801         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
802                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
803                 hdev->asic_prop.iatu_done_by_fw = true;
804
805 pci_init:
806         rc = hl_pci_init(hdev);
807         if (rc)
808                 goto free_queue_props;
809
810          /* Before continuing with the initialization, we need to read the preboot
811           * version to determine whether we are running with security-enabled firmware
812          */
813         rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
814                                         mmCPU_BOOT_DEV_STS0,
815                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
816                                         mmCPU_BOOT_ERR1,
817                                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
818         if (rc) {
819                 if (hdev->reset_on_preboot_fail)
820                         hdev->asic_funcs->hw_fini(hdev, true);
821                 goto pci_fini;
822         }
823
824         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
825                 dev_info(hdev->dev,
826                         "H/W state is dirty, must reset before initializing\n");
827                 hdev->asic_funcs->hw_fini(hdev, true);
828         }
829
830         return 0;
831
832 pci_fini:
833         hl_pci_fini(hdev);
834 free_queue_props:
835         kfree(hdev->asic_prop.hw_queues_props);
836         return rc;
837 }
838
839 static int gaudi_early_fini(struct hl_device *hdev)
840 {
841         kfree(hdev->asic_prop.hw_queues_props);
842         hl_pci_fini(hdev);
843
844         return 0;
845 }
846
847 /**
848  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
849  *
850  * @hdev: pointer to hl_device structure
851  *
852  */
853 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
854 {
855         struct asic_fixed_properties *prop = &hdev->asic_prop;
856         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
857         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
858         int rc;
859
860         if (hdev->asic_prop.fw_security_enabled) {
861                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
862
863                 if (rc)
864                         return rc;
865
866                 freq = pll_freq_arr[2];
867         } else {
868                 /* Backward compatibility */
869                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
870                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
871                 nr = RREG32(mmPSOC_CPU_PLL_NR);
872                 nf = RREG32(mmPSOC_CPU_PLL_NF);
873                 od = RREG32(mmPSOC_CPU_PLL_OD);
874
875                 if (div_sel == DIV_SEL_REF_CLK ||
876                                 div_sel == DIV_SEL_DIVIDED_REF) {
877                         if (div_sel == DIV_SEL_REF_CLK)
878                                 freq = PLL_REF_CLK;
879                         else
880                                 freq = PLL_REF_CLK / (div_fctr + 1);
881                 } else if (div_sel == DIV_SEL_PLL_CLK ||
882                         div_sel == DIV_SEL_DIVIDED_PLL) {
883                         pll_clk = PLL_REF_CLK * (nf + 1) /
884                                         ((nr + 1) * (od + 1));
885                         if (div_sel == DIV_SEL_PLL_CLK)
886                                 freq = pll_clk;
887                         else
888                                 freq = pll_clk / (div_fctr + 1);
889                 } else {
890                         dev_warn(hdev->dev,
891                                 "Received invalid div select value: %d",
892                                 div_sel);
893                         freq = 0;
894                 }
895         }
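        /* Worked example for the backward-compatibility calculation above
         * (illustrative register values only, not taken from a real device):
         * with PLL_REF_CLK = 50, nf = 99, nr = 1, od = 1:
         *     pll_clk = 50 * (99 + 1) / ((1 + 1) * (1 + 1)) = 1250
         * and for div_sel == DIV_SEL_DIVIDED_PLL with div_fctr = 4:
         *     freq = 1250 / (4 + 1) = 250
         */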
896
897         prop->psoc_timestamp_frequency = freq;
898         prop->psoc_pci_pll_nr = nr;
899         prop->psoc_pci_pll_nf = nf;
900         prop->psoc_pci_pll_od = od;
901         prop->psoc_pci_pll_div_factor = div_fctr;
902
903         return 0;
904 }
905
906 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
907                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
908 {
909         struct asic_fixed_properties *prop = &hdev->asic_prop;
910         struct packet_lin_dma *init_tpc_mem_pkt;
911         struct hl_cs_job *job;
912         struct hl_cb *cb;
913         u64 dst_addr;
914         u32 cb_size, ctl;
915         u8 tpc_id;
916         int rc;
917
918         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
919         if (!cb)
920                 return -EFAULT;
921
922         init_tpc_mem_pkt = cb->kernel_address;
923         cb_size = sizeof(*init_tpc_mem_pkt);
924         memset(init_tpc_mem_pkt, 0, cb_size);
925
926         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
927
928         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
929         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
930         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
931         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
932
933         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
934
935         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
936         dst_addr = (prop->sram_user_base_address &
937                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
938                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
939         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
940
941         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
942         if (!job) {
943                 dev_err(hdev->dev, "Failed to allocate a new job\n");
944                 rc = -ENOMEM;
945                 goto release_cb;
946         }
947
948         job->id = 0;
949         job->user_cb = cb;
950         atomic_inc(&job->user_cb->cs_cnt);
951         job->user_cb_size = cb_size;
952         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
953         job->patched_cb = job->user_cb;
954         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
955
956         hl_debugfs_add_job(hdev, job);
957
958         rc = gaudi_send_job_on_qman0(hdev, job);
959
960         if (rc)
961                 goto free_job;
962
963         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
964                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
965                 if (rc)
966                         break;
967         }
968
969 free_job:
970         hl_userptr_delete_list(hdev, &job->userptr_list);
971         hl_debugfs_remove_job(hdev, job);
972         kfree(job);
973         atomic_dec(&cb->cs_cnt);
974
975 release_cb:
976         hl_cb_put(cb);
977         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
978
979         return rc;
980 }
981
982 /*
983  * gaudi_init_tpc_mem() - Initialize TPC memories.
984  * @hdev: Pointer to hl_device structure.
985  *
986  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
987  *
988  * Return: 0 for success, negative value for error.
989  */
990 static int gaudi_init_tpc_mem(struct hl_device *hdev)
991 {
992         const struct firmware *fw;
993         size_t fw_size;
994         void *cpu_addr;
995         dma_addr_t dma_handle;
996         int rc, count = 5;
997
998 again:
999         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1000         if (rc == -EINTR && count-- > 0) {
1001                 msleep(50);
1002                 goto again;
1003         }
1004
1005         if (rc) {
1006                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1007                                 GAUDI_TPC_FW_FILE);
1008                 goto out;
1009         }
1010
1011         fw_size = fw->size;
1012         cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1013                         &dma_handle, GFP_KERNEL | __GFP_ZERO);
1014         if (!cpu_addr) {
1015                 dev_err(hdev->dev,
1016                         "Failed to allocate %zu of dma memory for TPC kernel\n",
1017                         fw_size);
1018                 rc = -ENOMEM;
1019                 goto out;
1020         }
1021
1022         memcpy(cpu_addr, fw->data, fw_size);
1023
1024         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1025
1026         hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1027                         dma_handle);
1028
1029 out:
1030         release_firmware(fw);
1031         return rc;
1032 }
1033
1034 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1035 {
1036         struct gaudi_device *gaudi = hdev->asic_specific;
1037         struct gaudi_collective_properties *prop = &gaudi->collective_props;
1038         struct hl_hw_queue *q;
1039         u32 i, sob_id, sob_group_id, queue_id;
1040
1041         /* Iterate through SOB groups and assign a SOB for each slave queue */
1042         sob_group_id =
1043                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1044         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1045
1046         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1047         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1048                 q = &hdev->kernel_queues[queue_id + (4 * i)];
1049                 q->sync_stream_prop.collective_sob_id = sob_id + i;
1050         }
1051
1052         /* Both DMA5 and TPC7 use the same resources since only a single
1053          * engine needs to participate in the reduction process
1054          */
1055         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1056         q = &hdev->kernel_queues[queue_id];
1057         q->sync_stream_prop.collective_sob_id =
1058                         sob_id + NIC_NUMBER_OF_ENGINES;
1059
1060         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1061         q = &hdev->kernel_queues[queue_id];
1062         q->sync_stream_prop.collective_sob_id =
1063                         sob_id + NIC_NUMBER_OF_ENGINES;
1064 }
1065
1066 static void gaudi_sob_group_hw_reset(struct kref *ref)
1067 {
1068         struct gaudi_hw_sob_group *hw_sob_group =
1069                 container_of(ref, struct gaudi_hw_sob_group, kref);
1070         struct hl_device *hdev = hw_sob_group->hdev;
1071         u64 base_addr;
1072         int rc;
1073
1074         base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1075                         hw_sob_group->base_sob_id * 4;
1076         rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
1077                         base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
1078         if (rc)
1079                 dev_err(hdev->dev,
1080                         "failed resetting sob group - sob base %u, count %u",
1081                         hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);
1082
1083         kref_init(&hw_sob_group->kref);
1084 }
1085
1086 static void gaudi_sob_group_reset_error(struct kref *ref)
1087 {
1088         struct gaudi_hw_sob_group *hw_sob_group =
1089                 container_of(ref, struct gaudi_hw_sob_group, kref);
1090         struct hl_device *hdev = hw_sob_group->hdev;
1091
1092         dev_crit(hdev->dev,
1093                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1094                 hw_sob_group->base_sob_id);
1095 }
1096
1097 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1098 {
1099         struct gaudi_collective_properties *prop;
1100         int i;
1101
1102         prop = &gaudi->collective_props;
1103
1104         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1105
1106         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1107                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1108                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1109                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1110         /* Set collective engine bit */
1111         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1112                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1113 }
1114
1115 static int gaudi_collective_init(struct hl_device *hdev)
1116 {
1117         u32 i, sob_id, reserved_sobs_per_group;
1118         struct gaudi_collective_properties *prop;
1119         struct gaudi_device *gaudi;
1120
1121         gaudi = hdev->asic_specific;
1122         prop = &gaudi->collective_props;
1123         sob_id = hdev->asic_prop.collective_first_sob;
1124
1125         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1126         reserved_sobs_per_group =
1127                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1128
1129         /* Init SOB groups */
1130         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1131                 prop->hw_sob_group[i].hdev = hdev;
1132                 prop->hw_sob_group[i].base_sob_id = sob_id;
1133                 sob_id += reserved_sobs_per_group;
1134                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1135         }
1136
1137         for (i = 0 ; i < QMAN_STREAMS; i++) {
1138                 prop->next_sob_group_val[i] = 1;
1139                 prop->curr_sob_group_idx[i] = 0;
1140                 gaudi_collective_map_sobs(hdev, i);
1141         }
1142
1143         gaudi_collective_mstr_sob_mask_set(gaudi);
1144
1145         return 0;
1146 }
1147
1148 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1149 {
1150         struct gaudi_device *gaudi = hdev->asic_specific;
1151         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1152
1153         kref_put(&cprop->hw_sob_group[sob_group].kref,
1154                                         gaudi_sob_group_hw_reset);
1155 }
1156
1157 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1158                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1159 {
1160         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1161         struct gaudi_collective_properties *cprop;
1162         struct hl_gen_wait_properties wait_prop;
1163         struct hl_sync_stream_properties *prop;
1164         struct gaudi_device *gaudi;
1165
1166         gaudi = hdev->asic_specific;
1167         cprop = &gaudi->collective_props;
1168         queue_id = job->hw_queue_id;
1169         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1170
1171         master_sob_base =
1172                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1173         master_monitor = prop->collective_mstr_mon_id[0];
1174
1175         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1176
1177         dev_dbg(hdev->dev,
1178                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1179                 master_sob_base, cprop->mstr_sob_mask[0],
1180                 cprop->next_sob_group_val[stream],
1181                 master_monitor, queue_id);
1182
1183         wait_prop.data = (void *) job->patched_cb;
1184         wait_prop.sob_base = master_sob_base;
1185         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1186         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1187         wait_prop.mon_id = master_monitor;
1188         wait_prop.q_idx = queue_id;
1189         wait_prop.size = cb_size;
1190         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1191
1192         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1193         master_monitor = prop->collective_mstr_mon_id[1];
1194
1195         dev_dbg(hdev->dev,
1196                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1197                 master_sob_base, cprop->mstr_sob_mask[1],
1198                 cprop->next_sob_group_val[stream],
1199                 master_monitor, queue_id);
1200
1201         wait_prop.sob_base = master_sob_base;
1202         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1203         wait_prop.mon_id = master_monitor;
1204         wait_prop.size = cb_size;
1205         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1206 }
1207
1208 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1209                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1210 {
1211         struct hl_gen_wait_properties wait_prop;
1212         struct hl_sync_stream_properties *prop;
1213         u32 queue_id, cb_size = 0;
1214
1215         queue_id = job->hw_queue_id;
1216         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1217
1218         /* Add to wait CBs using slave monitor */
1219         wait_prop.data = (void *) job->user_cb;
1220         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1221         wait_prop.sob_mask = 0x1;
1222         wait_prop.sob_val = cs_cmpl->sob_val;
1223         wait_prop.mon_id = prop->collective_slave_mon_id;
1224         wait_prop.q_idx = queue_id;
1225         wait_prop.size = cb_size;
1226
1227         dev_dbg(hdev->dev,
1228                 "Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
1229                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1230                 prop->collective_slave_mon_id, queue_id);
1231
1232         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1233
1234         dev_dbg(hdev->dev,
1235                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1236                 prop->collective_sob_id, queue_id);
1237
1238         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1239                         prop->collective_sob_id, cb_size, false);
1240 }
1241
1242 static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
1243 {
1244         struct hl_cs_compl *signal_cs_cmpl =
1245                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1246         struct hl_cs_compl *cs_cmpl =
1247                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1248         struct gaudi_collective_properties *cprop;
1249         u32 stream, queue_id, sob_group_offset;
1250         struct gaudi_device *gaudi;
1251         struct hl_device *hdev;
1252         struct hl_cs_job *job;
1253         struct hl_ctx *ctx;
1254
1255         ctx = cs->ctx;
1256         hdev = ctx->hdev;
1257         gaudi = hdev->asic_specific;
1258         cprop = &gaudi->collective_props;
1259
1260         /* copy the SOB id and value of the signal CS */
1261         cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1262         cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1263
1264         /* Calculate the stream from collective master queue (1st job) */
1265         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1266         stream = job->hw_queue_id % 4;
1267         sob_group_offset =
1268                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1269
1270         list_for_each_entry(job, &cs->job_list, cs_node) {
1271                 queue_id = job->hw_queue_id;
1272
1273                 if (hdev->kernel_queues[queue_id].collective_mode ==
1274                                 HL_COLLECTIVE_MASTER)
1275                         gaudi_collective_master_init_job(hdev, job, stream,
1276                                                 sob_group_offset);
1277                 else
1278                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1279         }
1280
1281         cs_cmpl->sob_group = sob_group_offset;
1282
1283         /* Handle sob group kref and wraparound */
1284         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1285         cprop->next_sob_group_val[stream]++;
1286
1287         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1288                 /*
1289                  * Decrement as we reached the max value.
1290                  * The release function won't be called here as we've
1291                  * just incremented the refcount.
1292                  */
1293                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1294                                 gaudi_sob_group_reset_error);
1295                 cprop->next_sob_group_val[stream] = 1;
1296                 /* only two SOBs are currently in use */
1297                 cprop->curr_sob_group_idx[stream] =
1298                         (cprop->curr_sob_group_idx[stream] + 1) &
1299                                                         (HL_RSVD_SOBS - 1);
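                /* The AND with (HL_RSVD_SOBS - 1) acts as a cheap modulo for
                 * the wraparound; this relies on HL_RSVD_SOBS being a power
                 * of two (2 at the time of writing, per the comment above).
                 */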
1300
1301                 gaudi_collective_map_sobs(hdev, stream);
1302
1303                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1304                                 cprop->curr_sob_group_idx[stream], stream);
1305         }
1306
1307         /* Increment kref since all slave queues are now waiting on it */
1308         kref_get(&cs_cmpl->hw_sob->kref);
1309         /*
1310          * Must put the signal fence after the SOB refcnt increment so
1311          * the SOB refcnt won't drop to 0 and reset the SOB before the
1312          * wait CS is submitted.
1313          */
1314         mb();
1315         hl_fence_put(cs->signal_fence);
1316         cs->signal_fence = NULL;
1317 }
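
/*
 * A short worked example of the SOB group bookkeeping above (a sketch,
 * assuming HL_RSVD_SOBS is a power of two, which the AND-based wraparound
 * relies on): for stream 2 the reserved group offsets are
 * 2 * HL_RSVD_SOBS + curr_sob_group_idx[2]. Whenever next_sob_group_val[2]
 * reaches HL_MAX_SOB_VAL, curr_sob_group_idx[2] advances by one modulo
 * HL_RSVD_SOBS, the newly selected group is re-mapped via
 * gaudi_collective_map_sobs() and the signal value restarts at 1.
 */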
1318
1319 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1320                 struct hl_ctx *ctx, struct hl_cs *cs,
1321                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
1322 {
1323         struct hw_queue_properties *hw_queue_prop;
1324         struct hl_cs_counters_atomic *cntr;
1325         struct hl_cs_job *job;
1326         struct hl_cb *cb;
1327         u32 cb_size;
1328         bool patched_cb;
1329
1330         cntr = &hdev->aggregated_cs_counters;
1331
1332         if (mode == HL_COLLECTIVE_MASTER) {
1333                 /* The CB of the collective master queue contains:
1334                  * 4 msg short packets for monitor 1 configuration
1335                  * 1 fence packet
1336                  * 4 msg short packets for monitor 2 configuration
1337                  * 1 fence packet
1338                  * 2 msg prot packets for completion and MSI-X
1339                  */
1340                 cb_size = sizeof(struct packet_msg_short) * 8 +
1341                                 sizeof(struct packet_fence) * 2 +
1342                                 sizeof(struct packet_msg_prot) * 2;
1343                 patched_cb = true;
1344         } else {
1345                 /* The CB of the collective slave queues contains:
1346                  * 4 msg short packets for monitor configuration
1347                  * 1 fence packet
1348                  * 1 additional msg short packet for sob signal
1349                  */
1350                 cb_size = sizeof(struct packet_msg_short) * 5 +
1351                                 sizeof(struct packet_fence);
1352                 patched_cb = false;
1353         }
1354
1355         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1356         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1357         if (!job) {
1358                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1359                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1360                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1361                 return -ENOMEM;
1362         }
1363
1364         /* Allocate an internal mapped CB for non-patched CBs */
1365         cb = hl_cb_kernel_create(hdev, cb_size,
1366                         hdev->mmu_enable && !patched_cb);
1367         if (!cb) {
1368                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1369                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1370                 kfree(job);
1371                 return -EFAULT;
1372         }
1373
1374         job->id = 0;
1375         job->cs = cs;
1376         job->user_cb = cb;
1377         atomic_inc(&job->user_cb->cs_cnt);
1378         job->user_cb_size = cb_size;
1379         job->hw_queue_id = queue_id;
1380
1381         /*
1382          * There is no need to parse the CB, as the user CB already is the
1383          * patched CB. We call hl_cb_destroy() for two reasons: the CB is no
1384          * longer needed in the CB idr, and its refcount must be decremented
1385          * as it was incremented inside hl_cb_kernel_create().
1386          */
1387         if (patched_cb)
1388                 job->patched_cb = job->user_cb;
1389         else
1390                 job->patched_cb = NULL;
1391
1392         job->job_cb_size = job->user_cb_size;
1393         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1394
1395         /* Increment the refcount, as we get a completion for external queues */
1396         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1397                 cs_get(cs);
1398
1399         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1400
1401         list_add_tail(&job->cs_node, &cs->job_list);
1402
1403         hl_debugfs_add_job(hdev, job);
1404
1405         return 0;
1406 }
1407
1408 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1409                 struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
1410                 u32 collective_engine_id)
1411 {
1412         struct gaudi_device *gaudi = hdev->asic_specific;
1413         struct hw_queue_properties *hw_queue_prop;
1414         u32 queue_id, collective_queue, num_jobs;
1415         u32 stream, nic_queue, nic_idx = 0;
1416         bool skip;
1417         int i, rc = 0;
1418
1419         /* Verify wait queue id is configured as master */
1420         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1421         if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1422                 dev_err(hdev->dev,
1423                         "Queue %d is not configured as collective master\n",
1424                         wait_queue_id);
1425                 return -EINVAL;
1426         }
1427
1428         /* Verify engine id is supported */
1429         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1430                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1431                 dev_err(hdev->dev,
1432                         "Collective wait does not support engine %u\n",
1433                         collective_engine_id);
1434                 return -EINVAL;
1435         }
1436
1437         stream = wait_queue_id % 4;
1438
1439         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1440                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1441         else
1442                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1443
1444         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1445         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1446
1447         /* The first job goes to the collective master queue; it will wait
1448          * for the collective slave queues to finish execution.
1449          * The synchronization is done using two monitors:
1450          * the first monitor for NICs 0-7, the second for NICs 8-9 and the
1451          * reduction engine (DMA5/TPC7).
1452          *
1453          * The rest of the jobs go to the collective slave queues, which will
1454          * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1455          */
1456         for (i = 0 ; i < num_jobs ; i++) {
1457                 if (i == 0) {
1458                         queue_id = wait_queue_id;
1459                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1460                                 HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
1461                 } else {
1462                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1463                                 if (gaudi->hw_cap_initialized &
1464                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1465                                         skip = false;
1466                                 else
1467                                         skip = true;
1468
1469                                 queue_id = nic_queue;
1470                                 nic_queue += 4;
1471                                 nic_idx++;
1472
1473                                 if (skip)
1474                                         continue;
1475                         } else {
1476                                 queue_id = collective_queue;
1477                         }
1478
1479                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1480                                 HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
1481                 }
1482
1483                 if (rc)
1484                         return rc;
1485         }
1486
1487         return rc;
1488 }
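
/*
 * The resulting job layout of a collective wait CS is therefore (a sketch,
 * assuming NUMBER_OF_SOBS_IN_GRP covers the NIC engines plus the reduction
 * engine): job 0 lands on the collective master (wait) queue, the next jobs
 * land on the per-stream NIC queues (engines whose HW_CAP_NIC bit is not set
 * are skipped), and the final job lands on the DMA5/TPC7 collective slave
 * queue selected above.
 */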
1489
1490 static int gaudi_late_init(struct hl_device *hdev)
1491 {
1492         struct gaudi_device *gaudi = hdev->asic_specific;
1493         int rc;
1494
1495         rc = gaudi->cpucp_info_get(hdev);
1496         if (rc) {
1497                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1498                 return rc;
1499         }
1500
1501         if ((hdev->card_type == cpucp_card_type_pci) &&
1502                         (hdev->nic_ports_mask & 0x3)) {
1503                 dev_info(hdev->dev,
1504                         "PCI card detected, only 8 ports are enabled\n");
1505                 hdev->nic_ports_mask &= ~0x3;
1506
1507                 /* Stop and disable unused NIC QMANs */
1508                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1509                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1510                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1511
1512                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1513                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1514                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1515
1516                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1517                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1518
1519                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1520         }
1521
1522         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1523         if (rc) {
1524                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1525                 return rc;
1526         }
1527
1528         rc = gaudi_fetch_psoc_frequency(hdev);
1529         if (rc) {
1530                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1531                 goto disable_pci_access;
1532         }
1533
1534         rc = gaudi_mmu_clear_pgt_range(hdev);
1535         if (rc) {
1536                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1537                 goto disable_pci_access;
1538         }
1539
1540         rc = gaudi_init_tpc_mem(hdev);
1541         if (rc) {
1542                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1543                 goto disable_pci_access;
1544         }
1545
1546         rc = gaudi_collective_init(hdev);
1547         if (rc) {
1548                 dev_err(hdev->dev, "Failed to init collective\n");
1549                 goto disable_pci_access;
1550         }
1551
1552         return 0;
1553
1554 disable_pci_access:
1555         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1556
1557         return rc;
1558 }
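
/*
 * Note on the PCI-card path above: clearing bits 0 and 1 of nic_ports_mask
 * disables ports 0 and 1, which is why the matching NIC0 QM0/QM1 QMANs are
 * stopped and HW_CAP_NIC0 | HW_CAP_NIC1 are dropped, leaving the 8 remaining
 * ports reported by the dev_info message.
 */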
1559
1560 static void gaudi_late_fini(struct hl_device *hdev)
1561 {
1562         const struct hwmon_channel_info **channel_info_arr;
1563         int i = 0;
1564
1565         if (!hdev->hl_chip_info->info)
1566                 return;
1567
1568         channel_info_arr = hdev->hl_chip_info->info;
1569
1570         while (channel_info_arr[i]) {
1571                 kfree(channel_info_arr[i]->config);
1572                 kfree(channel_info_arr[i]);
1573                 i++;
1574         }
1575
1576         kfree(channel_info_arr);
1577
1578         hdev->hl_chip_info->info = NULL;
1579 }
1580
1581 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1582 {
1583         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1584         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1585         int i, j, rc = 0;
1586
1587         /*
1588          * The device CPU works with 40-bit addresses, and bit 39 must be set
1589          * to '1' when accessing the host.
1590          * Bits 49:39 of the full host address are saved for a later
1591          * configuration of the HW, which extends the address to 50 bits.
1592          * Because a single HW register holds the extension bits, these bits
1593          * must be identical across the entire allocated range.
1594          */
1595
1596         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1597                 virt_addr_arr[i] =
1598                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1599                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1600                                                 &dma_addr_arr[i],
1601                                                 GFP_KERNEL | __GFP_ZERO);
1602                 if (!virt_addr_arr[i]) {
1603                         rc = -ENOMEM;
1604                         goto free_dma_mem_arr;
1605                 }
1606
1607                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1608                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1609                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1610                         break;
1611         }
1612
1613         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1614                 dev_err(hdev->dev,
1615                         "MSB of CPU accessible DMA memory is not identical across the allocated range\n");
1616                 rc = -EFAULT;
1617                 goto free_dma_mem_arr;
1618         }
1619
1620         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1621         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1622         hdev->cpu_pci_msb_addr =
1623                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1624
1625         if (!hdev->asic_prop.fw_security_enabled)
1626                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1627
1628 free_dma_mem_arr:
1629         for (j = 0 ; j < i ; j++)
1630                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1631                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1632                                                 virt_addr_arr[j],
1633                                                 dma_addr_arr[j]);
1634
1635         return rc;
1636 }
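
/*
 * Example of the retry logic above (a sketch; GAUDI_CPU_PCI_MSB_ADDR() is
 * taken to extract bits 49:39, as described in the comment): an allocation
 * that crosses a bit-39 boundary, e.g. one starting just below
 * 0x80_0000_0000 and ending above it, yields different MSB values for its
 * start and end addresses, so it is kept aside and the allocation is
 * retried, up to GAUDI_ALLOC_CPU_MEM_RETRY_CNT times. All rejected buffers
 * are freed at free_dma_mem_arr once a suitable range is found (or the
 * retries are exhausted).
 */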
1637
1638 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1639 {
1640         struct gaudi_device *gaudi = hdev->asic_specific;
1641         struct gaudi_internal_qman_info *q;
1642         u32 i;
1643
1644         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1645                 q = &gaudi->internal_qmans[i];
1646                 if (!q->pq_kernel_addr)
1647                         continue;
1648                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1649                                                         q->pq_kernel_addr,
1650                                                         q->pq_dma_addr);
1651         }
1652 }
1653
1654 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1655 {
1656         struct gaudi_device *gaudi = hdev->asic_specific;
1657         struct gaudi_internal_qman_info *q;
1658         int rc, i;
1659
1660         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1661                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1662                         continue;
1663
1664                 q = &gaudi->internal_qmans[i];
1665
1666                 switch (i) {
1667                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1668                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1669                         break;
1670                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1671                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1672                         break;
1673                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1674                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1675                         break;
1676                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1677                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1678                         break;
1679                 default:
1680                         dev_err(hdev->dev, "Bad internal queue index %d", i);
1681                         rc = -EINVAL;
1682                         goto free_internal_qmans_pq_mem;
1683                 }
1684
1685                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1686                                                 hdev, q->pq_size,
1687                                                 &q->pq_dma_addr,
1688                                                 GFP_KERNEL | __GFP_ZERO);
1689                 if (!q->pq_kernel_addr) {
1690                         rc = -ENOMEM;
1691                         goto free_internal_qmans_pq_mem;
1692                 }
1693         }
1694
1695         return 0;
1696
1697 free_internal_qmans_pq_mem:
1698         gaudi_free_internal_qmans_pq_mem(hdev);
1699         return rc;
1700 }
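
/*
 * The allocation loop above sizes each PQ by engine class (HBM DMA, MME, TPC
 * or NIC) and relies on gaudi_free_internal_qmans_pq_mem() as its error path:
 * since that helper skips entries with a NULL pq_kernel_addr, it is safe to
 * call it on a partially initialized internal_qmans array.
 */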
1701
1702 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1703 {
1704         struct asic_fixed_properties *prop = &hdev->asic_prop;
1705         struct pci_mem_region *region;
1706
1707         /* CFG */
1708         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1709         region->region_base = CFG_BASE;
1710         region->region_size = CFG_SIZE;
1711         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1712         region->bar_size = CFG_BAR_SIZE;
1713         region->bar_id = CFG_BAR_ID;
1714         region->used = 1;
1715
1716         /* SRAM */
1717         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1718         region->region_base = SRAM_BASE_ADDR;
1719         region->region_size = SRAM_SIZE;
1720         region->offset_in_bar = 0;
1721         region->bar_size = SRAM_BAR_SIZE;
1722         region->bar_id = SRAM_BAR_ID;
1723         region->used = 1;
1724
1725         /* DRAM */
1726         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1727         region->region_base = DRAM_PHYS_BASE;
1728         region->region_size = hdev->asic_prop.dram_size;
1729         region->offset_in_bar = 0;
1730         region->bar_size = prop->dram_pci_bar_size;
1731         region->bar_id = HBM_BAR_ID;
1732         region->used = 1;
1733
1734         /* SP SRAM */
1735         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1736         region->region_base = PSOC_SCRATCHPAD_ADDR;
1737         region->region_size = PSOC_SCRATCHPAD_SIZE;
1738         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1739         region->bar_size = CFG_BAR_SIZE;
1740         region->bar_id = CFG_BAR_ID;
1741         region->used = 1;
1742 }
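
/*
 * A minimal sketch of how these regions are expected to be consumed (assuming
 * the common code translates a device address into a BAR offset in the usual
 * way):
 *
 *	bar_offset = device_addr - region->region_base + region->offset_in_bar;
 *
 * e.g. an address inside the CFG region maps into the CFG BAR at
 * (device_addr - CFG_BASE) + (CFG_BASE - SPI_FLASH_BASE_ADDR).
 */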
1743
1744 static int gaudi_sw_init(struct hl_device *hdev)
1745 {
1746         struct gaudi_device *gaudi;
1747         u32 i, event_id = 0;
1748         int rc;
1749
1750         /* Allocate device structure */
1751         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1752         if (!gaudi)
1753                 return -ENOMEM;
1754
1755         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1756                 if (gaudi_irq_map_table[i].valid) {
1757                         if (event_id == GAUDI_EVENT_SIZE) {
1758                                 dev_err(hdev->dev,
1759                                         "Event array exceeds the limit of %u events\n",
1760                                         GAUDI_EVENT_SIZE);
1761                                 rc = -EINVAL;
1762                                 goto free_gaudi_device;
1763                         }
1764
1765                         gaudi->events[event_id++] =
1766                                         gaudi_irq_map_table[i].fc_id;
1767                 }
1768         }
1769
1770         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1771
1772         gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1773
1774         hdev->asic_specific = gaudi;
1775
1776         /* Create DMA pool for small allocations */
1777         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1778                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1779         if (!hdev->dma_pool) {
1780                 dev_err(hdev->dev, "failed to create DMA pool\n");
1781                 rc = -ENOMEM;
1782                 goto free_gaudi_device;
1783         }
1784
1785         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1786         if (rc)
1787                 goto free_dma_pool;
1788
1789         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1790         if (!hdev->cpu_accessible_dma_pool) {
1791                 dev_err(hdev->dev,
1792                         "Failed to create CPU accessible DMA pool\n");
1793                 rc = -ENOMEM;
1794                 goto free_cpu_dma_mem;
1795         }
1796
1797         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1798                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1799                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1800         if (rc) {
1801                 dev_err(hdev->dev,
1802                         "Failed to add memory to CPU accessible DMA pool\n");
1803                 rc = -EFAULT;
1804                 goto free_cpu_accessible_dma_pool;
1805         }
1806
1807         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1808         if (rc)
1809                 goto free_cpu_accessible_dma_pool;
1810
1811         spin_lock_init(&gaudi->hw_queues_lock);
1812         mutex_init(&gaudi->clk_gate_mutex);
1813
1814         hdev->supports_sync_stream = true;
1815         hdev->supports_coresight = true;
1816         hdev->supports_staged_submission = true;
1817
1818         gaudi_set_pci_memory_regions(hdev);
1819
1820         return 0;
1821
1822 free_cpu_accessible_dma_pool:
1823         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1824 free_cpu_dma_mem:
1825         if (!hdev->asic_prop.fw_security_enabled)
1826                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1827                                         hdev->cpu_pci_msb_addr);
1828         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1829                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1830                         hdev->cpu_accessible_dma_mem,
1831                         hdev->cpu_accessible_dma_address);
1832 free_dma_pool:
1833         dma_pool_destroy(hdev->dma_pool);
1834 free_gaudi_device:
1835         kfree(gaudi);
1836         return rc;
1837 }
1838
1839 static int gaudi_sw_fini(struct hl_device *hdev)
1840 {
1841         struct gaudi_device *gaudi = hdev->asic_specific;
1842
1843         gaudi_free_internal_qmans_pq_mem(hdev);
1844
1845         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1846
1847         if (!hdev->asic_prop.fw_security_enabled)
1848                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1849                                         hdev->cpu_pci_msb_addr);
1850
1851         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1852                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1853                         hdev->cpu_accessible_dma_mem,
1854                         hdev->cpu_accessible_dma_address);
1855
1856         dma_pool_destroy(hdev->dma_pool);
1857
1858         mutex_destroy(&gaudi->clk_gate_mutex);
1859
1860         kfree(gaudi);
1861
1862         return 0;
1863 }
1864
1865 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1866 {
1867         struct hl_device *hdev = arg;
1868         int i;
1869
1870         if (hdev->disabled)
1871                 return IRQ_HANDLED;
1872
1873         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1874                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1875
1876         hl_irq_handler_eq(irq, &hdev->event_queue);
1877
1878         return IRQ_HANDLED;
1879 }
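
/*
 * In single MSI mode one vector serves everything, so the handler above simply
 * polls every completion queue and then the event queue on each interrupt;
 * the per-queue handlers are expected to do nothing when their queue is empty.
 */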
1880
1881 /*
1882  * For backward compatibility, new MSI interrupts should be set after the
1883  * existing CPU and NIC interrupts.
1884  */
1885 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1886                                 bool cpu_eq)
1887 {
1888         int msi_vec;
1889
1890         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1891                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1892                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1893
1894         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1895                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1896
1897         return pci_irq_vector(hdev->pdev, msi_vec);
1898 }
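
/*
 * Worked example of the vector mapping above: completion queue indices below
 * GAUDI_EVENT_QUEUE_MSI_IDX map 1:1 to MSI vectors, the CPU event queue always
 * uses vector GAUDI_EVENT_QUEUE_MSI_IDX, and any newer interrupt index is
 * shifted up by NIC_NUMBER_OF_ENGINES + 1 so it lands after the vectors
 * reserved for the CPU and NIC interrupts.
 */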
1899
1900 static int gaudi_enable_msi_single(struct hl_device *hdev)
1901 {
1902         int rc, irq;
1903
1904         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1905
1906         irq = gaudi_pci_irq_vector(hdev, 0, false);
1907         rc = request_irq(irq, gaudi_irq_handler_single, 0,
1908                         "gaudi single msi", hdev);
1909         if (rc)
1910                 dev_err(hdev->dev,
1911                         "Failed to request single MSI IRQ\n");
1912
1913         return rc;
1914 }
1915
1916 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1917 {
1918         int cq_cnt = hdev->asic_prop.completion_queues_count;
1919         int rc, i, irq_cnt_init, irq;
1920
1921         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1922                 irq = gaudi_pci_irq_vector(hdev, i, false);
1923                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1924                                 &hdev->completion_queue[i]);
1925                 if (rc) {
1926                         dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1927                         goto free_irqs;
1928                 }
1929         }
1930
1931         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1932         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1933                                 &hdev->event_queue);
1934         if (rc) {
1935                 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1936                 goto free_irqs;
1937         }
1938
1939         return 0;
1940
1941 free_irqs:
1942         for (i = 0 ; i < irq_cnt_init ; i++)
1943                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1944                                 &hdev->completion_queue[i]);
1945         return rc;
1946 }
1947
1948 static int gaudi_enable_msi(struct hl_device *hdev)
1949 {
1950         struct gaudi_device *gaudi = hdev->asic_specific;
1951         int rc;
1952
1953         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1954                 return 0;
1955
1956         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
1957         if (rc < 0) {
1958                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1959                 return rc;
1960         }
1961
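        /*
         * pci_alloc_irq_vectors() returns the number of vectors actually
         * allocated; anything below NUMBER_OF_INTERRUPTS falls back to the
         * single MSI scheme where one vector is shared by all queues.
         */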
1962         if (rc < NUMBER_OF_INTERRUPTS) {
1963                 gaudi->multi_msi_mode = false;
1964                 rc = gaudi_enable_msi_single(hdev);
1965         } else {
1966                 gaudi->multi_msi_mode = true;
1967                 rc = gaudi_enable_msi_multi(hdev);
1968         }
1969
1970         if (rc)
1971                 goto free_pci_irq_vectors;
1972
1973         gaudi->hw_cap_initialized |= HW_CAP_MSI;
1974
1975         return 0;
1976
1977 free_pci_irq_vectors:
1978         pci_free_irq_vectors(hdev->pdev);
1979         return rc;
1980 }
1981
1982 static void gaudi_sync_irqs(struct hl_device *hdev)
1983 {
1984         struct gaudi_device *gaudi = hdev->asic_specific;
1985         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1986
1987         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1988                 return;
1989
1990         /* Wait for all pending IRQs to be handled */
1991         if (gaudi->multi_msi_mode) {
1992                 for (i = 0 ; i < cq_cnt ; i++)
1993                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1994
1995                 synchronize_irq(gaudi_pci_irq_vector(hdev,
1996                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
1997                                                 true));
1998         } else {
1999                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2000         }
2001 }
2002
2003 static void gaudi_disable_msi(struct hl_device *hdev)
2004 {
2005         struct gaudi_device *gaudi = hdev->asic_specific;
2006         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2007
2008         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2009                 return;
2010
2011         gaudi_sync_irqs(hdev);
2012
2013         if (gaudi->multi_msi_mode) {
2014                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2015                                                 true);
2016                 free_irq(irq, &hdev->event_queue);
2017
2018                 for (i = 0 ; i < cq_cnt ; i++) {
2019                         irq = gaudi_pci_irq_vector(hdev, i, false);
2020                         free_irq(irq, &hdev->completion_queue[i]);
2021                 }
2022         } else {
2023                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2024         }
2025
2026         pci_free_irq_vectors(hdev->pdev);
2027
2028         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2029 }
2030
2031 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2032 {
2033         struct gaudi_device *gaudi = hdev->asic_specific;
2034
2035         if (hdev->asic_prop.fw_security_enabled)
2036                 return;
2037
2038         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2039                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2040                 return;
2041
2042         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2043                 return;
2044
2045         if (!hdev->sram_scrambler_enable)
2046                 return;
2047
2048         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2049                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2050         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2051                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2052         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2053                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2054         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2055                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2056         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2057                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2058         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2059                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2060         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2061                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2062         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2063                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2064
2065         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2066                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2067         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2068                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2069         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2070                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2071         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2072                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2073         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2074                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2075         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2076                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2077         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2078                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2079         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2080                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2081
2082         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2083                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2084         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2085                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2086         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2087                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2088         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2089                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2090         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2091                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2092         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2093                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2094         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2095                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2096         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2097                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2098
2099         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2100 }
2101
2102 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2103 {
2104         struct gaudi_device *gaudi = hdev->asic_specific;
2105
2106         if (hdev->asic_prop.fw_security_enabled)
2107                 return;
2108
2109         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2110                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2111                 return;
2112
2113         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2114                 return;
2115
2116         if (!hdev->dram_scrambler_enable)
2117                 return;
2118
2119         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2120                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2121         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2122                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2123         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2124                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2125         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2126                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2127         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2128                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2129         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2130                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2131         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2132                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2133         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2134                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2135
2136         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2137                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2138         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2139                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2140         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2141                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2142         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2143                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2144         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2145                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2146         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2147                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2148         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2149                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2150         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2151                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2152
2153         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2154                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2155         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2156                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2157         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2158                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2159         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2160                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2161         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2162                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2163         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2164                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2165         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2166                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2167         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2168                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2169
2170         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2171 }
2172
2173 static void gaudi_init_e2e(struct hl_device *hdev)
2174 {
2175         if (hdev->asic_prop.fw_security_enabled)
2176                 return;
2177
2178         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2179                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2180                 return;
2181
2182         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2183         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2184         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2185         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2186
2187         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2188         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2189         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2190         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2191
2192         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2193         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2194         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2195         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2196
2197         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2198         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2199         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2200         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2201
2202         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2203         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2204         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2205         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2206
2207         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2208         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2209         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2210         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2211
2212         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2213         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2214         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2215         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2216
2217         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2218         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2219         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2220         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2221
2222         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2223         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2224         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2225         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2226
2227         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2228         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2229         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2230         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2231
2232         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2233         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2234         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2235         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2236
2237         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2238         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2239         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2240         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2241
2242         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2243         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2244         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2245         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2246
2247         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2248         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2249         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2250         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2251
2252         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2253         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2254         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2255         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2256
2257         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2258         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2259         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2260         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2261
2262         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2263         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2264         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2265         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2266
2267         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2268         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2269         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2270         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2271
2272         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2273         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2274         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2275         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2276
2277         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2278         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2279         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2280         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2281
2282         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2283         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2284         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2285         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2286
2287         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2288         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2289         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2290         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2291
2292         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2293         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2294         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2295         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2296
2297         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2298         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2299         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2300         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2301
2302         if (!hdev->dram_scrambler_enable) {
2303                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2304                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2305                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2306                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2307
2308                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2309                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2310                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2311                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2312
2313                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2314                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2315                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2316                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2317
2318                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2319                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2320                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2321                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2322
2323                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2324                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2325                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2326                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2327
2328                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2329                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2330                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2331                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2332
2333                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2334                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2335                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2336                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2337
2338                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2339                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2340                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2341                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2342
2343                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2344                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2345                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2346                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2347
2348                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2349                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2350                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2351                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2352
2353                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2354                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2355                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2356                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2357
2358                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2359                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2360                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2361                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2362
2363                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2364                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2365                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2366                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2367
2368                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2369                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2370                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2371                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2372
2373                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2374                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2375                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2376                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2377
2378                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2379                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2380                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2381                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2382
2383                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2384                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2385                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2386                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2387
2388                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2389                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2390                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2391                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2392
2393                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2394                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2395                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2396                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2397
2398                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2399                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2400                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2401                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2402
2403                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2404                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2405                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2406                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2407
2408                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2409                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2410                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2411                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2412
2413                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2414                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2415                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2416                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2417
2418                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2419                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2420                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2421                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2422         }
2423
2424         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2425                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2426         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2427                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2428
2429         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2430                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2431         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2432                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2433
2434         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2435                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2436         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2437                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2438
2439         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2440                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2441         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2442                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2443
2444         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2445                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2446         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2447                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2448
2449         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2450                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2451         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2452                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2453
2454         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2455                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2456         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2457                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2458
2459         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2460                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2461         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2462                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2463
2464         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2465                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2466         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2467                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2468
2469         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2470                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2471         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2472                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2473
2474         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2475                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2476         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2477                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2478
2479         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2480                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2481         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2482                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2483
2484         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2485                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2486         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2487                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2488
2489         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2490                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2491         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2492                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2493
2494         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2495                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2496         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2497                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2498
2499         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2500                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2501         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2502                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2503
2504         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2505                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2506         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2507                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2508
2509         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2510                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2511         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2512                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2513
2514         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2515                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2516         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2517                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2518
2519         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2520                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2521         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2522                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2523
2524         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2525                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2526         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2527                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2528
2529         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2530                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2531         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2532                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2533
2534         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2535                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2536         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2537                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2538
2539         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2540                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2541         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2542                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2543 }
2544
2545 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2546 {
2547         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2548
2549         if (hdev->asic_prop.fw_security_enabled)
2550                 return;
2551
2552         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2553                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2554                 return;
2555
2556         hbm0_wr = 0x33333333;
2557         hbm0_rd = 0x77777777;
2558         hbm1_wr = 0x55555555;
2559         hbm1_rd = 0xDDDDDDDD;
2560
2561         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2562         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2563         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2564         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2565
2566         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2567         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2568         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2569         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2570
2571         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2572         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2573         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2574         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2575
2576         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2577         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2578         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2579         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2580
2581         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2582                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2583                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2584         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2585                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2586                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2587         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2588                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2589                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2590         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2591                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2592                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2593
2594         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2595                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2596                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2597         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2598                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2599                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2600         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2601                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2602                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2603         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2604                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2605                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2606 }
2607
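/*
 * One-time "golden" register setup: E2E and HBM credit configuration,
 * masking of TPC arithmetic interrupts, I-cache fetch line setup, clearing
 * of the first 128 SRAM bytes for TDMA and setting the MME EUS rollup
 * count.
 */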
2608 static void gaudi_init_golden_registers(struct hl_device *hdev)
2609 {
2610         u32 tpc_offset;
2611         int tpc_id, i;
2612
2613         gaudi_init_e2e(hdev);
2614         gaudi_init_hbm_cred(hdev);
2615
2616         for (tpc_id = 0, tpc_offset = 0;
2617                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2618                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2619                 /* Mask all arithmetic interrupts from TPC */
2620                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2621                 /* Set 16 cache lines */
2622                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2623                                 ICACHE_FETCH_LINE_NUM, 2);
2624         }
2625
2626         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2627         for (i = 0 ; i < 128 ; i += 8)
2628                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2629
2630         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2631         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2632         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2633         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2634 }
2635
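/*
 * Configure a single stream (qman_id) of a PCI DMA QMAN: PQ base, size and
 * indices, LDMA offsets and the CP_MSG_BASE 0-3 addresses that point to
 * the sync manager monitors and sync objects. Error reporting (RAZWI IRQ,
 * arbiter error message, watchdog) is programmed only once, on stream 0.
 */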
2636 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2637                                         int qman_id, dma_addr_t qman_pq_addr)
2638 {
2639         struct cpu_dyn_regs *dyn_regs =
2640                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2641         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2642         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2643         u32 q_off, dma_qm_offset;
2644         u32 dma_qm_err_cfg, irq_handler_offset;
2645
2646         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2647
2648         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2649                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2650         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2651                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2652         so_base_en_lo = lower_32_bits(CFG_BASE +
2653                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2654         so_base_en_hi = upper_32_bits(CFG_BASE +
2655                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2656         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2657                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2658         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2659                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2660         so_base_ws_lo = lower_32_bits(CFG_BASE +
2661                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2662         so_base_ws_hi = upper_32_bits(CFG_BASE +
2663                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2664
2665         q_off = dma_qm_offset + qman_id * 4;
2666
2667         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2668         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2669
2670         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2671         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2672         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2673
2674         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2675         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2676                                                         QMAN_LDMA_SRC_OFFSET);
2677         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2678                                                         QMAN_LDMA_DST_OFFSET);
2679
2680         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2681         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2682         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2683         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2684         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2685         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2686         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2687         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2688
2689         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2690
2691         /* The following configuration is needed only once per QMAN */
2692         if (qman_id == 0) {
2693                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2694                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2695                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2696
2697                 /* Configure RAZWI IRQ */
2698                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2699                 if (hdev->stop_on_err)
2700                         dma_qm_err_cfg |=
2701                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2702
2703                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2704
2705                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2706                         lower_32_bits(CFG_BASE + irq_handler_offset));
2707                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2708                         upper_32_bits(CFG_BASE + irq_handler_offset));
2709
2710                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2711                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2712                                                                         dma_id);
2713
2714                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2715                                 QM_ARB_ERR_MSG_EN_MASK);
2716
2717                 /* Increase ARB WDT to support streams architecture */
2718                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2719                                 GAUDI_ARB_WDT_TIMEOUT);
2720
2721                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2722                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2723
2724                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2725         }
2726 }
2727
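/*
 * Configure the DMA core engine itself (as opposed to its QMAN):
 * outstanding read limits, error message reporting, MMU bypass for the
 * secured channel and finally the enable bit.
 */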
2728 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2729 {
2730         struct cpu_dyn_regs *dyn_regs =
2731                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2732         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2733         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2734         u32 irq_handler_offset;
2735
2736         /* Set to maximum possible according to physical size */
2737         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2738         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2739
2740         /* WA for H/W bug H3-2116 */
2741         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2742
2743         /* STOP_ON bit implies the operation gets no completion upon RAZWI */
2744         if (hdev->stop_on_err)
2745                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2746
2747         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2748
2749         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2750                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2751                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2752
2753         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2754                 lower_32_bits(CFG_BASE + irq_handler_offset));
2755         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2756                 upper_32_bits(CFG_BASE + irq_handler_offset));
2757
2758         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2759                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2760         WREG32(mmDMA0_CORE_PROT + dma_offset,
2761                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2762         /* If the channel is secured, it should be in MMU bypass mode */
2763         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2764                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2765         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2766 }
2767
2768 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2769                                 u32 enable_mask)
2770 {
2771         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2772
2773         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2774 }
2775
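/*
 * For each PCI DMA channel, bind every stream to its kernel queue,
 * completion queue and MSI vector, initialize the QMAN streams and the DMA
 * core, and enable the channel.
 */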
2776 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2777 {
2778         struct gaudi_device *gaudi = hdev->asic_specific;
2779         struct hl_hw_queue *q;
2780         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2781
2782         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2783                 return;
2784
2785         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2786                 dma_id = gaudi_dma_assignment[i];
2787                 /*
2788                  * Queues after the CPU Q need to add 1 to get the correct
2789                  * queue index. In addition, the CPU EQ and NIC IRQs need to
2790                  * be added in order to get the correct MSI vector.
2791                  */
2792                 if (dma_id > 1) {
2793                         cpu_skip = 1;
2794                         nic_skip = NIC_NUMBER_OF_ENGINES;
2795                 } else {
2796                         cpu_skip = 0;
2797                         nic_skip = 0;
2798                 }
2799
2800                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2801                         q_idx = 4 * dma_id + j + cpu_skip;
2802                         q = &hdev->kernel_queues[q_idx];
2803                         q->cq_id = cq_id++;
2804                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2805                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2806                                                 q->bus_address);
2807                 }
2808
2809                 gaudi_init_dma_core(hdev, dma_id);
2810
2811                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2812         }
2813
2814         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2815 }
2816
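/*
 * Configure one stream of an HBM DMA QMAN. Streams 0-3 get a PQ and the
 * CPDMA offsets; stream 4 is the lower CP and gets the LDMA offsets plus
 * the per-QMAN error reporting (RAZWI IRQ, arbiter errors, watchdog).
 */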
2817 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2818                                         int qman_id, u64 qman_base_addr)
2819 {
2820         struct cpu_dyn_regs *dyn_regs =
2821                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2822         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2823         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2824         u32 dma_qm_err_cfg, irq_handler_offset;
2825         u32 q_off, dma_qm_offset;
2826
2827         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2828
2829         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2830                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2831         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2832                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2833         so_base_en_lo = lower_32_bits(CFG_BASE +
2834                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2835         so_base_en_hi = upper_32_bits(CFG_BASE +
2836                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2837         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2838                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2839         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2840                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2841         so_base_ws_lo = lower_32_bits(CFG_BASE +
2842                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2843         so_base_ws_hi = upper_32_bits(CFG_BASE +
2844                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2845
2846         q_off = dma_qm_offset + qman_id * 4;
2847
2848         if (qman_id < 4) {
2849                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2850                                         lower_32_bits(qman_base_addr));
2851                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2852                                         upper_32_bits(qman_base_addr));
2853
2854                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2855                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2856                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2857
2858                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2859                                                         QMAN_CPDMA_SIZE_OFFSET);
2860                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2861                                                         QMAN_CPDMA_SRC_OFFSET);
2862                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2863                                                         QMAN_CPDMA_DST_OFFSET);
2864         } else {
2865                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2866                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2867                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2868
2869                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2870                                                         QMAN_LDMA_SIZE_OFFSET);
2871                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2872                                                         QMAN_LDMA_SRC_OFFSET);
2873                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2874                                                         QMAN_LDMA_DST_OFFSET);
2875
2876                 /* Configure RAZWI IRQ */
2877                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2878                 if (hdev->stop_on_err)
2879                         dma_qm_err_cfg |=
2880                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2881
2882                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2883
2884                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2885                         lower_32_bits(CFG_BASE + irq_handler_offset));
2886                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2887                         upper_32_bits(CFG_BASE + irq_handler_offset));
2888
2889                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2890                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2891                                                                         dma_id);
2892
2893                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2894                                 QM_ARB_ERR_MSG_EN_MASK);
2895
2896                 /* Increase ARB WDT to support streams architecture */
2897                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2898                                 GAUDI_ARB_WDT_TIMEOUT);
2899
2900                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2901                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2902                                 QMAN_INTERNAL_MAKE_TRUSTED);
2903         }
2904
2905         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2906         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2907         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2908         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2909
2910         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2911         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2912                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2913                                 mtr_base_ws_lo);
2914                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2915                                 mtr_base_ws_hi);
2916                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2917                                 so_base_ws_lo);
2918                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2919                                 so_base_ws_hi);
2920         }
2921 }
2922
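/*
 * Initialize all HBM DMA channels: four upper-CP streams plus the lower CP
 * per channel, then the DMA core, and finally enable the QMAN.
 */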
2923 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2924 {
2925         struct gaudi_device *gaudi = hdev->asic_specific;
2926         struct gaudi_internal_qman_info *q;
2927         u64 qman_base_addr;
2928         int i, j, dma_id, internal_q_index;
2929
2930         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2931                 return;
2932
2933         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2934                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2935
2936                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2937                          /*
2938                           * Add the CPU queue in order to get the correct queue
2939                           * number, as all internal queues are placed after it
2940                           */
2941                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2942
2943                         q = &gaudi->internal_qmans[internal_q_index];
2944                         qman_base_addr = (u64) q->pq_dma_addr;
2945                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2946                                                 qman_base_addr);
2947                 }
2948
2949                 /* Initializing lower CP for HBM DMA QMAN */
2950                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2951
2952                 gaudi_init_dma_core(hdev, dma_id);
2953
2954                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2955         }
2956
2957         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2958 }
2959
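/*
 * Configure one stream of an MME QMAN, following the same scheme as the
 * HBM DMA QMANs: streams 0-3 program the PQ with CPDMA offsets, stream 4
 * is the lower CP with LDMA offsets and error reporting.
 */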
2960 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2961                                         int qman_id, u64 qman_base_addr)
2962 {
2963         struct cpu_dyn_regs *dyn_regs =
2964                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2965         u32 mtr_base_lo, mtr_base_hi;
2966         u32 so_base_lo, so_base_hi;
2967         u32 irq_handler_offset;
2968         u32 q_off, mme_id;
2969         u32 mme_qm_err_cfg;
2970
2971         mtr_base_lo = lower_32_bits(CFG_BASE +
2972                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2973         mtr_base_hi = upper_32_bits(CFG_BASE +
2974                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2975         so_base_lo = lower_32_bits(CFG_BASE +
2976                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2977         so_base_hi = upper_32_bits(CFG_BASE +
2978                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2979
2980         q_off = mme_offset + qman_id * 4;
2981
2982         if (qman_id < 4) {
2983                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2984                                         lower_32_bits(qman_base_addr));
2985                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2986                                         upper_32_bits(qman_base_addr));
2987
2988                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2989                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2990                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2991
2992                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2993                                                         QMAN_CPDMA_SIZE_OFFSET);
2994                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2995                                                         QMAN_CPDMA_SRC_OFFSET);
2996                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2997                                                         QMAN_CPDMA_DST_OFFSET);
2998         } else {
2999                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3000                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3001                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3002
3003                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3004                                                         QMAN_LDMA_SIZE_OFFSET);
3005                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3006                                                         QMAN_LDMA_SRC_OFFSET);
3007                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3008                                                         QMAN_LDMA_DST_OFFSET);
3009
3010                 /* Configure RAZWI IRQ */
3011                 mme_id = mme_offset /
3012                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3013
3014                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3015                 if (hdev->stop_on_err)
3016                         mme_qm_err_cfg |=
3017                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3018
3019                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3020
3021                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3022                         lower_32_bits(CFG_BASE + irq_handler_offset));
3023                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3024                         upper_32_bits(CFG_BASE + irq_handler_offset));
3025
3026                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3027                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3028                                                                         mme_id);
3029
3030                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3031                                 QM_ARB_ERR_MSG_EN_MASK);
3032
3033                 /* Increase ARB WDT to support streams architecture */
3034                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3035                                 GAUDI_ARB_WDT_TIMEOUT);
3036
3037                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3038                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3039                                 QMAN_INTERNAL_MAKE_TRUSTED);
3040         }
3041
3042         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3043         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3044         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3045         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3046 }
3047
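/*
 * Initialize the MME QMANs. Only the MME0 and MME2 QMANs are programmed
 * and enabled; each is set up with four upper-CP streams and a lower CP.
 */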
3048 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3049 {
3050         struct gaudi_device *gaudi = hdev->asic_specific;
3051         struct gaudi_internal_qman_info *q;
3052         u64 qman_base_addr;
3053         u32 mme_offset;
3054         int i, internal_q_index;
3055
3056         if (gaudi->hw_cap_initialized & HW_CAP_MME)
3057                 return;
3058
3059         /*
3060          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3061          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3062          */
3063
3064         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3065
3066         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3067                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3068                 q = &gaudi->internal_qmans[internal_q_index];
3069                 qman_base_addr = (u64) q->pq_dma_addr;
3070                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3071                                         qman_base_addr);
3072                 if (i == 3)
3073                         mme_offset = 0;
3074         }
3075
3076         /* Initializing lower CP for MME QMANs */
3077         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3078         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3079         gaudi_init_mme_qman(hdev, 0, 4, 0);
3080
3081         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3082         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3083
3084         gaudi->hw_cap_initialized |= HW_CAP_MME;
3085 }
3086
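/*
 * Configure one stream of a TPC QMAN, same scheme as the DMA and MME
 * QMANs. The WS sync manager bases on CP_MSG_BASE 2/3 are programmed only
 * for the TPC that serves the sync stream collective.
 */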
3087 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3088                                 int qman_id, u64 qman_base_addr)
3089 {
3090         struct cpu_dyn_regs *dyn_regs =
3091                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3092         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3093         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3094         u32 tpc_qm_err_cfg, irq_handler_offset;
3095         u32 q_off, tpc_id;
3096
3097         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3098                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3099         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3100                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3101         so_base_en_lo = lower_32_bits(CFG_BASE +
3102                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3103         so_base_en_hi = upper_32_bits(CFG_BASE +
3104                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3105         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3106                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3107         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3108                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3109         so_base_ws_lo = lower_32_bits(CFG_BASE +
3110                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3111         so_base_ws_hi = upper_32_bits(CFG_BASE +
3112                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3113
3114         q_off = tpc_offset + qman_id * 4;
3115
3116         tpc_id = tpc_offset /
3117                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3118
3119         if (qman_id < 4) {
3120                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3121                                         lower_32_bits(qman_base_addr));
3122                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3123                                         upper_32_bits(qman_base_addr));
3124
3125                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3126                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3127                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3128
3129                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3130                                                         QMAN_CPDMA_SIZE_OFFSET);
3131                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3132                                                         QMAN_CPDMA_SRC_OFFSET);
3133                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3134                                                         QMAN_CPDMA_DST_OFFSET);
3135         } else {
3136                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3137                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3138                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3139
3140                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3141                                                         QMAN_LDMA_SIZE_OFFSET);
3142                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3143                                                         QMAN_LDMA_SRC_OFFSET);
3144                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3145                                                         QMAN_LDMA_DST_OFFSET);
3146
3147                 /* Configure RAZWI IRQ */
3148                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3149                 if (hdev->stop_on_err)
3150                         tpc_qm_err_cfg |=
3151                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3152
3153                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3154
3155                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3156                         lower_32_bits(CFG_BASE + irq_handler_offset));
3157                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3158                         upper_32_bits(CFG_BASE + irq_handler_offset));
3159
3160                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3161                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3162                                                                         tpc_id);
3163
3164                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3165                                 QM_ARB_ERR_MSG_EN_MASK);
3166
3167                 /* Increase ARB WDT to support streams architecture */
3168                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3169                                 GAUDI_ARB_WDT_TIMEOUT);
3170
3171                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3172                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3173                                 QMAN_INTERNAL_MAKE_TRUSTED);
3174         }
3175
3176         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3177         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3178         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3179         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3180
3181         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3182         if (tpc_id == 6) {
3183                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3184                                 mtr_base_ws_lo);
3185                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3186                                 mtr_base_ws_hi);
3187                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3188                                 so_base_ws_lo);
3189                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3190                                 so_base_ws_hi);
3191         }
3192 }
3193
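/*
 * Initialize all TPC QMANs: four upper-CP streams and a lower CP per TPC,
 * program the sync manager base address of each TPC, enable the engine and
 * mark it in hw_cap_initialized.
 */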
3194 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3195 {
3196         struct gaudi_device *gaudi = hdev->asic_specific;
3197         struct gaudi_internal_qman_info *q;
3198         u64 qman_base_addr;
3199         u32 so_base_hi, tpc_offset = 0;
3200         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3201                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3202         int i, tpc_id, internal_q_index;
3203
3204         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3205                 return;
3206
3207         so_base_hi = upper_32_bits(CFG_BASE +
3208                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3209
3210         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3211                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3212                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3213                                                 tpc_id * QMAN_STREAMS + i;
3214                         q = &gaudi->internal_qmans[internal_q_index];
3215                         qman_base_addr = (u64) q->pq_dma_addr;
3216                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3217                                                 qman_base_addr);
3218
3219                         if (i == 3) {
3220                                 /* Initializing lower CP for TPC QMAN */
3221                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3222
3223                                 /* Enable the QMAN and TPC channel */
3224                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3225                                                 QMAN_TPC_ENABLE);
3226                         }
3227                 }
3228
3229                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3230                                 so_base_hi);
3231
3232                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3233
3234                 gaudi->hw_cap_initialized |=
3235                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3236         }
3237 }
3238
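/*
 * Configure one stream of a NIC QMAN. All four CP_MSG_BASE pairs are
 * programmed so the NIC can participate in the sync stream collective;
 * error reporting is programmed once per QMAN, on stream 0.
 */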
3239 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3240                                 int qman_id, u64 qman_base_addr, int nic_id)
3241 {
3242         struct cpu_dyn_regs *dyn_regs =
3243                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3244         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3245         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3246         u32 nic_qm_err_cfg, irq_handler_offset;
3247         u32 q_off;
3248
3249         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3250                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3251         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3252                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3253         so_base_en_lo = lower_32_bits(CFG_BASE +
3254                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3255         so_base_en_hi = upper_32_bits(CFG_BASE +
3256                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3257         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3258                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3259         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3260                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3261         so_base_ws_lo = lower_32_bits(CFG_BASE +
3262                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3263         so_base_ws_hi = upper_32_bits(CFG_BASE +
3264                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3265
3266         q_off = nic_offset + qman_id * 4;
3267
3268         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3269         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3270
3271         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3272         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3273         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3274
3275         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3276                                                         QMAN_LDMA_SIZE_OFFSET);
3277         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3278                                                         QMAN_LDMA_SRC_OFFSET);
3279         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3280                                                         QMAN_LDMA_DST_OFFSET);
3281
3282         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3283         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3284         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3285         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3286
3287         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3288         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3289         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3290         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3291         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3292
3293         if (qman_id == 0) {
3294                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3295                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3296                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3297
3298                 /* Configure RAZWI IRQ */
3299                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3300                 if (hdev->stop_on_err)
3301                         nic_qm_err_cfg |=
3302                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3303
3304                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3305
3306                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3307                         lower_32_bits(CFG_BASE + irq_handler_offset));
3308                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3309                         upper_32_bits(CFG_BASE + irq_handler_offset));
3310
3311                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3312                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3313                                                                         nic_id);
3314
3315                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3316                                 QM_ARB_ERR_MSG_EN_MASK);
3317
3318                 /* Increase ARB WDT to support streams architecture */
3319                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3320                                 GAUDI_ARB_WDT_TIMEOUT);
3321
3322                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3323                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3324                                 QMAN_INTERNAL_MAKE_TRUSTED);
3325         }
3326 }
3327
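/*
 * Initialize the QMANs of every NIC port enabled in nic_ports_mask,
 * walking the register space of the two QMANs inside each NIC block.
 */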
3328 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3329 {
3330         struct gaudi_device *gaudi = hdev->asic_specific;
3331         struct gaudi_internal_qman_info *q;
3332         u64 qman_base_addr;
3333         u32 nic_offset = 0;
3334         u32 nic_delta_between_qmans =
3335                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3336         u32 nic_delta_between_nics =
3337                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3338         int i, nic_id, internal_q_index;
3339
3340         if (!hdev->nic_ports_mask)
3341                 return;
3342
3343         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3344                 return;
3345
3346         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3347
3348         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3349                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3350                         nic_offset += nic_delta_between_qmans;
3351                         if (nic_id & 1) {
3352                                 nic_offset -= (nic_delta_between_qmans * 2);
3353                                 nic_offset += nic_delta_between_nics;
3354                         }
3355                         continue;
3356                 }
3357
3358                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3359                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3360                                                 nic_id * QMAN_STREAMS + i;
3361                         q = &gaudi->internal_qmans[internal_q_index];
3362                         qman_base_addr = (u64) q->pq_dma_addr;
3363                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3364                                                 qman_base_addr, nic_id);
3365                 }
3366
3367                 /* Enable the QMAN */
3368                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3369
3370                 nic_offset += nic_delta_between_qmans;
3371                 if (nic_id & 1) {
3372                         nic_offset -= (nic_delta_between_qmans * 2);
3373                         nic_offset += nic_delta_between_nics;
3374                 }
3375
3376                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3377         }
3378 }
3379
3380 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3381 {
3382         struct gaudi_device *gaudi = hdev->asic_specific;
3383
3384         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3385                 return;
3386
3387         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3388         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3389         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3390 }
3391
3392 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3393 {
3394         struct gaudi_device *gaudi = hdev->asic_specific;
3395
3396         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3397                 return;
3398
3399         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3400         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3401         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3402         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3403         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3404 }
3405
3406 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3407 {
3408         struct gaudi_device *gaudi = hdev->asic_specific;
3409
3410         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3411                 return;
3412
3413         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3414         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3415 }
3416
3417 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3418 {
3419         struct gaudi_device *gaudi = hdev->asic_specific;
3420         u32 tpc_offset = 0;
3421         int tpc_id;
3422
3423         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3424                 return;
3425
3426         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3427                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3428                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3429         }
3430 }
3431
3432 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3433 {
3434         struct gaudi_device *gaudi = hdev->asic_specific;
3435         u32 nic_mask, nic_offset = 0;
3436         u32 nic_delta_between_qmans =
3437                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3438         u32 nic_delta_between_nics =
3439                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3440         int nic_id;
3441
3442         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3443                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3444
3445                 if (gaudi->hw_cap_initialized & nic_mask)
3446                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3447
3448                 nic_offset += nic_delta_between_qmans;
3449                 if (nic_id & 1) {
3450                         nic_offset -= (nic_delta_between_qmans * 2);
3451                         nic_offset += nic_delta_between_nics;
3452                 }
3453         }
3454 }
3455
3456 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3457 {
3458         struct gaudi_device *gaudi = hdev->asic_specific;
3459
3460         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3461                 return;
3462
3463         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3464         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3465         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3466         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3467 }
3468
3469 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3470 {
3471         struct gaudi_device *gaudi = hdev->asic_specific;
3472
3473         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3474                 return;
3475
3476         /* Stop CPs of HBM DMA QMANs */
3477
3478         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3479         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3480         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3481         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3482         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3483 }
3484
3485 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3486 {
3487         struct gaudi_device *gaudi = hdev->asic_specific;
3488
3489         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3490                 return;
3491
3492         /* Stop CPs of MME QMANs */
3493         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3494         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3495 }
3496
3497 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3498 {
3499         struct gaudi_device *gaudi = hdev->asic_specific;
3500
3501         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3502                 return;
3503
3504         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3505         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3506         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3507         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3508         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3509         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3510         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3511         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3512 }
3513
3514 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3515 {
3516         struct gaudi_device *gaudi = hdev->asic_specific;
3517
3518         /* Stop upper CPs of QMANs */
3519
3520         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3521                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3522                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3523                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3524                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3525
3526         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3527                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3528                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3529                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3530                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3531
3532         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3533                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3534                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3535                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3536                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3537
3538         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3539                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3540                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3541                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3542                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3543
3544         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3545                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3546                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3547                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3548                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3549
3550         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3551                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3552                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3553                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3554                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3555
3556         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3557                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3558                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3559                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3560                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3561
3562         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3563                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3564                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3565                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3566                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3567
3568         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3569                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3570                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3571                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3572                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3573
3574         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3575                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3576                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3577                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3578                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3579 }
3580
3581 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3582 {
3583         struct gaudi_device *gaudi = hdev->asic_specific;
3584
3585         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3586                 return;
3587
3588         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3589         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3590         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3591 }
3592
3593 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3594 {
3595         struct gaudi_device *gaudi = hdev->asic_specific;
3596
3597         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3598                 return;
3599
3600         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3601         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3602         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3603         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3604         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3605 }
3606
3607 static void gaudi_mme_stall(struct hl_device *hdev)
3608 {
3609         struct gaudi_device *gaudi = hdev->asic_specific;
3610
3611         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3612                 return;
3613
3614         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3615         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3616         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3617         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3618         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3619         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3620         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3621         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3622         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3623         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3624         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3625         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3626         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3627         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3628         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3629         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3630         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3631 }
3632
3633 static void gaudi_tpc_stall(struct hl_device *hdev)
3634 {
3635         struct gaudi_device *gaudi = hdev->asic_specific;
3636
3637         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3638                 return;
3639
3640         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3641         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3642         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3643         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3644         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3645         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3646         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3647         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3648 }
3649
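/*
 * Enable or disable clock gating per engine according to
 * hdev->clock_gating_mask. Not touched while a debug session is active or
 * when firmware security is enabled.
 */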
3650 static void gaudi_set_clock_gating(struct hl_device *hdev)
3651 {
3652         struct gaudi_device *gaudi = hdev->asic_specific;
3653         u32 qman_offset;
3654         bool enable;
3655         int i;
3656
3657         /* While a debug session is in progress, don't enable clock gating
3658          * as it may interfere
3659          */
3660         if (hdev->in_debug)
3661                 return;
3662
3663         if (hdev->asic_prop.fw_security_enabled)
3664                 return;
3665
3666         for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3667                 enable = !!(hdev->clock_gating_mask &
3668                                 (BIT_ULL(gaudi_dma_assignment[i])));
3669
3670                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3671                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3672                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3673                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3674                                 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3675         }
3676
3677         for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3678                 enable = !!(hdev->clock_gating_mask &
3679                                 (BIT_ULL(gaudi_dma_assignment[i])));
3680
3681                 /* GC sends work to the DMA engine through the upper CP in
3682                  * DMA5, so clock gating must not be enabled for that DMA
3683                  */
3684                 if (i == GAUDI_HBM_DMA_4)
3685                         enable = 0;
3686
3687                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3688                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3689                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3690                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3691                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3692         }
3693
3694         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3695         WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3696         WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3697
3698         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3699         WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3700         WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3701
3702         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3703                 enable = !!(hdev->clock_gating_mask &
3704                                 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3705
3706                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3707                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3708                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3709                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3710
3711                 qman_offset += TPC_QMAN_OFFSET;
3712         }
3713
3714         gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3715 }
3716
3717 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3718 {
3719         struct gaudi_device *gaudi = hdev->asic_specific;
3720         u32 qman_offset;
3721         int i;
3722
3723         if (hdev->asic_prop.fw_security_enabled)
3724                 return;
3725
3726         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3727                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3728                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3729
3730                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3731         }
3732
3733         WREG32(mmMME0_QM_CGM_CFG, 0);
3734         WREG32(mmMME0_QM_CGM_CFG1, 0);
3735         WREG32(mmMME2_QM_CGM_CFG, 0);
3736         WREG32(mmMME2_QM_CGM_CFG1, 0);
3737
3738         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3739                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3740                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3741
3742                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3743         }
3744
3745         gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3746 }
3747
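/* Zero the 64-bit PSOC timestamp counter and (re-)enable it */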
3748 static void gaudi_enable_timestamp(struct hl_device *hdev)
3749 {
3750         /* Disable the timestamp counter */
3751         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3752
3753         /* Zero the lower/upper parts of the 64-bit counter */
3754         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3755         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3756
3757         /* Enable the counter */
3758         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3759 }
3760
3761 static void gaudi_disable_timestamp(struct hl_device *hdev)
3762 {
3763         /* Disable the timestamp counter */
3764         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3765 }
3766
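/*
 * gaudi_halt_engines - bring all engines to a halted state
 *
 * The order matters: first stop the QMANs, then stall the engines, and only
 * then disable the QMANs, the timestamp counter and MSI.
 */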
3767 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3768 {
3769         u32 wait_timeout_ms;
3770
3771         dev_info(hdev->dev,
3772                 "Halting compute engines and disabling interrupts\n");
3773
3774         if (hdev->pldm)
3775                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3776         else
3777                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3778
3779         gaudi_stop_nic_qmans(hdev);
3780         gaudi_stop_mme_qmans(hdev);
3781         gaudi_stop_tpc_qmans(hdev);
3782         gaudi_stop_hbm_dma_qmans(hdev);
3783         gaudi_stop_pci_dma_qmans(hdev);
3784
3785         hdev->asic_funcs->disable_clock_gating(hdev);
3786
3787         msleep(wait_timeout_ms);
3788
3789         gaudi_pci_dma_stall(hdev);
3790         gaudi_hbm_dma_stall(hdev);
3791         gaudi_tpc_stall(hdev);
3792         gaudi_mme_stall(hdev);
3793
3794         msleep(wait_timeout_ms);
3795
3796         gaudi_disable_nic_qmans(hdev);
3797         gaudi_disable_mme_qmans(hdev);
3798         gaudi_disable_tpc_qmans(hdev);
3799         gaudi_disable_hbm_dma_qmans(hdev);
3800         gaudi_disable_pci_dma_qmans(hdev);
3801
3802         gaudi_disable_timestamp(hdev);
3803
3804         gaudi_disable_msi(hdev);
3805 }
3806
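/*
 * gaudi_mmu_init - program the hop0 address of every ASID, invalidate the MMU
 * cache and enable the device MMU.
 */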
3807 static int gaudi_mmu_init(struct hl_device *hdev)
3808 {
3809         struct asic_fixed_properties *prop = &hdev->asic_prop;
3810         struct gaudi_device *gaudi = hdev->asic_specific;
3811         u64 hop0_addr;
3812         int rc, i;
3813
3814         if (!hdev->mmu_enable)
3815                 return 0;
3816
3817         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3818                 return 0;
3819
3820         for (i = 0 ; i < prop->max_asid ; i++) {
3821                 hop0_addr = prop->mmu_pgt_addr +
3822                                 (i * prop->mmu_hop_table_size);
3823
3824                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3825                 if (rc) {
3826                         dev_err(hdev->dev,
3827                                 "failed to set hop0 addr for asid %d\n", i);
3828                         goto err;
3829                 }
3830         }
3831
3832         /* init the MMU cache management page */
3833         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3834         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3835
3836         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3837
3838         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3839         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3840
3841         WREG32(mmSTLB_HOP_CONFIGURATION,
3842                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3843
3844         /*
3845          * The H/W expects the first PI after init to be 1. After wraparound
3846          * we'll write 0.
3847          */
3848         gaudi->mmu_cache_inv_pi = 1;
3849
3850         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3851
3852         return 0;
3853
3854 err:
3855         return rc;
3856 }
3857
3858 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3859 {
3860         void __iomem *dst;
3861
3862         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3863
3864         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3865 }
3866
3867 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3868 {
3869         void __iomem *dst;
3870
3871         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3872
3873         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3874 }
3875
3876 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3877 {
3878         struct dynamic_fw_load_mgr *dynamic_loader;
3879         struct cpu_dyn_regs *dyn_regs;
3880
3881         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3882
3883         /*
3884          * Update the initial values of a few specific dynamic registers here,
3885          * as they must be hard-coded before the first descriptor is read from
3886          * the FW. In later stages of the protocol these values are updated
3887          * automatically by reading the FW descriptor, so the data there is
3888          * always up-to-date.
3889          */
3890         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3891         dyn_regs->kmd_msg_to_cpu =
3892                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3893         dyn_regs->cpu_cmd_status_to_host =
3894                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3895
3896         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3897 }
3898
3899 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3900 {
3901         struct static_fw_load_mgr *static_loader;
3902
3903         static_loader = &hdev->fw_loader.static_loader;
3904
3905         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3906         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3907         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3908         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3909         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3910         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3911         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3912         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3913         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3914         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3915         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3916         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3917         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3918                         GAUDI_PLDM_RESET_WAIT_MSEC :
3919                         GAUDI_CPU_RESET_WAIT_MSEC;
3920 }
3921
3922 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3923 {
3924         struct asic_fixed_properties *prop = &hdev->asic_prop;
3925         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3926
3927         /* fill common fields */
3928         fw_loader->linux_loaded = false;
3929         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3930         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3931         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3932         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3933         fw_loader->skip_bmc = !hdev->bmc_enable;
3934         fw_loader->sram_bar_id = SRAM_BAR_ID;
3935         fw_loader->dram_bar_id = HBM_BAR_ID;
3936
3937         if (prop->dynamic_fw_load)
3938                 gaudi_init_dynamic_firmware_loader(hdev);
3939         else
3940                 gaudi_init_static_firmware_loader(hdev);
3941 }
3942
3943 static int gaudi_init_cpu(struct hl_device *hdev)
3944 {
3945         struct gaudi_device *gaudi = hdev->asic_specific;
3946         int rc;
3947
3948         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3949                 return 0;
3950
3951         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3952                 return 0;
3953
3954         /*
3955          * The device CPU works with 40-bit addresses.
3956          * This register sets the extension to 50 bits.
3957          */
3958         if (!hdev->asic_prop.fw_security_enabled)
3959                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3960
3961         rc = hl_fw_init_cpu(hdev);
3962
3963         if (rc)
3964                 return rc;
3965
3966         gaudi->hw_cap_initialized |= HW_CAP_CPU;
3967
3968         return 0;
3969 }
3970
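/*
 * gaudi_init_cpu_queues - pass the PQ/EQ/CQ base addresses and sizes to the
 * device CPU and wait for it to report that the queues are ready.
 */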
3971 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3972 {
3973         struct cpu_dyn_regs *dyn_regs =
3974                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3975         struct asic_fixed_properties *prop = &hdev->asic_prop;
3976         struct gaudi_device *gaudi = hdev->asic_specific;
3977         u32 status, irq_handler_offset;
3978         struct hl_eq *eq;
3979         struct hl_hw_queue *cpu_pq =
3980                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3981         int err;
3982
3983         if (!hdev->cpu_queues_enable)
3984                 return 0;
3985
3986         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3987                 return 0;
3988
3989         eq = &hdev->event_queue;
3990
3991         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3992         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3993
3994         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3995         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3996
3997         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3998                         lower_32_bits(hdev->cpu_accessible_dma_address));
3999         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4000                         upper_32_bits(hdev->cpu_accessible_dma_address));
4001
4002         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4003         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4004         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4005
4006         /* Used for EQ CI */
4007         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4008
4009         WREG32(mmCPU_IF_PF_PQ_PI, 0);
4010
4011         if (gaudi->multi_msi_mode)
4012                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4013         else
4014                 WREG32(mmCPU_IF_QUEUE_INIT,
4015                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4016
4017         irq_handler_offset = prop->gic_interrupts_enable ?
4018                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4019                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4020
4021         WREG32(irq_handler_offset,
4022                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4023
4024         err = hl_poll_timeout(
4025                 hdev,
4026                 mmCPU_IF_QUEUE_INIT,
4027                 status,
4028                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4029                 1000,
4030                 cpu_timeout);
4031
4032         if (err) {
4033                 dev_err(hdev->dev,
4034                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4035                 return -EIO;
4036         }
4037
4038         /* update FW application security bits */
4039         if (prop->fw_cpu_boot_dev_sts0_valid)
4040                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4041         if (prop->fw_cpu_boot_dev_sts1_valid)
4042                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4043
4044         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4045         return 0;
4046 }
4047
4048 static void gaudi_pre_hw_init(struct hl_device *hdev)
4049 {
4050         /* Perform read from the device to make sure device is up */
4051         RREG32(mmHW_STATE);
4052
4053         if (!hdev->asic_prop.fw_security_enabled) {
4054                 /* Set the access through PCI bars (Linux driver only) as
4055                  * secured
4056                  */
4057                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4058                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4059                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4060
4061                 /* Perform read to flush the waiting writes to ensure
4062                  * configuration was set in the device
4063                  */
4064                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4065         }
4066
4067         /*
4068          * Let's mark in the H/W that we have reached this point. We check
4069          * this value in the reset_before_init function to understand whether
4070          * we need to reset the chip before doing H/W init. This register is
4071          * cleared by the H/W upon H/W reset
4072          */
4073         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4074 }
4075
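/*
 * gaudi_hw_init - full H/W initialization: device CPU, scramblers, golden
 * registers, MMU, security, QMANs, clock gating, MSI and CPU queues.
 */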
4076 static int gaudi_hw_init(struct hl_device *hdev)
4077 {
4078         struct gaudi_device *gaudi = hdev->asic_specific;
4079         int rc;
4080
4081         gaudi_pre_hw_init(hdev);
4082
4083         /* If the iATU is configured by the FW, the HBM bar ALWAYS points to
4084          * DRAM_PHYS_BASE. So we set it here, and if anyone tries to move it
4085          * later to a different address, there will be an error
4086          */
4087         if (hdev->asic_prop.iatu_done_by_fw)
4088                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4089
4090         /*
4091          * Before pushing u-boot/linux to the device, we need to set the HBM
4092          * bar to the DRAM base address
4093          */
4094         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4095                 dev_err(hdev->dev,
4096                         "failed to map HBM bar to DRAM base address\n");
4097                 return -EIO;
4098         }
4099
4100         rc = gaudi_init_cpu(hdev);
4101         if (rc) {
4102                 dev_err(hdev->dev, "failed to initialize CPU\n");
4103                 return rc;
4104         }
4105
4106         /* If clock gating was enabled in preboot, we need to disable it here
4107          * before touching the MME/TPC registers.
4108          * There is no need to take the clock gating mutex because no other
4109          * relevant code can run while this function runs
4110          */
4111         hdev->asic_funcs->disable_clock_gating(hdev);
4112
4113         /* SRAM scrambler must be initialized after CPU is running from HBM */
4114         gaudi_init_scrambler_sram(hdev);
4115
4116         /* This is here just in case we are working without CPU */
4117         gaudi_init_scrambler_hbm(hdev);
4118
4119         gaudi_init_golden_registers(hdev);
4120
4121         rc = gaudi_mmu_init(hdev);
4122         if (rc)
4123                 return rc;
4124
4125         gaudi_init_security(hdev);
4126
4127         gaudi_init_pci_dma_qmans(hdev);
4128
4129         gaudi_init_hbm_dma_qmans(hdev);
4130
4131         gaudi_init_mme_qmans(hdev);
4132
4133         gaudi_init_tpc_qmans(hdev);
4134
4135         gaudi_init_nic_qmans(hdev);
4136
4137         hdev->asic_funcs->set_clock_gating(hdev);
4138
4139         gaudi_enable_timestamp(hdev);
4140
4141         /* MSI must be enabled before CPU queues and NIC are initialized */
4142         rc = gaudi_enable_msi(hdev);
4143         if (rc)
4144                 goto disable_queues;
4145
4146         /* must be called after MSI was enabled */
4147         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4148         if (rc) {
4149                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4150                         rc);
4151                 goto disable_msi;
4152         }
4153
4154         /* Perform read from the device to flush all configuration */
4155         RREG32(mmHW_STATE);
4156
4157         return 0;
4158
4159 disable_msi:
4160         gaudi_disable_msi(hdev);
4161 disable_queues:
4162         gaudi_disable_mme_qmans(hdev);
4163         gaudi_disable_pci_dma_qmans(hdev);
4164
4165         return rc;
4166 }
4167
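/*
 * gaudi_hw_fini - perform a hard-reset of the device, either directly by the
 * driver through the PSOC reset registers or by requesting it from the F/W,
 * depending on the security and reset properties.
 */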
4168 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
4169 {
4170         struct cpu_dyn_regs *dyn_regs =
4171                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4172         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4173         struct gaudi_device *gaudi = hdev->asic_specific;
4174         bool driver_performs_reset;
4175
4176         if (!hard_reset) {
4177                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4178                 return;
4179         }
4180
4181         if (hdev->pldm) {
4182                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4183                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4184         } else {
4185                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4186                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4187         }
4188
4189         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4190                                         !hdev->asic_prop.hard_reset_done_by_fw);
4191
4192         /* Set device to handle FLR by H/W as we will put the device CPU to
4193          * halt mode
4194          */
4195         if (driver_performs_reset)
4196                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4197                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4198
4199         /* If Linux is loaded on the device CPU, we need to communicate with
4200          * it via the GIC. Otherwise, we use COMMS, or the MSG_TO_CPU
4201          * registers in the case of old F/Ws
4202          */
4203         if (hdev->fw_loader.linux_loaded) {
4204                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4205                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4206                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4207
4208                 WREG32(irq_handler_offset,
4209                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4210         } else {
4211                 if (hdev->asic_prop.hard_reset_done_by_fw)
4212                         hl_fw_ask_hard_reset_without_linux(hdev);
4213                 else
4214                         hl_fw_ask_halt_machine_without_linux(hdev);
4215         }
4216
4217         if (driver_performs_reset) {
4218
4219                 /* Configure the reset registers. Must be done as early as
4220                  * possible in case we fail during H/W initialization
4221                  */
4222                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4223                                                 (CFG_RST_H_DMA_MASK |
4224                                                 CFG_RST_H_MME_MASK |
4225                                                 CFG_RST_H_SM_MASK |
4226                                                 CFG_RST_H_TPC_7_MASK));
4227
4228                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4229
4230                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4231                                                 (CFG_RST_H_HBM_MASK |
4232                                                 CFG_RST_H_TPC_7_MASK |
4233                                                 CFG_RST_H_NIC_MASK |
4234                                                 CFG_RST_H_SM_MASK |
4235                                                 CFG_RST_H_DMA_MASK |
4236                                                 CFG_RST_H_MME_MASK |
4237                                                 CFG_RST_H_CPU_MASK |
4238                                                 CFG_RST_H_MMU_MASK));
4239
4240                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4241                                                 (CFG_RST_L_IF_MASK |
4242                                                 CFG_RST_L_PSOC_MASK |
4243                                                 CFG_RST_L_TPC_MASK));
4244
4245                 msleep(cpu_timeout_ms);
4246
4247                 /* Tell ASIC not to re-initialize PCIe */
4248                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4249
4250                 /* Restart BTL/BLR upon hard-reset */
4251                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4252
4253                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4254                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4255
4256                 dev_info(hdev->dev,
4257                         "Issued HARD reset command, going to wait %dms\n",
4258                         reset_timeout_ms);
4259         } else {
4260                 dev_info(hdev->dev,
4261                         "Firmware performs HARD reset, going to wait %dms\n",
4262                         reset_timeout_ms);
4263         }
4264
4265         /*
4266          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4267          * itself is in reset. Need to wait until the reset is deasserted
4268          */
4269         msleep(reset_timeout_ms);
4270
4271         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4272         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4273                 dev_err(hdev->dev,
4274                         "Timeout while waiting for device to reset 0x%x\n",
4275                         status);
4276
4277         if (gaudi) {
4278                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4279                                 HW_CAP_HBM | HW_CAP_PCI_DMA |
4280                                 HW_CAP_MME | HW_CAP_TPC_MASK |
4281                                 HW_CAP_HBM_DMA | HW_CAP_PLL |
4282                                 HW_CAP_NIC_MASK | HW_CAP_MMU |
4283                                 HW_CAP_SRAM_SCRAMBLER |
4284                                 HW_CAP_HBM_SCRAMBLER |
4285                                 HW_CAP_CLK_GATE);
4286
4287                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4288
4289                 hdev->device_cpu_is_halted = false;
4290         }
4291 }
4292
4293 static int gaudi_suspend(struct hl_device *hdev)
4294 {
4295         int rc;
4296
4297         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4298         if (rc)
4299                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4300
4301         return rc;
4302 }
4303
4304 static int gaudi_resume(struct hl_device *hdev)
4305 {
4306         return gaudi_init_iatu(hdev);
4307 }
4308
4309 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4310                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4311 {
4312         int rc;
4313
4314         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4315                         VM_DONTCOPY | VM_NORESERVE;
4316
4317         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4318                                 (dma_addr - HOST_PHYS_BASE), size);
4319         if (rc)
4320                 dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
4321
4322         return rc;
4323 }
4324
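/*
 * gaudi_ring_doorbell - write the new PI to the doorbell register of the given
 * H/W queue. For the CPU PQ, also send an interrupt to the device CPU so it
 * will fetch the updated PI.
 */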
4325 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4326 {
4327         struct cpu_dyn_regs *dyn_regs =
4328                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4329         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4330         struct gaudi_device *gaudi = hdev->asic_specific;
4331         bool invalid_queue = false;
4332         int dma_id;
4333
4334         switch (hw_queue_id) {
4335         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4336                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4337                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4338                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4339                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4340                 break;
4341
4342         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4343                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4344                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4345                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4346                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4347                 break;
4348
4349         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4350                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4351                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4352                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4353                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4354                 break;
4355
4356         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4357                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4358                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4359                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4360                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4361                 break;
4362
4363         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4364                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4365                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4366                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4367                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4368                 break;
4369
4370         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4371                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4372                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4373                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4374                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4375                 break;
4376
4377         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4378                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4379                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4380                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4381                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4382                 break;
4383
4384         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4385                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4386                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4387                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4388                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4389                 break;
4390
4391         case GAUDI_QUEUE_ID_CPU_PQ:
4392                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4393                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4394                 else
4395                         invalid_queue = true;
4396                 break;
4397
4398         case GAUDI_QUEUE_ID_MME_0_0:
4399                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4400                 break;
4401
4402         case GAUDI_QUEUE_ID_MME_0_1:
4403                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4404                 break;
4405
4406         case GAUDI_QUEUE_ID_MME_0_2:
4407                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4408                 break;
4409
4410         case GAUDI_QUEUE_ID_MME_0_3:
4411                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4412                 break;
4413
4414         case GAUDI_QUEUE_ID_MME_1_0:
4415                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4416                 break;
4417
4418         case GAUDI_QUEUE_ID_MME_1_1:
4419                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4420                 break;
4421
4422         case GAUDI_QUEUE_ID_MME_1_2:
4423                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4424                 break;
4425
4426         case GAUDI_QUEUE_ID_MME_1_3:
4427                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4428                 break;
4429
4430         case GAUDI_QUEUE_ID_TPC_0_0:
4431                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4432                 break;
4433
4434         case GAUDI_QUEUE_ID_TPC_0_1:
4435                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4436                 break;
4437
4438         case GAUDI_QUEUE_ID_TPC_0_2:
4439                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4440                 break;
4441
4442         case GAUDI_QUEUE_ID_TPC_0_3:
4443                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4444                 break;
4445
4446         case GAUDI_QUEUE_ID_TPC_1_0:
4447                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4448                 break;
4449
4450         case GAUDI_QUEUE_ID_TPC_1_1:
4451                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4452                 break;
4453
4454         case GAUDI_QUEUE_ID_TPC_1_2:
4455                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4456                 break;
4457
4458         case GAUDI_QUEUE_ID_TPC_1_3:
4459                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4460                 break;
4461
4462         case GAUDI_QUEUE_ID_TPC_2_0:
4463                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4464                 break;
4465
4466         case GAUDI_QUEUE_ID_TPC_2_1:
4467                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4468                 break;
4469
4470         case GAUDI_QUEUE_ID_TPC_2_2:
4471                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4472                 break;
4473
4474         case GAUDI_QUEUE_ID_TPC_2_3:
4475                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4476                 break;
4477
4478         case GAUDI_QUEUE_ID_TPC_3_0:
4479                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4480                 break;
4481
4482         case GAUDI_QUEUE_ID_TPC_3_1:
4483                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4484                 break;
4485
4486         case GAUDI_QUEUE_ID_TPC_3_2:
4487                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4488                 break;
4489
4490         case GAUDI_QUEUE_ID_TPC_3_3:
4491                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4492                 break;
4493
4494         case GAUDI_QUEUE_ID_TPC_4_0:
4495                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4496                 break;
4497
4498         case GAUDI_QUEUE_ID_TPC_4_1:
4499                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4500                 break;
4501
4502         case GAUDI_QUEUE_ID_TPC_4_2:
4503                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4504                 break;
4505
4506         case GAUDI_QUEUE_ID_TPC_4_3:
4507                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4508                 break;
4509
4510         case GAUDI_QUEUE_ID_TPC_5_0:
4511                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4512                 break;
4513
4514         case GAUDI_QUEUE_ID_TPC_5_1:
4515                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4516                 break;
4517
4518         case GAUDI_QUEUE_ID_TPC_5_2:
4519                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4520                 break;
4521
4522         case GAUDI_QUEUE_ID_TPC_5_3:
4523                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4524                 break;
4525
4526         case GAUDI_QUEUE_ID_TPC_6_0:
4527                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4528                 break;
4529
4530         case GAUDI_QUEUE_ID_TPC_6_1:
4531                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4532                 break;
4533
4534         case GAUDI_QUEUE_ID_TPC_6_2:
4535                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4536                 break;
4537
4538         case GAUDI_QUEUE_ID_TPC_6_3:
4539                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4540                 break;
4541
4542         case GAUDI_QUEUE_ID_TPC_7_0:
4543                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4544                 break;
4545
4546         case GAUDI_QUEUE_ID_TPC_7_1:
4547                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4548                 break;
4549
4550         case GAUDI_QUEUE_ID_TPC_7_2:
4551                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4552                 break;
4553
4554         case GAUDI_QUEUE_ID_TPC_7_3:
4555                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4556                 break;
4557
4558         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4559                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4560                         invalid_queue = true;
4561
4562                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4563                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4564                 break;
4565
4566         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4567                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4568                         invalid_queue = true;
4569
4570                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4571                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4572                 break;
4573
4574         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4575                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4576                         invalid_queue = true;
4577
4578                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4579                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4580                 break;
4581
4582         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4583                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4584                         invalid_queue = true;
4585
4586                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4587                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4588                 break;
4589
4590         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4591                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4592                         invalid_queue = true;
4593
4594                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4595                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4596                 break;
4597
4598         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4599                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4600                         invalid_queue = true;
4601
4602                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4603                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4604                 break;
4605
4606         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4607                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4608                         invalid_queue = true;
4609
4610                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4611                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4612                 break;
4613
4614         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4615                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4616                         invalid_queue = true;
4617
4618                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4619                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4620                 break;
4621
4622         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4623                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4624                         invalid_queue = true;
4625
4626                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4627                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4628                 break;
4629
4630         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4631                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4632                         invalid_queue = true;
4633
4634                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4635                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4636                 break;
4637
4638         default:
4639                 invalid_queue = true;
4640         }
4641
4642         if (invalid_queue) {
4643                 /* Should never get here */
4644                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4645                         hw_queue_id);
4646                 return;
4647         }
4648
4649         db_value = pi;
4650
4651         /* ring the doorbell */
4652         WREG32(db_reg_offset, db_value);
4653
4654         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4655                 /* make sure device CPU will read latest data from host */
4656                 mb();
4657
4658                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4659                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4660                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4661
4662                 WREG32(irq_handler_offset,
4663                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4664         }
4665 }
4666
4667 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4668                                 struct hl_bd *bd)
4669 {
4670         __le64 *pbd = (__le64 *) bd;
4671
4672         /* The QMANs are on host memory, so a simple copy suffices */
4673         pqe[0] = pbd[0];
4674         pqe[1] = pbd[1];
4675 }
4676
4677 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4678                                         dma_addr_t *dma_handle, gfp_t flags)
4679 {
4680         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4681                                                 dma_handle, flags);
4682
4683         /* Shift to the device's base physical address of host memory */
4684         if (kernel_addr)
4685                 *dma_handle += HOST_PHYS_BASE;
4686
4687         return kernel_addr;
4688 }
4689
4690 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4691                 void *cpu_addr, dma_addr_t dma_handle)
4692 {
4693         /* Cancel the device's base physical address of host memory */
4694         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4695
4696         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4697 }
4698
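/*
 * gaudi_hbm_scrubbing - memset the entire user HBM range using all DMA
 * channels in parallel, up to a 2GB chunk per channel, and poll each channel
 * until it is no longer busy.
 */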
4699 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4700 {
4701         struct asic_fixed_properties *prop = &hdev->asic_prop;
4702         u64  cur_addr = DRAM_BASE_ADDR_USER;
4703         u32 val;
4704         u32 chunk_size;
4705         int rc, dma_id;
4706
4707         while (cur_addr < prop->dram_end_address) {
4708                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4709                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4710
4711                         chunk_size =
4712                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4713
4714                         dev_dbg(hdev->dev,
4715                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4716                                 cur_addr, cur_addr + chunk_size);
4717
4718                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4719                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4720                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4721                                                 lower_32_bits(cur_addr));
4722                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4723                                                 upper_32_bits(cur_addr));
4724                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4725                                         chunk_size);
4726                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4727                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4728                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4729
4730                         cur_addr += chunk_size;
4731
4732                         if (cur_addr == prop->dram_end_address)
4733                                 break;
4734                 }
4735
4736                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4737                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4738
4739                         rc = hl_poll_timeout(
4740                                 hdev,
4741                                 mmDMA0_CORE_STS0 + dma_offset,
4742                                 val,
4743                                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4744                                 1000,
4745                                 HBM_SCRUBBING_TIMEOUT_US);
4746
4747                         if (rc) {
4748                                 dev_err(hdev->dev,
4749                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4750                                         dma_id);
4751                                 return -EIO;
4752                         }
4753                 }
4754         }
4755
4756         return 0;
4757 }
4758
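/*
 * gaudi_scrub_device_mem - when called with addr == 0 and size == 0 (and
 * memory scrubbing is enabled), wait for the device to become idle and then
 * scrub the entire SRAM and HBM.
 */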
4759 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4760 {
4761         struct asic_fixed_properties *prop = &hdev->asic_prop;
4762         struct gaudi_device *gaudi = hdev->asic_specific;
4763         int rc = 0;
4764         u64 val = 0;
4765
4766         if (!hdev->memory_scrub)
4767                 return 0;
4768
4769         if (!addr && !size) {
4770                 /* Wait till device is idle */
4771                 rc = hl_poll_timeout(
4772                                 hdev,
4773                                 mmDMA0_CORE_STS0/* dummy */,
4774                                 val/* dummy */,
4775                                 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4776                                                 0, NULL)),
4777                                                 1000,
4778                                                 HBM_SCRUBBING_TIMEOUT_US);
4779                 if (rc) {
4780                         dev_err(hdev->dev, "waiting for idle timeout\n");
4781                         return -EIO;
4782                 }
4783
4784                 /* Scrub SRAM */
4785                 addr = prop->sram_user_base_address;
4786                 size = hdev->pldm ? 0x10000 :
4787                                 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4788                 val = 0x7777777777777777ull;
4789
4790                 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4791                 if (rc) {
4792                         dev_err(hdev->dev,
4793                                 "Failed to clear SRAM in mem scrub all\n");
4794                         return rc;
4795                 }
4796
4797                 mutex_lock(&gaudi->clk_gate_mutex);
4798                 hdev->asic_funcs->disable_clock_gating(hdev);
4799
4800                 /* Scrub HBM using all DMA channels in parallel */
4801                 rc = gaudi_hbm_scrubbing(hdev);
4802                 if (rc)
4803                         dev_err(hdev->dev,
4804                                 "Failed to clear HBM in mem scrub all\n");
4805
4806                 hdev->asic_funcs->set_clock_gating(hdev);
4807                 mutex_unlock(&gaudi->clk_gate_mutex);
4808         }
4809
4810         return rc;
4811 }
4812
4813 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4814                                 u32 queue_id, dma_addr_t *dma_handle,
4815                                 u16 *queue_len)
4816 {
4817         struct gaudi_device *gaudi = hdev->asic_specific;
4818         struct gaudi_internal_qman_info *q;
4819
4820         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4821                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4822                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4823                 return NULL;
4824         }
4825
4826         q = &gaudi->internal_qmans[queue_id];
4827         *dma_handle = q->pq_dma_addr;
4828         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4829
4830         return q->pq_kernel_addr;
4831 }
4832
4833 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4834                                 u16 len, u32 timeout, u64 *result)
4835 {
4836         struct gaudi_device *gaudi = hdev->asic_specific;
4837
4838         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4839                 if (result)
4840                         *result = 0;
4841                 return 0;
4842         }
4843
4844         if (!timeout)
4845                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4846
4847         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4848                                                 timeout, result);
4849 }
4850
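/*
 * gaudi_test_queue - sanity-test an external H/W queue by sending a single
 * MSG_PROT packet that writes a known fence value to host memory and polling
 * that memory for the value.
 */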
4851 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4852 {
4853         struct packet_msg_prot *fence_pkt;
4854         dma_addr_t pkt_dma_addr;
4855         u32 fence_val, tmp, timeout_usec;
4856         dma_addr_t fence_dma_addr;
4857         u32 *fence_ptr;
4858         int rc;
4859
4860         if (hdev->pldm)
4861                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4862         else
4863                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4864
4865         fence_val = GAUDI_QMAN0_FENCE_VAL;
4866
4867         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4868                                                         &fence_dma_addr);
4869         if (!fence_ptr) {
4870                 dev_err(hdev->dev,
4871                         "Failed to allocate memory for H/W queue %d testing\n",
4872                         hw_queue_id);
4873                 return -ENOMEM;
4874         }
4875
4876         *fence_ptr = 0;
4877
4878         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4879                                         sizeof(struct packet_msg_prot),
4880                                         GFP_KERNEL, &pkt_dma_addr);
4881         if (!fence_pkt) {
4882                 dev_err(hdev->dev,
4883                         "Failed to allocate packet for H/W queue %d testing\n",
4884                         hw_queue_id);
4885                 rc = -ENOMEM;
4886                 goto free_fence_ptr;
4887         }
4888
4889         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4890         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4891         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4892
4893         fence_pkt->ctl = cpu_to_le32(tmp);
4894         fence_pkt->value = cpu_to_le32(fence_val);
4895         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4896
4897         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4898                                         sizeof(struct packet_msg_prot),
4899                                         pkt_dma_addr);
4900         if (rc) {
4901                 dev_err(hdev->dev,
4902                         "Failed to send fence packet to H/W queue %d\n",
4903                         hw_queue_id);
4904                 goto free_pkt;
4905         }
4906
4907         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4908                                         1000, timeout_usec, true);
4909
4910         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4911
4912         if (rc == -ETIMEDOUT) {
4913                 dev_err(hdev->dev,
4914                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4915                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4916                 rc = -EIO;
4917         }
4918
4919 free_pkt:
4920         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4921                                         pkt_dma_addr);
4922 free_fence_ptr:
4923         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4924                                         fence_dma_addr);
4925         return rc;
4926 }
4927
4928 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4929 {
4930         struct gaudi_device *gaudi = hdev->asic_specific;
4931
4932         /*
4933          * Check the capability here, as send_cpu_message() won't update the
4934          * result value if the capability is missing
4935          */
4936         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4937                 return 0;
4938
4939         return hl_fw_test_cpu_queue(hdev);
4940 }
4941
4942 static int gaudi_test_queues(struct hl_device *hdev)
4943 {
4944         int i, rc, ret_val = 0;
4945
4946         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4947                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4948                         rc = gaudi_test_queue(hdev, i);
4949                         if (rc)
4950                                 ret_val = -EINVAL;
4951                 }
4952         }
4953
4954         rc = gaudi_test_cpu_queue(hdev);
4955         if (rc)
4956                 ret_val = -EINVAL;
4957
4958         return ret_val;
4959 }
4960
4961 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4962                 gfp_t mem_flags, dma_addr_t *dma_handle)
4963 {
4964         void *kernel_addr;
4965
4966         if (size > GAUDI_DMA_POOL_BLK_SIZE)
4967                 return NULL;
4968
4969         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4970
4971         /* Shift to the device's base physical address of host memory */
4972         if (kernel_addr)
4973                 *dma_handle += HOST_PHYS_BASE;
4974
4975         return kernel_addr;
4976 }
4977
4978 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4979                         dma_addr_t dma_addr)
4980 {
4981         /* Cancel the device's base physical address of host memory */
4982         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4983
4984         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4985 }
4986
4987 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4988                                         size_t size, dma_addr_t *dma_handle)
4989 {
4990         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4991 }
4992
4993 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4994                                                 size_t size, void *vaddr)
4995 {
4996         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4997 }
4998
4999 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5000                         int nents, enum dma_data_direction dir)
5001 {
5002         struct scatterlist *sg;
5003         int i;
5004
5005         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5006                 return -ENOMEM;
5007
5008         /* Shift to the device's base physical address of host memory */
5009         for_each_sg(sgl, sg, nents, i)
5010                 sg->dma_address += HOST_PHYS_BASE;
5011
5012         return 0;
5013 }
5014
5015 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5016                         int nents, enum dma_data_direction dir)
5017 {
5018         struct scatterlist *sg;
5019         int i;
5020
5021         /* Cancel the device's base physical address of host memory */
5022         for_each_sg(sgl, sg, nents, i)
5023                 sg->dma_address -= HOST_PHYS_BASE;
5024
5025         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5026 }
5027
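/*
 * gaudi_get_dma_desc_list_size - return the total size, in bytes, of the
 * LIN_DMA packets needed to cover the given SG table, merging contiguous
 * entries up to DMA_MAX_TRANSFER_SIZE.
 */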
5028 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5029                                         struct sg_table *sgt)
5030 {
5031         struct scatterlist *sg, *sg_next_iter;
5032         u32 count, dma_desc_cnt;
5033         u64 len, len_next;
5034         dma_addr_t addr, addr_next;
5035
5036         dma_desc_cnt = 0;
5037
5038         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5039
5040                 len = sg_dma_len(sg);
5041                 addr = sg_dma_address(sg);
5042
5043                 if (len == 0)
5044                         break;
5045
5046                 while ((count + 1) < sgt->nents) {
5047                         sg_next_iter = sg_next(sg);
5048                         len_next = sg_dma_len(sg_next_iter);
5049                         addr_next = sg_dma_address(sg_next_iter);
5050
5051                         if (len_next == 0)
5052                                 break;
5053
5054                         if ((addr + len == addr_next) &&
5055                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5056                                 len += len_next;
5057                                 count++;
5058                                 sg = sg_next_iter;
5059                         } else {
5060                                 break;
5061                         }
5062                 }
5063
5064                 dma_desc_cnt++;
5065         }
5066
5067         return dma_desc_cnt * sizeof(struct packet_lin_dma);
5068 }
5069
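/*
 * gaudi_pin_memory_before_cs - pin and DMA-map the host memory referenced by a
 * user LIN_DMA packet (unless it is already pinned) and add the resulting
 * descriptor list size to the patched CB size.
 */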
5070 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5071                                 struct hl_cs_parser *parser,
5072                                 struct packet_lin_dma *user_dma_pkt,
5073                                 u64 addr, enum dma_data_direction dir)
5074 {
5075         struct hl_userptr *userptr;
5076         int rc;
5077
5078         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5079                         parser->job_userptr_list, &userptr))
5080                 goto already_pinned;
5081
5082         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5083         if (!userptr)
5084                 return -ENOMEM;
5085
5086         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5087                                 userptr);
5088         if (rc)
5089                 goto free_userptr;
5090
5091         list_add_tail(&userptr->job_node, parser->job_userptr_list);
5092
5093         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5094                                         userptr->sgt->nents, dir);
5095         if (rc) {
5096                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5097                 goto unpin_memory;
5098         }
5099
5100         userptr->dma_mapped = true;
5101         userptr->dir = dir;
5102
5103 already_pinned:
5104         parser->patched_cb_size +=
5105                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5106
5107         return 0;
5108
5109 unpin_memory:
5110         list_del(&userptr->job_node);
5111         hl_unpin_host_memory(hdev, userptr);
5112 free_userptr:
5113         kfree(userptr);
5114         return rc;
5115 }
5116
5117 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5118                                 struct hl_cs_parser *parser,
5119                                 struct packet_lin_dma *user_dma_pkt,
5120                                 bool src_in_host)
5121 {
5122         enum dma_data_direction dir;
5123         bool skip_host_mem_pin = false, user_memset;
5124         u64 addr;
5125         int rc = 0;
5126
5127         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5128                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5129                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5130
5131         if (src_in_host) {
5132                 if (user_memset)
5133                         skip_host_mem_pin = true;
5134
5135                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5136                 dir = DMA_TO_DEVICE;
5137                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5138         } else {
5139                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5140                 dir = DMA_FROM_DEVICE;
5141                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5142                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5143                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5144         }
5145
5146         if (skip_host_mem_pin)
5147                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5148         else
5149                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5150                                                 addr, dir);
5151
5152         return rc;
5153 }
5154
5155 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5156                                 struct hl_cs_parser *parser,
5157                                 struct packet_lin_dma *user_dma_pkt)
5158 {
5159         bool src_in_host = false;
5160         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5161                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5162                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5163
5164         dev_dbg(hdev->dev, "DMA packet details:\n");
5165         dev_dbg(hdev->dev, "source == 0x%llx\n",
5166                                 le64_to_cpu(user_dma_pkt->src_addr));
5167         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5168         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5169
5170         /*
5171          * Special handling for DMA with size 0. Bypass all validations
5172          * because no transactions will be done except for WR_COMP, which
5173          * is not a security issue
5174          */
5175         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5176                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5177                 return 0;
5178         }
5179
5180         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5181                 src_in_host = true;
5182
5183         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5184                                                 src_in_host);
5185 }
5186
5187 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5188                                         struct hl_cs_parser *parser,
5189                                         struct packet_load_and_exe *user_pkt)
5190 {
5191         u32 cfg;
5192
5193         cfg = le32_to_cpu(user_pkt->cfg);
5194
5195         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5196                 dev_err(hdev->dev,
5197                         "User not allowed to use Load and Execute\n");
5198                 return -EPERM;
5199         }
5200
5201         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5202
5203         return 0;
5204 }
5205
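/*
 * Walk the user CB packet by packet: reject packet types that user space is
 * not allowed to submit, verify that no packet crosses the CB boundary, and
 * accumulate the size of the patched CB that will be built later (including
 * room for the two trailing MSG_PROT packets when a completion is required).
 */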
5206 static int gaudi_validate_cb(struct hl_device *hdev,
5207                         struct hl_cs_parser *parser, bool is_mmu)
5208 {
5209         u32 cb_parsed_length = 0;
5210         int rc = 0;
5211
5212         parser->patched_cb_size = 0;
5213
5214         /* user_cb_size is more than 0 so the loop always executes at least once */
5215         while (cb_parsed_length < parser->user_cb_size) {
5216                 enum packet_id pkt_id;
5217                 u16 pkt_size;
5218                 struct gaudi_packet *user_pkt;
5219
5220                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5221
5222                 pkt_id = (enum packet_id) (
5223                                 (le64_to_cpu(user_pkt->header) &
5224                                 PACKET_HEADER_PACKET_ID_MASK) >>
5225                                         PACKET_HEADER_PACKET_ID_SHIFT);
5226
5227                 if (!validate_packet_id(pkt_id)) {
5228                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5229                         rc = -EINVAL;
5230                         break;
5231                 }
5232
5233                 pkt_size = gaudi_packet_sizes[pkt_id];
5234                 cb_parsed_length += pkt_size;
5235                 if (cb_parsed_length > parser->user_cb_size) {
5236                         dev_err(hdev->dev,
5237                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5238                         rc = -EINVAL;
5239                         break;
5240                 }
5241
5242                 switch (pkt_id) {
5243                 case PACKET_MSG_PROT:
5244                         dev_err(hdev->dev,
5245                                 "User not allowed to use MSG_PROT\n");
5246                         rc = -EPERM;
5247                         break;
5248
5249                 case PACKET_CP_DMA:
5250                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5251                         rc = -EPERM;
5252                         break;
5253
5254                 case PACKET_STOP:
5255                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5256                         rc = -EPERM;
5257                         break;
5258
5259                 case PACKET_WREG_BULK:
5260                         dev_err(hdev->dev,
5261                                 "User not allowed to use WREG_BULK\n");
5262                         rc = -EPERM;
5263                         break;
5264
5265                 case PACKET_LOAD_AND_EXE:
5266                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5267                                 (struct packet_load_and_exe *) user_pkt);
5268                         break;
5269
5270                 case PACKET_LIN_DMA:
5271                         parser->contains_dma_pkt = true;
5272                         if (is_mmu)
5273                                 parser->patched_cb_size += pkt_size;
5274                         else
5275                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5276                                         (struct packet_lin_dma *) user_pkt);
5277                         break;
5278
5279                 case PACKET_WREG_32:
5280                 case PACKET_MSG_LONG:
5281                 case PACKET_MSG_SHORT:
5282                 case PACKET_REPEAT:
5283                 case PACKET_FENCE:
5284                 case PACKET_NOP:
5285                 case PACKET_ARB_POINT:
5286                         parser->patched_cb_size += pkt_size;
5287                         break;
5288
5289                 default:
5290                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5291                                 pkt_id);
5292                         rc = -EINVAL;
5293                         break;
5294                 }
5295
5296                 if (rc)
5297                         break;
5298         }
5299
5300         /*
5301          * The new CB should have space at the end for two MSG_PROT packets:
5302          * 1. A packet that will act as a completion packet
5303          * 2. A packet that will generate an MSI interrupt
5304          */
5305         if (parser->completion)
5306                 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5307
5308         return rc;
5309 }
5310
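/*
 * Expand a single user LIN_DMA packet into one LIN_DMA descriptor per SG
 * entry of the pinned host buffer. Physically contiguous entries are merged
 * up to DMA_MAX_TRANSFER_SIZE, the engine barrier is kept only on the first
 * descriptor, and WR_COMP is re-enabled only on the last one according to
 * the user's original setting. A host-memset packet is copied as-is.
 */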
5311 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5312                                 struct hl_cs_parser *parser,
5313                                 struct packet_lin_dma *user_dma_pkt,
5314                                 struct packet_lin_dma *new_dma_pkt,
5315                                 u32 *new_dma_pkt_size)
5316 {
5317         struct hl_userptr *userptr;
5318         struct scatterlist *sg, *sg_next_iter;
5319         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5320         u64 len, len_next;
5321         dma_addr_t dma_addr, dma_addr_next;
5322         u64 device_memory_addr, addr;
5323         enum dma_data_direction dir;
5324         struct sg_table *sgt;
5325         bool src_in_host = false;
5326         bool skip_host_mem_pin = false;
5327         bool user_memset;
5328
5329         ctl = le32_to_cpu(user_dma_pkt->ctl);
5330
5331         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5332                 src_in_host = true;
5333
5334         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5335                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5336
5337         if (src_in_host) {
5338                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5339                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5340                 dir = DMA_TO_DEVICE;
5341                 if (user_memset)
5342                         skip_host_mem_pin = true;
5343         } else {
5344                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5345                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5346                 dir = DMA_FROM_DEVICE;
5347         }
5348
5349         if ((!skip_host_mem_pin) &&
5350                 (!hl_userptr_is_pinned(hdev, addr,
5351                                         le32_to_cpu(user_dma_pkt->tsize),
5352                                         parser->job_userptr_list, &userptr))) {
5353                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5354                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5355                 return -EFAULT;
5356         }
5357
5358         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5359                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5360                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5361                 return 0;
5362         }
5363
5364         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5365
5366         sgt = userptr->sgt;
5367         dma_desc_cnt = 0;
5368
5369         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5370                 len = sg_dma_len(sg);
5371                 dma_addr = sg_dma_address(sg);
5372
5373                 if (len == 0)
5374                         break;
5375
5376                 while ((count + 1) < sgt->nents) {
5377                         sg_next_iter = sg_next(sg);
5378                         len_next = sg_dma_len(sg_next_iter);
5379                         dma_addr_next = sg_dma_address(sg_next_iter);
5380
5381                         if (len_next == 0)
5382                                 break;
5383
5384                         if ((dma_addr + len == dma_addr_next) &&
5385                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5386                                 len += len_next;
5387                                 count++;
5388                                 sg = sg_next_iter;
5389                         } else {
5390                                 break;
5391                         }
5392                 }
5393
5394                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5395                 if (likely(dma_desc_cnt))
5396                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5397                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5398                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5399                 new_dma_pkt->tsize = cpu_to_le32(len);
5400
5401                 if (dir == DMA_TO_DEVICE) {
5402                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5403                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5404                 } else {
5405                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5406                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5407                 }
5408
5409                 if (!user_memset)
5410                         device_memory_addr += len;
5411                 dma_desc_cnt++;
5412                 new_dma_pkt++;
5413         }
5414
5415         if (!dma_desc_cnt) {
5416                 dev_err(hdev->dev,
5417                         "No SG entries found when patching DMA packet\n");
5418                 return -EFAULT;
5419         }
5420
5421         /* Fix the last dma packet - wrcomp must be as user set it */
5422         new_dma_pkt--;
5423         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5424
5425         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5426
5427         return 0;
5428 }
5429
5430 static int gaudi_patch_cb(struct hl_device *hdev,
5431                                 struct hl_cs_parser *parser)
5432 {
5433         u32 cb_parsed_length = 0;
5434         u32 cb_patched_cur_length = 0;
5435         int rc = 0;
5436
5437         /* user_cb_size is more than 0 so the loop always executes at least once */
5438         while (cb_parsed_length < parser->user_cb_size) {
5439                 enum packet_id pkt_id;
5440                 u16 pkt_size;
5441                 u32 new_pkt_size = 0;
5442                 struct gaudi_packet *user_pkt, *kernel_pkt;
5443
5444                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5445                 kernel_pkt = parser->patched_cb->kernel_address +
5446                                         cb_patched_cur_length;
5447
5448                 pkt_id = (enum packet_id) (
5449                                 (le64_to_cpu(user_pkt->header) &
5450                                 PACKET_HEADER_PACKET_ID_MASK) >>
5451                                         PACKET_HEADER_PACKET_ID_SHIFT);
5452
5453                 if (!validate_packet_id(pkt_id)) {
5454                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5455                         rc = -EINVAL;
5456                         break;
5457                 }
5458
5459                 pkt_size = gaudi_packet_sizes[pkt_id];
5460                 cb_parsed_length += pkt_size;
5461                 if (cb_parsed_length > parser->user_cb_size) {
5462                         dev_err(hdev->dev,
5463                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5464                         rc = -EINVAL;
5465                         break;
5466                 }
5467
5468                 switch (pkt_id) {
5469                 case PACKET_LIN_DMA:
5470                         rc = gaudi_patch_dma_packet(hdev, parser,
5471                                         (struct packet_lin_dma *) user_pkt,
5472                                         (struct packet_lin_dma *) kernel_pkt,
5473                                         &new_pkt_size);
5474                         cb_patched_cur_length += new_pkt_size;
5475                         break;
5476
5477                 case PACKET_MSG_PROT:
5478                         dev_err(hdev->dev,
5479                                 "User not allowed to use MSG_PROT\n");
5480                         rc = -EPERM;
5481                         break;
5482
5483                 case PACKET_CP_DMA:
5484                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5485                         rc = -EPERM;
5486                         break;
5487
5488                 case PACKET_STOP:
5489                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5490                         rc = -EPERM;
5491                         break;
5492
5493                 case PACKET_WREG_32:
5494                 case PACKET_WREG_BULK:
5495                 case PACKET_MSG_LONG:
5496                 case PACKET_MSG_SHORT:
5497                 case PACKET_REPEAT:
5498                 case PACKET_FENCE:
5499                 case PACKET_NOP:
5500                 case PACKET_ARB_POINT:
5501                 case PACKET_LOAD_AND_EXE:
5502                         memcpy(kernel_pkt, user_pkt, pkt_size);
5503                         cb_patched_cur_length += pkt_size;
5504                         break;
5505
5506                 default:
5507                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5508                                 pkt_id);
5509                         rc = -EINVAL;
5510                         break;
5511                 }
5512
5513                 if (rc)
5514                         break;
5515         }
5516
5517         return rc;
5518 }
5519
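/*
 * CS parsing when the MMU is enabled: allocate a kernel-owned patched CB
 * (user CB size plus, if a completion is required, two MSG_PROT packets),
 * copy the user CB into it and validate the copy. No per-packet patching is
 * needed because user addresses are already MMU-mapped.
 */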
5520 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5521                 struct hl_cs_parser *parser)
5522 {
5523         u64 patched_cb_handle;
5524         u32 patched_cb_size;
5525         struct hl_cb *user_cb;
5526         int rc;
5527
5528         /*
5529          * The new CB should have space at the end for two MSG_PROT packets:
5530          * 1. A packet that will act as a completion packet
5531          * 2. A packet that will generate an MSI interrupt
5532          */
5533         if (parser->completion)
5534                 parser->patched_cb_size = parser->user_cb_size +
5535                                 sizeof(struct packet_msg_prot) * 2;
5536         else
5537                 parser->patched_cb_size = parser->user_cb_size;
5538
5539         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5540                                 parser->patched_cb_size, false, false,
5541                                 &patched_cb_handle);
5542
5543         if (rc) {
5544                 dev_err(hdev->dev,
5545                         "Failed to allocate patched CB for DMA CS %d\n",
5546                         rc);
5547                 return rc;
5548         }
5549
5550         patched_cb_handle >>= PAGE_SHIFT;
5551         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5552                                 (u32) patched_cb_handle);
5553         /* hl_cb_get should never fail */
5554         if (!parser->patched_cb) {
5555                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5556                         (u32) patched_cb_handle);
5557                 rc = -EFAULT;
5558                 goto out;
5559         }
5560
5561         /*
5562          * The check that parser->user_cb_size <= parser->user_cb->size was done
5563          * in validate_queue_index().
5564          */
5565         memcpy(parser->patched_cb->kernel_address,
5566                 parser->user_cb->kernel_address,
5567                 parser->user_cb_size);
5568
5569         patched_cb_size = parser->patched_cb_size;
5570
5571         /* Validate patched CB instead of user CB */
5572         user_cb = parser->user_cb;
5573         parser->user_cb = parser->patched_cb;
5574         rc = gaudi_validate_cb(hdev, parser, true);
5575         parser->user_cb = user_cb;
5576
5577         if (rc) {
5578                 hl_cb_put(parser->patched_cb);
5579                 goto out;
5580         }
5581
5582         if (patched_cb_size != parser->patched_cb_size) {
5583                 dev_err(hdev->dev, "user CB size mismatch\n");
5584                 hl_cb_put(parser->patched_cb);
5585                 rc = -EINVAL;
5586                 goto out;
5587         }
5588
5589 out:
5590         /*
5591          * Always call cb_destroy here because we still hold one reference
5592          * from the earlier cb_get. After the job completes, cb_put will
5593          * release it, but here we only want to remove the CB from the
5594          * IDR.
5595          */
5596         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5597                                         patched_cb_handle << PAGE_SHIFT);
5598
5599         return rc;
5600 }
5601
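/*
 * CS parsing when the MMU is disabled: validate the user CB to compute the
 * patched CB size, allocate the patched CB and rebuild it packet by packet,
 * expanding each LIN_DMA packet over the pinned host memory. On failure the
 * job's userptr list is released.
 */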
5602 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5603                 struct hl_cs_parser *parser)
5604 {
5605         u64 patched_cb_handle;
5606         int rc;
5607
5608         rc = gaudi_validate_cb(hdev, parser, false);
5609
5610         if (rc)
5611                 goto free_userptr;
5612
5613         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5614                                 parser->patched_cb_size, false, false,
5615                                 &patched_cb_handle);
5616         if (rc) {
5617                 dev_err(hdev->dev,
5618                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5619                 goto free_userptr;
5620         }
5621
5622         patched_cb_handle >>= PAGE_SHIFT;
5623         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5624                                 (u32) patched_cb_handle);
5625         /* hl_cb_get should never fail here */
5626         if (!parser->patched_cb) {
5627                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5628                                 (u32) patched_cb_handle);
5629                 rc = -EFAULT;
5630                 goto out;
5631         }
5632
5633         rc = gaudi_patch_cb(hdev, parser);
5634
5635         if (rc)
5636                 hl_cb_put(parser->patched_cb);
5637
5638 out:
5639         /*
5640          * Always call cb_destroy here because we still hold one reference
5641          * from the earlier cb_get. After the job completes, cb_put will
5642          * release it, but here we only want to remove the CB from the
5643          * IDR.
5644          */
5645         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5646                                 patched_cb_handle << PAGE_SHIFT);
5647
5648 free_userptr:
5649         if (rc)
5650                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5651         return rc;
5652 }
5653
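/*
 * Parsing for internal (non-external) queues: no patching is done, only a
 * check that the targeted NIC queue is enabled and that the CB address range
 * lies entirely in SRAM, DRAM or the PMMU virtual address range.
 */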
5654 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5655                                         struct hl_cs_parser *parser)
5656 {
5657         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5658         struct gaudi_device *gaudi = hdev->asic_specific;
5659         u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5660                 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5661
5662         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5663                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5664                         (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5665                 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5666                                 parser->hw_queue_id);
5667                 return -EINVAL;
5668         }
5669
5670         /* For internal queue jobs just check if CB address is valid */
5671         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5672                                         parser->user_cb_size,
5673                                         asic_prop->sram_user_base_address,
5674                                         asic_prop->sram_end_address))
5675                 return 0;
5676
5677         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5678                                         parser->user_cb_size,
5679                                         asic_prop->dram_user_base_address,
5680                                         asic_prop->dram_end_address))
5681                 return 0;
5682
5683         /* PMMU and HPMMU addresses are equal, check only one of them */
5684         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5685                                         parser->user_cb_size,
5686                                         asic_prop->pmmu.start_addr,
5687                                         asic_prop->pmmu.end_addr))
5688                 return 0;
5689
5690         dev_err(hdev->dev,
5691                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5692                 parser->user_cb, parser->user_cb_size);
5693
5694         return -EFAULT;
5695 }
5696
5697 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5698 {
5699         struct gaudi_device *gaudi = hdev->asic_specific;
5700
5701         if (parser->queue_type == QUEUE_TYPE_INT)
5702                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5703
5704         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5705                 return gaudi_parse_cb_mmu(hdev, parser);
5706         else
5707                 return gaudi_parse_cb_no_mmu(hdev, parser);
5708 }
5709
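/*
 * Append the two trailing MSG_PROT packets to a job's CB: the first writes
 * cq_val to cq_addr (the completion queue entry), the second writes 1 to the
 * PCIe MSI interrupt register (vector 0 unless multi-MSI mode is enabled).
 */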
5710 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5711                                         void *kernel_address, u32 len,
5712                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
5713                                         bool eb)
5714 {
5715         struct gaudi_device *gaudi = hdev->asic_specific;
5716         struct packet_msg_prot *cq_pkt;
5717         u32 tmp;
5718
5719         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5720
5721         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5722         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5723
5724         if (eb)
5725                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5726
5727         cq_pkt->ctl = cpu_to_le32(tmp);
5728         cq_pkt->value = cpu_to_le32(cq_val);
5729         cq_pkt->addr = cpu_to_le64(cq_addr);
5730
5731         cq_pkt++;
5732
5733         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5734         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5735         cq_pkt->ctl = cpu_to_le32(tmp);
5736         cq_pkt->value = cpu_to_le32(1);
5737
5738         if (!gaudi->multi_msi_mode)
5739                 msi_vec = 0;
5740
5741         cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5742 }
5743
5744 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5745 {
5746         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5747 }
5748
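/*
 * Fill a device memory range with a 64-bit value by building a single memset
 * LIN_DMA packet in a kernel CB and sending it on QMAN0 through DMA channel
 * 0. The DMA0 error cause register is checked before and after the transfer
 * and, while the device is still initializing, cleared.
 */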
5749 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5750                                         u32 size, u64 val)
5751 {
5752         struct packet_lin_dma *lin_dma_pkt;
5753         struct hl_cs_job *job;
5754         u32 cb_size, ctl, err_cause;
5755         struct hl_cb *cb;
5756         u64 id;
5757         int rc;
5758
5759         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5760         if (!cb)
5761                 return -EFAULT;
5762
5763         lin_dma_pkt = cb->kernel_address;
5764         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5765         cb_size = sizeof(*lin_dma_pkt);
5766
5767         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5768         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5769         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5770         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5771         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5772
5773         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5774         lin_dma_pkt->src_addr = cpu_to_le64(val);
5775         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5776         lin_dma_pkt->tsize = cpu_to_le32(size);
5777
5778         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5779         if (!job) {
5780                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5781                 rc = -ENOMEM;
5782                 goto release_cb;
5783         }
5784
5785         /* Verify DMA is OK */
5786         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5787         if (err_cause && !hdev->init_done) {
5788                 dev_dbg(hdev->dev,
5789                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5790                         err_cause);
5791                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5792         }
5793
5794         job->id = 0;
5795         job->user_cb = cb;
5796         atomic_inc(&job->user_cb->cs_cnt);
5797         job->user_cb_size = cb_size;
5798         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5799         job->patched_cb = job->user_cb;
5800         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5801
5802         hl_debugfs_add_job(hdev, job);
5803
5804         rc = gaudi_send_job_on_qman0(hdev, job);
5805         hl_debugfs_remove_job(hdev, job);
5806         kfree(job);
5807         atomic_dec(&cb->cs_cnt);
5808
5809         /* Verify DMA is OK */
5810         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5811         if (err_cause) {
5812                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5813                 rc = -EIO;
5814                 if (!hdev->init_done) {
5815                         dev_dbg(hdev->dev,
5816                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5817                                 err_cause);
5818                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5819                 }
5820         }
5821
5822 release_cb:
5823         id = cb->id;
5824         hl_cb_put(cb);
5825         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5826
5827         return rc;
5828 }
5829
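/*
 * Write 'val' to 'num_regs' consecutive 32-bit registers starting at
 * 'reg_base' by building a kernel CB of MSG_LONG packets (one per register)
 * and sending it on QMAN0.
 */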
5830 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5831                                         u32 num_regs, u32 val)
5832 {
5833         struct packet_msg_long *pkt;
5834         struct hl_cs_job *job;
5835         u32 cb_size, ctl;
5836         struct hl_cb *cb;
5837         int i, rc;
5838
5839         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5840
5841         if (cb_size > SZ_2M) {
5842                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M >> 20);
5843                 return -ENOMEM;
5844         }
5845
5846         cb = hl_cb_kernel_create(hdev, cb_size, false);
5847         if (!cb)
5848                 return -EFAULT;
5849
5850         pkt = cb->kernel_address;
5851
5852         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5853         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5854         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5855         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5856         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5857
5858         for (i = 0; i < num_regs ; i++, pkt++) {
5859                 pkt->ctl = cpu_to_le32(ctl);
5860                 pkt->value = cpu_to_le32(val);
5861                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5862         }
5863
5864         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5865         if (!job) {
5866                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5867                 rc = -ENOMEM;
5868                 goto release_cb;
5869         }
5870
5871         job->id = 0;
5872         job->user_cb = cb;
5873         atomic_inc(&job->user_cb->cs_cnt);
5874         job->user_cb_size = cb_size;
5875         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5876         job->patched_cb = job->user_cb;
5877         job->job_cb_size = cb_size;
5878
5879         hl_debugfs_add_job(hdev, job);
5880
5881         rc = gaudi_send_job_on_qman0(hdev, job);
5882         hl_debugfs_remove_job(hdev, job);
5883         kfree(job);
5884         atomic_dec(&cb->cs_cnt);
5885
5886 release_cb:
5887         hl_cb_put(cb);
5888         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5889
5890         return rc;
5891 }
5892
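/*
 * Like gaudi_memset_registers(), but when a compute context is active the
 * MSG_LONG CB is not sent immediately on QMAN0; instead it is added to the
 * context's pending-CB list so it will be submitted later on the given
 * external queue.
 */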
5893 static int gaudi_schedule_register_memset(struct hl_device *hdev,
5894                 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
5895 {
5896         struct hl_ctx *ctx;
5897         struct hl_pending_cb *pending_cb;
5898         struct packet_msg_long *pkt;
5899         u32 cb_size, ctl;
5900         struct hl_cb *cb;
5901         int i, rc;
5902
5903         mutex_lock(&hdev->fpriv_list_lock);
5904         ctx = hdev->compute_ctx;
5905
5906         /* If no compute context is available, or the context is going
5907          * down, memset the registers directly
5908          */
5909         if (!ctx || kref_read(&ctx->refcount) == 0) {
5910                 rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
5911                 mutex_unlock(&hdev->fpriv_list_lock);
5912                 return rc;
5913         }
5914
5915         mutex_unlock(&hdev->fpriv_list_lock);
5916
5917         cb_size = (sizeof(*pkt) * num_regs) +
5918                         sizeof(struct packet_msg_prot) * 2;
5919
5920         if (cb_size > SZ_2M) {
5921                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M >> 20);
5922                 return -ENOMEM;
5923         }
5924
5925         pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
5926         if (!pending_cb)
5927                 return -ENOMEM;
5928
5929         cb = hl_cb_kernel_create(hdev, cb_size, false);
5930         if (!cb) {
5931                 kfree(pending_cb);
5932                 return -EFAULT;
5933         }
5934
5935         pkt = cb->kernel_address;
5936
5937         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5938         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5939         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5940         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5941         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5942
5943         for (i = 0; i < num_regs ; i++, pkt++) {
5944                 pkt->ctl = cpu_to_le32(ctl);
5945                 pkt->value = cpu_to_le32(val);
5946                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5947         }
5948
5949         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5950
5951         pending_cb->cb = cb;
5952         pending_cb->cb_size = cb_size;
5953         /* The queue ID MUST be an external queue ID. Otherwise, we will
5954          * have undefined behavior
5955          */
5956         pending_cb->hw_queue_id = hw_queue_id;
5957
5958         spin_lock(&ctx->pending_cb_lock);
5959         list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
5960         spin_unlock(&ctx->pending_cb_lock);
5961
5962         return 0;
5963 }
5964
5965 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5966 {
5967         u64 base_addr;
5968         u32 num_regs;
5969         int rc;
5970
5971         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5972         num_regs = NUM_OF_SOB_IN_BLOCK;
5973         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5974         if (rc) {
5975                 dev_err(hdev->dev, "failed resetting SM registers");
5976                 return -ENOMEM;
5977         }
5978
5979         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5980         num_regs = NUM_OF_SOB_IN_BLOCK;
5981         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5982         if (rc) {
5983                 dev_err(hdev->dev, "failed resetting SM registers");
5984                 return -ENOMEM;
5985         }
5986
5987         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5988         num_regs = NUM_OF_SOB_IN_BLOCK;
5989         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5990         if (rc) {
5991                 dev_err(hdev->dev, "failed resetting SM registers");
5992                 return -ENOMEM;
5993         }
5994
5995         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5996         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5997         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5998         if (rc) {
5999                 dev_err(hdev->dev, "failed resetting SM registers");
6000                 return -ENOMEM;
6001         }
6002
6003         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
6004         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6005         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6006         if (rc) {
6007                 dev_err(hdev->dev, "failed resetting SM registers");
6008                 return -ENOMEM;
6009         }
6010
6011         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6012         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6013         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6014         if (rc) {
6015                 dev_err(hdev->dev, "failed resetting SM registers");
6016                 return -ENOMEM;
6017         }
6018
6019         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6020                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6021         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6022         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6023         if (rc) {
6024                 dev_err(hdev->dev, "failed resetting SM registers");
6025                 return -ENOMEM;
6026         }
6027
6028         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6029                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6030         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6031         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6032         if (rc) {
6033                 dev_err(hdev->dev, "failed resetting SM registers");
6034                 return -ENOMEM;
6035         }
6036
6037         return 0;
6038 }
6039
6040 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6041 {
6042         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6043                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6044         int i;
6045
6046         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6047                 u64 sob_addr = CFG_BASE +
6048                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6049                                 (i * sob_delta);
6050                 u32 dma_offset = i * DMA_CORE_OFFSET;
6051
6052                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6053                                 lower_32_bits(sob_addr));
6054                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6055                                 upper_32_bits(sob_addr));
6056                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6057
6058                 /* For DMAs 2-7, WR_AWUSER_31_11 must be restored because the
6059                  * user may have modified it for SRAM reduction
6060                  */
6061                 if (i > 1)
6062                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6063                                                                 0x00000001);
6064         }
6065 }
6066
6067 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6068 {
6069         u32 qman_offset;
6070         int i;
6071
6072         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6073                 qman_offset = i * DMA_QMAN_OFFSET;
6074                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6075         }
6076
6077         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6078                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6079                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6080         }
6081
6082         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6083                 qman_offset = i * TPC_QMAN_OFFSET;
6084                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6085         }
6086
6087         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6088                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6089                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6090                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6091         }
6092 }
6093
6094 static int gaudi_restore_user_registers(struct hl_device *hdev)
6095 {
6096         int rc;
6097
6098         rc = gaudi_restore_sm_registers(hdev);
6099         if (rc)
6100                 return rc;
6101
6102         gaudi_restore_dma_registers(hdev);
6103         gaudi_restore_qm_registers(hdev);
6104
6105         return 0;
6106 }
6107
6108 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6109 {
6110         return gaudi_restore_user_registers(hdev);
6111 }
6112
6113 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6114 {
6115         struct asic_fixed_properties *prop = &hdev->asic_prop;
6116         struct gaudi_device *gaudi = hdev->asic_specific;
6117         u64 addr = prop->mmu_pgt_addr;
6118         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6119
6120         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6121                 return 0;
6122
6123         return gaudi_memset_device_memory(hdev, addr, size, 0);
6124 }
6125
6126 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6127 {
6128
6129 }
6130
6131 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6132                         bool user_address, u32 *val)
6133 {
6134         struct asic_fixed_properties *prop = &hdev->asic_prop;
6135         struct gaudi_device *gaudi = hdev->asic_specific;
6136         u64 hbm_bar_addr, host_phys_end;
6137         int rc = 0;
6138
6139         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6140
6141         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6142
6143                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6144                                 (hdev->clock_gating_mask &
6145                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6146
6147                         dev_err_ratelimited(hdev->dev,
6148                                 "Can't read register - clock gating is enabled!\n");
6149                         rc = -EFAULT;
6150                 } else {
6151                         *val = RREG32(addr - CFG_BASE);
6152                 }
6153
6154         } else if ((addr >= SRAM_BASE_ADDR) &&
6155                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6156                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6157                                 (addr - SRAM_BASE_ADDR));
6158         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6159                 u64 bar_base_addr = DRAM_PHYS_BASE +
6160                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6161
6162                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6163                 if (hbm_bar_addr != U64_MAX) {
6164                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6165                                                 (addr - bar_base_addr));
6166
6167                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6168                                                 hbm_bar_addr);
6169                 }
6170                 if (hbm_bar_addr == U64_MAX)
6171                         rc = -EIO;
6172         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6173                         user_address && !iommu_present(&pci_bus_type)) {
6174                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6175         } else {
6176                 rc = -EFAULT;
6177         }
6178
6179         return rc;
6180 }
6181
6182 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6183                         bool user_address, u32 val)
6184 {
6185         struct asic_fixed_properties *prop = &hdev->asic_prop;
6186         struct gaudi_device *gaudi = hdev->asic_specific;
6187         u64 hbm_bar_addr, host_phys_end;
6188         int rc = 0;
6189
6190         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6191
6192         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6193
6194                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6195                                 (hdev->clock_gating_mask &
6196                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6197
6198                         dev_err_ratelimited(hdev->dev,
6199                                 "Can't write register - clock gating is enabled!\n");
6200                         rc = -EFAULT;
6201                 } else {
6202                         WREG32(addr - CFG_BASE, val);
6203                 }
6204
6205         } else if ((addr >= SRAM_BASE_ADDR) &&
6206                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6207                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6208                                         (addr - SRAM_BASE_ADDR));
6209         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6210                 u64 bar_base_addr = DRAM_PHYS_BASE +
6211                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6212
6213                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6214                 if (hbm_bar_addr != U64_MAX) {
6215                         writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6216                                                 (addr - bar_base_addr));
6217
6218                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6219                                                 hbm_bar_addr);
6220                 }
6221                 if (hbm_bar_addr == U64_MAX)
6222                         rc = -EIO;
6223         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6224                         user_address && !iommu_present(&pci_bus_type)) {
6225                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6226         } else {
6227                 rc = -EFAULT;
6228         }
6229
6230         return rc;
6231 }
6232
6233 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6234                                 bool user_address, u64 *val)
6235 {
6236         struct asic_fixed_properties *prop = &hdev->asic_prop;
6237         struct gaudi_device *gaudi = hdev->asic_specific;
6238         u64 hbm_bar_addr, host_phys_end;
6239         int rc = 0;
6240
6241         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6242
6243         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6244
6245                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6246                                 (hdev->clock_gating_mask &
6247                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6248
6249                         dev_err_ratelimited(hdev->dev,
6250                                 "Can't read register - clock gating is enabled!\n");
6251                         rc = -EFAULT;
6252                 } else {
6253                         u32 val_l = RREG32(addr - CFG_BASE);
6254                         u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6255
6256                         *val = (((u64) val_h) << 32) | val_l;
6257                 }
6258
6259         } else if ((addr >= SRAM_BASE_ADDR) &&
6260                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6261                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6262                                 (addr - SRAM_BASE_ADDR));
6263         } else if (addr <=
6264                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6265                 u64 bar_base_addr = DRAM_PHYS_BASE +
6266                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6267
6268                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6269                 if (hbm_bar_addr != U64_MAX) {
6270                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6271                                                 (addr - bar_base_addr));
6272
6273                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6274                                                 hbm_bar_addr);
6275                 }
6276                 if (hbm_bar_addr == U64_MAX)
6277                         rc = -EIO;
6278         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6279                         user_address && !iommu_present(&pci_bus_type)) {
6280                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6281         } else {
6282                 rc = -EFAULT;
6283         }
6284
6285         return rc;
6286 }
6287
6288 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6289                                 bool user_address, u64 val)
6290 {
6291         struct asic_fixed_properties *prop = &hdev->asic_prop;
6292         struct gaudi_device *gaudi = hdev->asic_specific;
6293         u64 hbm_bar_addr, host_phys_end;
6294         int rc = 0;
6295
6296         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6297
6298         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6299
6300                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6301                                 (hdev->clock_gating_mask &
6302                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6303
6304                         dev_err_ratelimited(hdev->dev,
6305                                 "Can't write register - clock gating is enabled!\n");
6306                         rc = -EFAULT;
6307                 } else {
6308                         WREG32(addr - CFG_BASE, lower_32_bits(val));
6309                         WREG32(addr + sizeof(u32) - CFG_BASE,
6310                                 upper_32_bits(val));
6311                 }
6312
6313         } else if ((addr >= SRAM_BASE_ADDR) &&
6314                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6315                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6316                                         (addr - SRAM_BASE_ADDR));
6317         } else if (addr <=
6318                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6319                 u64 bar_base_addr = DRAM_PHYS_BASE +
6320                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6321
6322                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6323                 if (hbm_bar_addr != U64_MAX) {
6324                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6325                                                 (addr - bar_base_addr));
6326
6327                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6328                                                 hbm_bar_addr);
6329                 }
6330                 if (hbm_bar_addr == U64_MAX)
6331                         rc = -EIO;
6332         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6333                         user_address && !iommu_present(&pci_bus_type)) {
6334                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6335         } else {
6336                 rc = -EFAULT;
6337         }
6338
6339         return rc;
6340 }
6341
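/*
 * Program one DMA core directly (bypassing its QMAN) to copy 'size_to_dma'
 * bytes from device address 'addr' to the host buffer at 'dma_addr', then
 * poll the engine status until it is no longer busy and check the error
 * cause register.
 */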
6342 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6343                                         u32 size_to_dma, dma_addr_t dma_addr)
6344 {
6345         u32 err_cause, val;
6346         u64 dma_offset;
6347         int rc;
6348
6349         dma_offset = dma_id * DMA_CORE_OFFSET;
6350
6351         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6352         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6353         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6354         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6355         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6356         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6357                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6358
6359         rc = hl_poll_timeout(
6360                 hdev,
6361                 mmDMA0_CORE_STS0 + dma_offset,
6362                 val,
6363                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6364                 0,
6365                 1000000);
6366
6367         if (rc) {
6368                 dev_err(hdev->dev,
6369                         "DMA %d timed-out during reading of 0x%llx\n",
6370                         dma_id, addr);
6371                 return -EIO;
6372         }
6373
6374         /* Verify DMA is OK */
6375         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6376         if (err_cause) {
6377                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6378                 dev_dbg(hdev->dev,
6379                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6380                         err_cause);
6381                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6382
6383                 return -EIO;
6384         }
6385
6386         return 0;
6387 }
6388
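/*
 * Debugfs helper that reads an arbitrary device memory range into a host
 * blob by driving an idle PCI DMA engine directly, in chunks of up to 2MB
 * through a coherent bounce buffer, while the engine's QMAN CPs are stopped.
 */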
6389 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6390                                 void *blob_addr)
6391 {
6392         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6393         struct gaudi_device *gaudi = hdev->asic_specific;
6394         u64 dma_offset, qm_offset;
6395         dma_addr_t dma_addr;
6396         void *kernel_addr;
6397         bool is_eng_idle;
6398         int rc = 0, dma_id;
6399
6400         kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6401                                                 hdev, SZ_2M,
6402                                                 &dma_addr,
6403                                                 GFP_KERNEL | __GFP_ZERO);
6404
6405         if (!kernel_addr)
6406                 return -ENOMEM;
6407
6408         mutex_lock(&gaudi->clk_gate_mutex);
6409
6410         hdev->asic_funcs->disable_clock_gating(hdev);
6411
6412         hdev->asic_funcs->hw_queues_lock(hdev);
6413
6414         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6415         dma_offset = dma_id * DMA_CORE_OFFSET;
6416         qm_offset = dma_id * DMA_QMAN_OFFSET;
6417         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6418         is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6419
6420         if (!is_eng_idle) {
6421                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6422                 dma_offset = dma_id * DMA_CORE_OFFSET;
6423                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6424                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6425                 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6426
6427                 if (!is_eng_idle) {
6428                         dev_err_ratelimited(hdev->dev,
6429                                 "Can't read via DMA because it is BUSY\n");
6430                         rc = -EAGAIN;
6431                         goto out;
6432                 }
6433         }
6434
6435         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6436         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6437                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6438
6439         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6440          * using the compute ctx ASID, if it exists. Otherwise, use the kernel ctx
6441          * ASID
6442          */
6443         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6444
6445         /* Verify DMA is OK */
6446         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6447         if (err_cause) {
6448                 dev_dbg(hdev->dev,
6449                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6450                         err_cause);
6451                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6452         }
6453
6454         pos = 0;
6455         size_left = size;
6456         size_to_dma = SZ_2M;
6457
6458         while (size_left > 0) {
6459
6460                 if (size_left < SZ_2M)
6461                         size_to_dma = size_left;
6462
6463                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6464                                                 dma_addr);
6465                 if (rc)
6466                         break;
6467
6468                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6469
6470                 if (size_left <= SZ_2M)
6471                         break;
6472
6473                 pos += SZ_2M;
6474                 addr += SZ_2M;
6475                 size_left -= SZ_2M;
6476         }
6477
6478         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6479          * using the compute ctx ASID, if it exists. Otherwise, use the kernel ctx
6480          * ASID
6481          */
6482         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6483                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6484
6485         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6486
6487 out:
6488         hdev->asic_funcs->hw_queues_unlock(hdev);
6489
6490         hdev->asic_funcs->set_clock_gating(hdev);
6491
6492         mutex_unlock(&gaudi->clk_gate_mutex);
6493
6494         hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6495                                                 dma_addr);
6496
6497         return rc;
6498 }
6499
6500 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6501 {
6502         struct gaudi_device *gaudi = hdev->asic_specific;
6503
6504         if (hdev->hard_reset_pending)
6505                 return U64_MAX;
6506
6507         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6508                         (addr - gaudi->hbm_bar_cur_addr));
6509 }
6510
6511 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6512 {
6513         struct gaudi_device *gaudi = hdev->asic_specific;
6514
6515         if (hdev->hard_reset_pending)
6516                 return;
6517
6518         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6519                         (addr - gaudi->hbm_bar_cur_addr));
6520 }
6521
6522 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6523 {
6524         /* mask to zero the MMBP and ASID bits */
6525         WREG32_AND(reg, ~0x7FF);
6526         WREG32_OR(reg, asid);
6527 }
6528
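/*
 * Program the given ASID into the non-secure transaction properties of the
 * engines' QMANs so that their memory accesses are translated by the MMU
 * under that ASID.
 */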
6529 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6530 {
6531         struct gaudi_device *gaudi = hdev->asic_specific;
6532
6533         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6534                 return;
6535
6536         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6537                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6538                 return;
6539         }
6540
6541         mutex_lock(&gaudi->clk_gate_mutex);
6542
6543         hdev->asic_funcs->disable_clock_gating(hdev);
6544
6545         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6546         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6547         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6548         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6549         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6550
6551         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6552         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6553         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6554         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6555         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6556
6557         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6558         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6559         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6560         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6561         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6562
6563         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6564         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6565         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6566         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6567         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6568
6569         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6570         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6571         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6572         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6573         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6574
6575         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6576         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6577         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6578         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6579         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6580
6581         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6582         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6583         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6584         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6585         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6586
6587         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6588         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6589         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6590         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6591         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6592
6593         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6594         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6595         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6596         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6597         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6598         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6599         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6600         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6601
6602         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6603         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6604         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6605         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6606         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6607         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6608         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6609
6610         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6611         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6612         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6613         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6614         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6615         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6616         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6617
6618         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6619         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6620         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6621         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6622         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6623         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6624         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6625
6626         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6627         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6628         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6629         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6630         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6631         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6632         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6633
6634         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6635         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6636         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6637         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6638         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6639         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6640         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6641
6642         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6643         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6644         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6645         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6646         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6647         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6648         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6649
6650         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6651         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6652         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6653         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6654         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6655         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6656         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6657
6658         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6659         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6660         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6661         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6662         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6663         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6664         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6665
6666         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6667         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6668         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6669         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6670         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6671         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6672         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6673         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6674         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6675         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6676
6677         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6678         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6679         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6680         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6681         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6682         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6683         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6684         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6685         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6686         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6687         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6688         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6689
6690         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6691                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6692                                 asid);
6693                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6694                                 asid);
6695                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6696                                 asid);
6697                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6698                                 asid);
6699                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6700                                 asid);
6701         }
6702
6703         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6704                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6705                                 asid);
6706                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6707                                 asid);
6708                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6709                                 asid);
6710                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6711                                 asid);
6712                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6713                                 asid);
6714         }
6715
6716         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6717                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6718                                 asid);
6719                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6720                                 asid);
6721                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6722                                 asid);
6723                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6724                                 asid);
6725                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6726                                 asid);
6727         }
6728
6729         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6730                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6731                                 asid);
6732                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6733                                 asid);
6734                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6735                                 asid);
6736                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6737                                 asid);
6738                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6739                                 asid);
6740         }
6741
6742         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6743                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6744                                 asid);
6745                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6746                                 asid);
6747                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6748                                 asid);
6749                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6750                                 asid);
6751                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6752                                 asid);
6753         }
6754
6755         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6756                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6757                                 asid);
6758                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6759                                 asid);
6760                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6761                                 asid);
6762                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6763                                 asid);
6764                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6765                                 asid);
6766         }
6767
6768         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6769                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6770                                 asid);
6771                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6772                                 asid);
6773                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6774                                 asid);
6775                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6776                                 asid);
6777                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6778                                 asid);
6779         }
6780
6781         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6782                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6783                                 asid);
6784                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6785                                 asid);
6786                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6787                                 asid);
6788                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6789                                 asid);
6790                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6791                                 asid);
6792         }
6793
6794         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6795                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6796                                 asid);
6797                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6798                                 asid);
6799                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6800                                 asid);
6801                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6802                                 asid);
6803                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6804                                 asid);
6805         }
6806
6807         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6808                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6809                                 asid);
6810                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6811                                 asid);
6812                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6813                                 asid);
6814                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6815                                 asid);
6816                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6817                                 asid);
6818         }
6819
6820         hdev->asic_funcs->set_clock_gating(hdev);
6821
6822         mutex_unlock(&gaudi->clk_gate_mutex);
6823 }
6824
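/* Send a driver job on the PCI DMA QMAN0 queue: a MSG_PROT fence packet is
 * placed at the end of the patched CB, DMA core protection is temporarily
 * opened, and the fence value is polled until completion or timeout.
 * The device must be idle, otherwise -EBUSY is returned.
 */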
6825 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6826                 struct hl_cs_job *job)
6827 {
6828         struct packet_msg_prot *fence_pkt;
6829         u32 *fence_ptr;
6830         dma_addr_t fence_dma_addr;
6831         struct hl_cb *cb;
6832         u32 tmp, timeout, dma_offset;
6833         int rc;
6834
6835         if (hdev->pldm)
6836                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6837         else
6838                 timeout = HL_DEVICE_TIMEOUT_USEC;
6839
6840         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6841                 dev_err_ratelimited(hdev->dev,
6842                         "Can't send driver job on QMAN0 because the device is not idle\n");
6843                 return -EBUSY;
6844         }
6845
6846         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6847                                                         &fence_dma_addr);
6848         if (!fence_ptr) {
6849                 dev_err(hdev->dev,
6850                         "Failed to allocate fence memory for QMAN0\n");
6851                 return -ENOMEM;
6852         }
6853
6854         cb = job->patched_cb;
6855
6856         fence_pkt = cb->kernel_address +
6857                         job->job_cb_size - sizeof(struct packet_msg_prot);
6858
6859         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6860         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6861         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6862
6863         fence_pkt->ctl = cpu_to_le32(tmp);
6864         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6865         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6866
6867         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6868
6869         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6870
6871         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6872                                         job->job_cb_size, cb->bus_address);
6873         if (rc) {
6874                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6875                 goto free_fence_ptr;
6876         }
6877
6878         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6879                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6880                                 timeout, true);
6881
6882         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6883
6884         if (rc == -ETIMEDOUT) {
6885                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6886                 goto free_fence_ptr;
6887         }
6888
6889 free_fence_ptr:
6890         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6891                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6892
6893         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6894                                         fence_dma_addr);
6895         return rc;
6896 }
6897
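/* Copy the event's name from the IRQ map table into @desc, or "N/A" if the
 * event type is out of range or not marked valid.
 */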
6898 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6899 {
6900         if (event_type >= GAUDI_EVENT_SIZE)
6901                 goto event_not_supported;
6902
6903         if (!gaudi_irq_map_table[event_type].valid)
6904                 goto event_not_supported;
6905
6906         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6907
6908         return;
6909
6910 event_not_supported:
6911         snprintf(desc, size, "N/A");
6912 }
6913
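/* Each DMA_IF routes two DMA engines. Narrow the RAZWI initiator down to a
 * specific engine by checking the HBW read/write error cause of both
 * candidates; if it cannot be determined, report both.
 */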
6914 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6915                                                         u32 x_y, bool is_write)
6916 {
6917         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6918
6919         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6920                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6921
6922         switch (x_y) {
6923         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6924         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6925                 dma_id[0] = 0;
6926                 dma_id[1] = 2;
6927                 break;
6928         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6929         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6930                 dma_id[0] = 1;
6931                 dma_id[1] = 3;
6932                 break;
6933         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6934         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6935                 dma_id[0] = 4;
6936                 dma_id[1] = 6;
6937                 break;
6938         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6939         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6940                 dma_id[0] = 5;
6941                 dma_id[1] = 7;
6942                 break;
6943         default:
6944                 goto unknown_initiator;
6945         }
6946
6947         for (i = 0 ; i < 2 ; i++) {
6948                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6949                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6950         }
6951
6952         switch (x_y) {
6953         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6954         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6955                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6956                         return "DMA0";
6957                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6958                         return "DMA2";
6959                 else
6960                         return "DMA0 or DMA2";
6961         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6962         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6963                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6964                         return "DMA1";
6965                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6966                         return "DMA3";
6967                 else
6968                         return "DMA1 or DMA3";
6969         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6970         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6971                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6972                         return "DMA4";
6973                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6974                         return "DMA6";
6975                 else
6976                         return "DMA4 or DMA6";
6977         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6978         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6979                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6980                         return "DMA5";
6981                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6982                         return "DMA7";
6983                 else
6984                         return "DMA5 or DMA7";
6985         }
6986
6987 unknown_initiator:
6988         return "unknown initiator";
6989 }
6990
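/* Decode the RAZWI initiator name from the X/Y router coordinates and AXI ID
 * captured in the MMU RAZWI write/read ID register.
 */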
6991 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
6992                                                         bool is_write)
6993 {
6994         u32 val, x_y, axi_id;
6995
6996         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6997                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
6998         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6999                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
7000         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
7001                         RAZWI_INITIATOR_AXI_ID_SHIFT);
7002
7003         switch (x_y) {
7004         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
7005                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7006                         return "TPC0";
7007                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7008                         return "NIC0";
7009                 break;
7010         case RAZWI_INITIATOR_ID_X_Y_TPC1:
7011                 return "TPC1";
7012         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
7013         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
7014                 return "MME0";
7015         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
7016         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
7017                 return "MME1";
7018         case RAZWI_INITIATOR_ID_X_Y_TPC2:
7019                 return "TPC2";
7020         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
7021                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7022                         return "TPC3";
7023                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
7024                         return "PCI";
7025                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
7026                         return "CPU";
7027                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
7028                         return "PSOC";
7029                 break;
7030         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7031         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7032         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7033         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7034         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7035         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7036         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7037         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7038                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
7039         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7040                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7041                         return "TPC4";
7042                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7043                         return "NIC1";
7044                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7045                         return "NIC2";
7046                 break;
7047         case RAZWI_INITIATOR_ID_X_Y_TPC5:
7048                 return "TPC5";
7049         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7050         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7051                 return "MME2";
7052         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7053         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7054                 return "MME3";
7055         case RAZWI_INITIATOR_ID_X_Y_TPC6:
7056                 return "TPC6";
7057         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7058                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7059                         return "TPC7";
7060                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7061                         return "NIC4";
7062                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7063                         return "NIC5";
7064                 break;
7065         default:
7066                 break;
7067         }
7068
7069         dev_err(hdev->dev,
7070                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7071                 val,
7072                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7073                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7074                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7075                         RAZWI_INITIATOR_AXI_ID_MASK);
7076
7077         return "unknown initiator";
7078 }
7079
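/* Print and clear any pending illegal write/read (RAZWI) indications captured
 * by the MMU.
 */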
7080 static void gaudi_print_razwi_info(struct hl_device *hdev)
7081 {
7082         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7083                 dev_err_ratelimited(hdev->dev,
7084                         "RAZWI event caused by illegal write of %s\n",
7085                         gaudi_get_razwi_initiator_name(hdev, true));
7086                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7087         }
7088
7089         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7090                 dev_err_ratelimited(hdev->dev,
7091                         "RAZWI event caused by illegal read of %s\n",
7092                         gaudi_get_razwi_initiator_name(hdev, false));
7093                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7094         }
7095 }
7096
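/* Print and clear the captured MMU page fault and access error virtual
 * addresses, if their capture entries are valid.
 */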
7097 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7098 {
7099         struct gaudi_device *gaudi = hdev->asic_specific;
7100         u64 addr;
7101         u32 val;
7102
7103         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7104                 return;
7105
7106         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7107         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7108                 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7109                 addr <<= 32;
7110                 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7111
7112                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7113                                         addr);
7114
7115                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7116         }
7117
7118         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7119         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7120                 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7121                 addr <<= 32;
7122                 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7123
7124                 dev_err_ratelimited(hdev->dev,
7125                                 "MMU access error on va 0x%llx\n", addr);
7126
7127                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7128         }
7129 }
7130
7131 /*
7132  *  +-------------------+------------------------------------------------------+
7133  *  | Configuration Reg |                     Description                      |
7134  *  |      Address      |                                                      |
7135  *  +-------------------+------------------------------------------------------+
7136  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7137  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7138  *  |                   |0xF34 memory wrappers 63:32                           |
7139  *  |                   |0xF38 memory wrappers 95:64                           |
7140  *  |                   |0xF3C memory wrappers 127:96                          |
7141  *  +-------------------+------------------------------------------------------+
7142  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7143  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7144  *  |                   |0xF44 memory wrappers 63:32                           |
7145  *  |                   |0xF48 memory wrappers 95:64                           |
7146  *  |                   |0xF4C memory wrappers 127:96                          |
7147  *  +-------------------+------------------------------------------------------+
7148  */
7149 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7150                 struct ecc_info_extract_params *params, u64 *ecc_address,
7151                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7152 {
7153         struct gaudi_device *gaudi = hdev->asic_specific;
7154         u32 i, num_mem_regs, reg, err_bit;
7155         u64 err_addr, err_word = 0;
7156         int rc = 0;
7157
7158         num_mem_regs = params->num_memories / 32 +
7159                         ((params->num_memories % 32) ? 1 : 0);
7160
7161         if (params->block_address >= CFG_BASE)
7162                 params->block_address -= CFG_BASE;
7163
7164         if (params->derr)
7165                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7166         else
7167                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7168
7169         if (params->disable_clock_gating) {
7170                 mutex_lock(&gaudi->clk_gate_mutex);
7171                 hdev->asic_funcs->disable_clock_gating(hdev);
7172         }
7173
7174         /* Set invalid wrapper index */
7175         *memory_wrapper_idx = 0xFF;
7176
7177         /* Iterate through memory wrappers, a single bit must be set */
7178         for (i = 0 ; i < num_mem_regs ; i++) {
7179                 /* error indication registers are consecutive, 4 bytes apart */
7180                 err_word = RREG32(err_addr + i * 4);
7181                 if (err_word) {
7182                         err_bit = __ffs(err_word);
7183                         *memory_wrapper_idx = err_bit + (32 * i);
7184                         break;
7185                 }
7186         }
7187
7188         if (*memory_wrapper_idx == 0xFF) {
7189                 dev_err(hdev->dev, "ECC error information cannot be found\n");
7190                 rc = -EINVAL;
7191                 goto enable_clk_gate;
7192         }
7193
7194         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7195                         *memory_wrapper_idx);
7196
7197         *ecc_address =
7198                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7199         *ecc_syndrom =
7200                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7201
7202         /* Clear error indication */
7203         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7204         if (params->derr)
7205                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7206         else
7207                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7208
7209         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7210
7211 enable_clk_gate:
7212         if (params->disable_clock_gating) {
7213                 hdev->asic_funcs->set_clock_gating(hdev);
7214
7215                 mutex_unlock(&gaudi->clk_gate_mutex);
7216         }
7217
7218         return rc;
7219 }
7220
7221 /*
7222  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7223  *
7224  * @idx: the current pi/ci value
7225  * @q_len: the queue length (power of 2)
7226  *
7227  * @return the cyclically decremented index
7228  */
7229 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7230 {
7231         u32 mask = q_len - 1;
7232
7233         /*
7234          * modular decrement is equivalent to adding (queue_len - 1);
7235          * later we take the LSBs to make sure the value is in the
7236          * range [0, queue_len - 1]
7237          */
7238         return (idx + q_len - 1) & mask;
7239 }
7240
7241 /**
7242  * gaudi_print_sw_config_stream_data - print SW config stream data
7243  *
7244  * @hdev: pointer to the habanalabs device structure
7245  * @stream: the QMAN's stream
7246  * @qman_base: base address of QMAN registers block
7247  */
7248 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7249                                                 u64 qman_base)
7250 {
7251         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7252         u32 cq_ptr_lo_off, size;
7253
7254         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7255
7256         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7257                                                 stream * cq_ptr_lo_off;
7258         cq_ptr_hi = cq_ptr_lo +
7259                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7260         cq_tsize = cq_ptr_lo +
7261                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7262
7263         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7264         size = RREG32(cq_tsize);
7265         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
7266                                                         stream, cq_ptr, size);
7267 }
7268
7269 /**
7270  * gaudi_print_last_pqes_on_err - print last PQEs on error
7271  *
7272  * @hdev: pointer to the habanalabs device structure
7273  * @qid_base: first QID of the QMAN (out of 4 streams)
7274  * @stream: the QMAN's stream
7275  * @qman_base: base address of QMAN registers block
7276  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7277  */
7278 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7279                                                 u32 stream, u64 qman_base,
7280                                                 bool pr_sw_conf)
7281 {
7282         u32 ci, qm_ci_stream_off, queue_len;
7283         struct hl_hw_queue *q;
7284         u64 pq_ci;
7285         int i;
7286
7287         q = &hdev->kernel_queues[qid_base + stream];
7288
7289         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7290         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7291                                                 stream * qm_ci_stream_off;
7292
7293         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7294                                         q->int_queue_len : HL_QUEUE_LENGTH;
7295
7296         hdev->asic_funcs->hw_queues_lock(hdev);
7297
7298         if (pr_sw_conf)
7299                 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7300
7301         ci = RREG32(pq_ci);
7302
7303         /* we should start printing from ci - 1 */
7304         ci = gaudi_queue_idx_dec(ci, queue_len);
7305
7306         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7307                 struct hl_bd *bd;
7308                 u64 addr;
7309                 u32 len;
7310
7311                 bd = q->kernel_address;
7312                 bd += ci;
7313
7314                 len = le32_to_cpu(bd->len);
7315                 /* len 0 means an uninitialized entry - break */
7316                 if (!len)
7317                         break;
7318
7319                 addr = le64_to_cpu(bd->ptr);
7320
7321                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7322                                                         stream, ci, addr, len);
7323
7324                 /* get previous ci, wrap if needed */
7325                 ci = gaudi_queue_idx_dec(ci, queue_len);
7326         }
7327
7328         hdev->asic_funcs->hw_queues_unlock(hdev);
7329 }
7330
7331 /**
7332  * print_qman_data_on_err - extract QMAN data on error
7333  *
7334  * @hdev: pointer to the habanalabs device structure
7335  * @qid_base: first QID of the QMAN (out of 4 streams)
7336  * @stream: the QMAN's stream
7337  * @qman_base: base address of QMAN registers block
7338  *
7339  * This function attempts to extract as much data as possible on a QMAN error.
7340  * For an upper CP, print the SW config stream data and the last 8 PQEs.
7341  * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7342  */
7343 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7344                                                 u32 stream, u64 qman_base)
7345 {
7346         u32 i;
7347
7348         if (stream != QMAN_STREAMS) {
7349                 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7350                                                                         true);
7351                 return;
7352         }
7353
7354         gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7355
7356         for (i = 0; i < QMAN_STREAMS; i++)
7357                 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7358                                                                         false);
7359 }
7360
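/* Scan the per-stream (and lower CP) GLBL_STS1 registers and the ARB error
 * cause register of a QMAN and log every error bit that is set. Errors are
 * cleared unless stop_on_err is set, in which case the QMAN data is dumped
 * instead.
 */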
7361 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7362                                           const char *qm_name,
7363                                           u64 qman_base,
7364                                           u32 qid_base)
7365 {
7366         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7367         u64 glbl_sts_addr, arb_err_addr;
7368         char reg_desc[32];
7369
7370         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7371         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7372
7373         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7374         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7375                 glbl_sts_clr_val = 0;
7376                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7377
7378                 if (!glbl_sts_val)
7379                         continue;
7380
7381                 if (i == QMAN_STREAMS)
7382                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7383                 else
7384                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7385
7386                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7387                         if (glbl_sts_val & BIT(j)) {
7388                                 dev_err_ratelimited(hdev->dev,
7389                                                 "%s %s. err cause: %s\n",
7390                                                 qm_name, reg_desc,
7391                                                 gaudi_qman_error_cause[j]);
7392                                 glbl_sts_clr_val |= BIT(j);
7393                         }
7394                 }
7395
7396                 /* Write 1 to clear errors */
7397                 if (!hdev->stop_on_err)
7398                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7399                 else
7400                         print_qman_data_on_err(hdev, qid_base, i, qman_base);
7401         }
7402
7403         arb_err_val = RREG32(arb_err_addr);
7404
7405         if (!arb_err_val)
7406                 return;
7407
7408         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7409                 if (arb_err_val & BIT(j)) {
7410                         dev_err_ratelimited(hdev->dev,
7411                                         "%s ARB_ERR. err cause: %s\n",
7412                                         qm_name,
7413                                         gaudi_qman_arb_error_cause[j]);
7414                 }
7415         }
7416 }
7417
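/* Print sync manager SEI error details. The SM name index is derived from the
 * DMA_IF SEI event number (the enum is ordered opposite to the SM numbering).
 */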
7418 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7419                 struct hl_eq_sm_sei_data *sei_data)
7420 {
7421         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7422
7423         /* Flip the bits as the enum is ordered in the opposite way */
7424         index = (index ^ 0x3) & 0x3;
7425
7426         switch (sei_data->sei_cause) {
7427         case SM_SEI_SO_OVERFLOW:
7428                 dev_err_ratelimited(hdev->dev,
7429                         "%s SEI Error: SOB Group %u overflow/underflow",
7430                         gaudi_sync_manager_names[index],
7431                         le32_to_cpu(sei_data->sei_log));
7432                 break;
7433         case SM_SEI_LBW_4B_UNALIGNED:
7434                 dev_err_ratelimited(hdev->dev,
7435                         "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7436                         gaudi_sync_manager_names[index],
7437                         le32_to_cpu(sei_data->sei_log));
7438                 break;
7439         case SM_SEI_AXI_RESPONSE_ERR:
7440                 dev_err_ratelimited(hdev->dev,
7441                         "%s SEI Error: AXI ID %u response error",
7442                         gaudi_sync_manager_names[index],
7443                         le32_to_cpu(sei_data->sei_log));
7444                 break;
7445         default:
7446                 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7447                                 le32_to_cpu(sei_data->sei_log));
7448                 break;
7449         }
7450 }
7451
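/* Report an ECC error. The address, syndrome and memory wrapper index are
 * either taken from the FW event payload or extracted directly from the
 * block's ECC registers, depending on the event type.
 */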
7452 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7453                 struct hl_eq_ecc_data *ecc_data)
7454 {
7455         struct ecc_info_extract_params params;
7456         u64 ecc_address = 0, ecc_syndrom = 0;
7457         u8 index, memory_wrapper_idx = 0;
7458         bool extract_info_from_fw;
7459         int rc;
7460
7461         switch (event_type) {
7462         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7463         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7464                 extract_info_from_fw = true;
7465                 break;
7466         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7467                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7468                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7469                 params.num_memories = 90;
7470                 params.derr = false;
7471                 params.disable_clock_gating = true;
7472                 extract_info_from_fw = false;
7473                 break;
7474         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7475                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7476                 params.block_address =
7477                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7478                 params.num_memories = 90;
7479                 params.derr = true;
7480                 params.disable_clock_gating = true;
7481                 extract_info_from_fw = false;
7482                 break;
7483         case GAUDI_EVENT_MME0_ACC_SERR:
7484         case GAUDI_EVENT_MME1_ACC_SERR:
7485         case GAUDI_EVENT_MME2_ACC_SERR:
7486         case GAUDI_EVENT_MME3_ACC_SERR:
7487                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7488                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7489                 params.num_memories = 128;
7490                 params.derr = false;
7491                 params.disable_clock_gating = true;
7492                 extract_info_from_fw = false;
7493                 break;
7494         case GAUDI_EVENT_MME0_ACC_DERR:
7495         case GAUDI_EVENT_MME1_ACC_DERR:
7496         case GAUDI_EVENT_MME2_ACC_DERR:
7497         case GAUDI_EVENT_MME3_ACC_DERR:
7498                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7499                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7500                 params.num_memories = 128;
7501                 params.derr = true;
7502                 params.disable_clock_gating = true;
7503                 extract_info_from_fw = false;
7504                 break;
7505         case GAUDI_EVENT_MME0_SBAB_SERR:
7506         case GAUDI_EVENT_MME1_SBAB_SERR:
7507         case GAUDI_EVENT_MME2_SBAB_SERR:
7508         case GAUDI_EVENT_MME3_SBAB_SERR:
7509                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7510                 params.block_address =
7511                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7512                 params.num_memories = 33;
7513                 params.derr = false;
7514                 params.disable_clock_gating = true;
7515                 extract_info_from_fw = false;
7516                 break;
7517         case GAUDI_EVENT_MME0_SBAB_DERR:
7518         case GAUDI_EVENT_MME1_SBAB_DERR:
7519         case GAUDI_EVENT_MME2_SBAB_DERR:
7520         case GAUDI_EVENT_MME3_SBAB_DERR:
7521                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7522                 params.block_address =
7523                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7524                 params.num_memories = 33;
7525                 params.derr = true;
7526                 params.disable_clock_gating = true;
7527                 extract_info_from_fw = false;
7528                 break;
7529         default:
7530                 return;
7531         }
7532
7533         if (extract_info_from_fw) {
7534                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7535                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7536                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7537         } else {
7538                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7539                                 &ecc_syndrom, &memory_wrapper_idx);
7540                 if (rc)
7541                         return;
7542         }
7543
7544         dev_err(hdev->dev,
7545                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7546                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7547 }
7548
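/* Translate a QMAN error event into the corresponding queue-ID base and QMAN
 * register base, then delegate to the generic QMAN error handler.
 */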
7549 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7550 {
7551         u64 qman_base;
7552         char desc[32];
7553         u32 qid_base;
7554         u8 index;
7555
7556         switch (event_type) {
7557         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7558                 index = event_type - GAUDI_EVENT_TPC0_QM;
7559                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7560                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7561                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7562                 break;
7563         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7564                 index = event_type - GAUDI_EVENT_MME0_QM;
7565                 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7566                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7567                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7568                 break;
7569         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7570                 index = event_type - GAUDI_EVENT_DMA0_QM;
7571                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7572                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7573                 if (index > 1)
7574                         qid_base++;
7575                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7576                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7577                 break;
7578         case GAUDI_EVENT_NIC0_QM0:
7579                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7580                 qman_base = mmNIC0_QM0_BASE;
7581                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7582                 break;
7583         case GAUDI_EVENT_NIC0_QM1:
7584                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7585                 qman_base = mmNIC0_QM1_BASE;
7586                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7587                 break;
7588         case GAUDI_EVENT_NIC1_QM0:
7589                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7590                 qman_base = mmNIC1_QM0_BASE;
7591                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7592                 break;
7593         case GAUDI_EVENT_NIC1_QM1:
7594                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7595                 qman_base = mmNIC1_QM1_BASE;
7596                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7597                 break;
7598         case GAUDI_EVENT_NIC2_QM0:
7599                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7600                 qman_base = mmNIC2_QM0_BASE;
7601                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7602                 break;
7603         case GAUDI_EVENT_NIC2_QM1:
7604                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7605                 qman_base = mmNIC2_QM1_BASE;
7606                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7607                 break;
7608         case GAUDI_EVENT_NIC3_QM0:
7609                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7610                 qman_base = mmNIC3_QM0_BASE;
7611                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7612                 break;
7613         case GAUDI_EVENT_NIC3_QM1:
7614                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7615                 qman_base = mmNIC3_QM1_BASE;
7616                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7617                 break;
7618         case GAUDI_EVENT_NIC4_QM0:
7619                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7620                 qman_base = mmNIC4_QM0_BASE;
7621                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7622                 break;
7623         case GAUDI_EVENT_NIC4_QM1:
7624                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7625                 qman_base = mmNIC4_QM1_BASE;
7626                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7627                 break;
7628         default:
7629                 return;
7630         }
7631
7632         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7633 }
7634
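/* Print the received event's name and, when @razwi is set, the RAZWI and MMU
 * error details as well.
 */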
7635 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7636                                         bool razwi)
7637 {
7638         char desc[64] = "";
7639
7640         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7641         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7642                 event_type, desc);
7643
7644         if (razwi) {
7645                 gaudi_print_razwi_info(hdev);
7646                 gaudi_print_mmu_error_info(hdev);
7647         }
7648 }
7649
7650 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7651                                         struct cpucp_pkt_sync_err *sync_err)
7652 {
7653         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7654
7655         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7656                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7657 }
7658
7659 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7660                                         struct hl_eq_fw_alive *fw_alive)
7661 {
7662         dev_err(hdev->dev,
7663                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7664                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7665                 "Minor" : "Critical", fw_alive->process_id,
7666                 fw_alive->thread_id, fw_alive->uptime_seconds);
7667 }
7668
7669 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7670 {
7671         struct gaudi_device *gaudi = hdev->asic_specific;
7672
7673         /* Unmask all IRQs since some could have been received
7674          * during the soft reset
7675          */
7676         return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7677 }
7678
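/* Report HBM ECC/parity interrupt information, either from the FW-supplied
 * data (when FW HBM ECC reporting is enabled) or by reading the HBM MC
 * registers directly when security allows it.
 */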
7679 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7680                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7681 {
7682         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7683         int rc = 0;
7684
7685         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7686                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7687                 if (!hbm_ecc_data) {
7688                         dev_err(hdev->dev, "No FW ECC data");
7689                         return 0;
7690                 }
7691
7692                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7693                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7694                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7695                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7696                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7697                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7698                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7699                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7700                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7701                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7702                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7703                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7704                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7705                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7706
7707                 dev_err(hdev->dev,
7708                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7709                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7710                 dev_err(hdev->dev,
7711                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7712                         device, ch, hbm_ecc_data->first_addr, type,
7713                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7714                         hbm_ecc_data->dec_cnt);
7715                 return 0;
7716         }
7717
7718         if (hdev->asic_prop.fw_security_enabled) {
7719                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7720                 return 0;
7721         }
7722
7723         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7724         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
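                     /* Per-channel registers sit at a 0x1000 stride; each
                      * iteration reports pseudo-channels ch * 2 and ch * 2 + 1
                      */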
7725                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7726                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7727                 if (val) {
7728                         rc = -EIO;
7729                         dev_err(hdev->dev,
7730                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7731                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7732                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7733                                 (val >> 4) & 0x1);
7734
7735                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7736                         dev_err(hdev->dev,
7737                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7738                                 device, ch * 2,
7739                                 RREG32(base + ch * 0x1000 + 0x064),
7740                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7741                                 (val2 & 0xFF0000) >> 16,
7742                                 (val2 & 0xFF000000) >> 24);
7743                 }
7744
7745                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7746                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7747                 if (val) {
7748                         rc = -EIO;
7749                         dev_err(hdev->dev,
7750                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7751                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7752                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7753                                 (val >> 4) & 0x1);
7754
7755                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7756                         dev_err(hdev->dev,
7757                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7758                                 device, ch * 2 + 1,
7759                                 RREG32(base + ch * 0x1000 + 0x074),
7760                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7761                                 (val2 & 0xFF0000) >> 16,
7762                                 (val2 & 0xFF000000) >> 24);
7763                 }
7764
7765                 /* Clear interrupts */
7766                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7767                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7768                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7769                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7770                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7771                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7772         }
7773
7774         val  = RREG32(base + 0x8F30);
7775         val2 = RREG32(base + 0x8F34);
7776         if (val | val2) {
7777                 rc = -EIO;
7778                 dev_err(hdev->dev,
7779                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7780                         device, val, val2);
7781         }
7782         val  = RREG32(base + 0x8F40);
7783         val2 = RREG32(base + 0x8F44);
7784         if (val | val2) {
7785                 rc = -EIO;
7786                 dev_err(hdev->dev,
7787                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7788                         device, val, val2);
7789         }
7790
7791         return rc;
7792 }
7793
7794 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7795 {
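             /* Both SPI events of a given HBM device map to the same device index */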
7796         switch (hbm_event_type) {
7797         case GAUDI_EVENT_HBM0_SPI_0:
7798         case GAUDI_EVENT_HBM0_SPI_1:
7799                 return 0;
7800         case GAUDI_EVENT_HBM1_SPI_0:
7801         case GAUDI_EVENT_HBM1_SPI_1:
7802                 return 1;
7803         case GAUDI_EVENT_HBM2_SPI_0:
7804         case GAUDI_EVENT_HBM2_SPI_1:
7805                 return 2;
7806         case GAUDI_EVENT_HBM3_SPI_0:
7807         case GAUDI_EVENT_HBM3_SPI_1:
7808                 return 3;
7809         default:
7810                 break;
7811         }
7812
7813         /* Should never happen */
7814         return 0;
7815 }
7816
7817 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7818                                         char *interrupt_name)
7819 {
7820         struct gaudi_device *gaudi = hdev->asic_specific;
7821         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7822         bool soft_reset_required = false;
7823
7824         /* Accessing the TPC_INTR_CAUSE registers requires disabling clock
7825          * gating, which cannot be done by CPU-CP, so the driver must read
7826          * them instead.
7827          */
7828
7829         mutex_lock(&gaudi->clk_gate_mutex);
7830
7831         hdev->asic_funcs->disable_clock_gating(hdev);
7832
7833         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7834                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7835
7836         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7837                 if (tpc_interrupts_cause & BIT(i)) {
7838                         dev_err_ratelimited(hdev->dev,
7839                                         "TPC%d_%s interrupt cause: %s\n",
7840                                         tpc_id, interrupt_name,
7841                                         gaudi_tpc_interrupts_cause[i]);
7842                         /* If this is a QM error, we need to soft-reset */
7843                         if (i == 15)
7844                                 soft_reset_required = true;
7845                 }
7846
7847         /* Clear interrupts */
7848         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7849
7850         hdev->asic_funcs->set_clock_gating(hdev);
7851
7852         mutex_unlock(&gaudi->clk_gate_mutex);
7853
7854         return soft_reset_required;
7855 }
7856
7857 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7858 {
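             /* DEC event IDs are spaced two apart per TPC engine */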
7859         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7860 }
7861
7862 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7863 {
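             /* KRN_ERR event IDs are spaced six apart per TPC engine */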
7864         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7865 }
7866
7867 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7868                                         u16 event_type)
7869 {
7870         switch (event_type) {
7871         case GAUDI_EVENT_FIX_POWER_ENV_S:
7872                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7873                 dev_info_ratelimited(hdev->dev,
7874                         "Clock throttling due to power consumption\n");
7875                 break;
7876
7877         case GAUDI_EVENT_FIX_POWER_ENV_E:
7878                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7879                 dev_info_ratelimited(hdev->dev,
7880                         "Power envelope is safe, back to optimal clock\n");
7881                 break;
7882
7883         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7884                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7885                 dev_info_ratelimited(hdev->dev,
7886                         "Clock throttling due to overheating\n");
7887                 break;
7888
7889         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7890                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7891                 dev_info_ratelimited(hdev->dev,
7892                         "Thermal envelope is safe, back to optimal clock\n");
7893                 break;
7894
7895         default:
7896                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7897                         event_type);
7898                 break;
7899         }
7900 }
7901
7902 static void gaudi_handle_eqe(struct hl_device *hdev,
7903                                 struct hl_eq_entry *eq_entry)
7904 {
7905         struct gaudi_device *gaudi = hdev->asic_specific;
7906         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7907         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7908                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7909         bool reset_required;
7910         u8 cause;
7911         int rc;
7912
7913         gaudi->events_stat[event_type]++;
7914         gaudi->events_stat_aggregate[event_type]++;
7915
7916         switch (event_type) {
7917         case GAUDI_EVENT_PCIE_CORE_DERR:
7918         case GAUDI_EVENT_PCIE_IF_DERR:
7919         case GAUDI_EVENT_PCIE_PHY_DERR:
7920         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7921         case GAUDI_EVENT_MME0_ACC_DERR:
7922         case GAUDI_EVENT_MME0_SBAB_DERR:
7923         case GAUDI_EVENT_MME1_ACC_DERR:
7924         case GAUDI_EVENT_MME1_SBAB_DERR:
7925         case GAUDI_EVENT_MME2_ACC_DERR:
7926         case GAUDI_EVENT_MME2_SBAB_DERR:
7927         case GAUDI_EVENT_MME3_ACC_DERR:
7928         case GAUDI_EVENT_MME3_SBAB_DERR:
7929         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7930                 fallthrough;
7931         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7932         case GAUDI_EVENT_PSOC_MEM_DERR:
7933         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7934         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7935         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7936         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7937         case GAUDI_EVENT_MMU_DERR:
7938         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7939                 gaudi_print_irq_info(hdev, event_type, true);
7940                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7941                 goto reset_device;
7942
7943         case GAUDI_EVENT_GIC500:
7944         case GAUDI_EVENT_AXI_ECC:
7945         case GAUDI_EVENT_L2_RAM_ECC:
7946         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7947                 gaudi_print_irq_info(hdev, event_type, false);
7948                 goto reset_device;
7949
7950         case GAUDI_EVENT_HBM0_SPI_0:
7951         case GAUDI_EVENT_HBM1_SPI_0:
7952         case GAUDI_EVENT_HBM2_SPI_0:
7953         case GAUDI_EVENT_HBM3_SPI_0:
7954                 gaudi_print_irq_info(hdev, event_type, false);
7955                 gaudi_hbm_read_interrupts(hdev,
7956                                 gaudi_hbm_event_to_dev(event_type),
7957                                 &eq_entry->hbm_ecc_data);
7958                 goto reset_device;
7959
7960         case GAUDI_EVENT_HBM0_SPI_1:
7961         case GAUDI_EVENT_HBM1_SPI_1:
7962         case GAUDI_EVENT_HBM2_SPI_1:
7963         case GAUDI_EVENT_HBM3_SPI_1:
7964                 gaudi_print_irq_info(hdev, event_type, false);
7965                 gaudi_hbm_read_interrupts(hdev,
7966                                 gaudi_hbm_event_to_dev(event_type),
7967                                 &eq_entry->hbm_ecc_data);
7968                 hl_fw_unmask_irq(hdev, event_type);
7969                 break;
7970
7971         case GAUDI_EVENT_TPC0_DEC:
7972         case GAUDI_EVENT_TPC1_DEC:
7973         case GAUDI_EVENT_TPC2_DEC:
7974         case GAUDI_EVENT_TPC3_DEC:
7975         case GAUDI_EVENT_TPC4_DEC:
7976         case GAUDI_EVENT_TPC5_DEC:
7977         case GAUDI_EVENT_TPC6_DEC:
7978         case GAUDI_EVENT_TPC7_DEC:
7979                 gaudi_print_irq_info(hdev, event_type, true);
7980                 reset_required = gaudi_tpc_read_interrupts(hdev,
7981                                         tpc_dec_event_to_tpc_id(event_type),
7982                                         "AXI_SLV_DEC_Error");
7983                 if (reset_required) {
7984                         dev_err(hdev->dev, "hard reset required due to %s\n",
7985                                 gaudi_irq_map_table[event_type].name);
7986
7987                         goto reset_device;
7988                 } else {
7989                         hl_fw_unmask_irq(hdev, event_type);
7990                 }
7991                 break;
7992
7993         case GAUDI_EVENT_TPC0_KRN_ERR:
7994         case GAUDI_EVENT_TPC1_KRN_ERR:
7995         case GAUDI_EVENT_TPC2_KRN_ERR:
7996         case GAUDI_EVENT_TPC3_KRN_ERR:
7997         case GAUDI_EVENT_TPC4_KRN_ERR:
7998         case GAUDI_EVENT_TPC5_KRN_ERR:
7999         case GAUDI_EVENT_TPC6_KRN_ERR:
8000         case GAUDI_EVENT_TPC7_KRN_ERR:
8001                 gaudi_print_irq_info(hdev, event_type, true);
8002                 reset_required = gaudi_tpc_read_interrupts(hdev,
8003                                         tpc_krn_event_to_tpc_id(event_type),
8004                                         "KRN_ERR");
8005                 if (reset_required) {
8006                         dev_err(hdev->dev, "hard reset required due to %s\n",
8007                                 gaudi_irq_map_table[event_type].name);
8008
8009                         goto reset_device;
8010                 } else {
8011                         hl_fw_unmask_irq(hdev, event_type);
8012                 }
8013                 break;
8014
8015         case GAUDI_EVENT_PCIE_CORE_SERR:
8016         case GAUDI_EVENT_PCIE_IF_SERR:
8017         case GAUDI_EVENT_PCIE_PHY_SERR:
8018         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8019         case GAUDI_EVENT_MME0_ACC_SERR:
8020         case GAUDI_EVENT_MME0_SBAB_SERR:
8021         case GAUDI_EVENT_MME1_ACC_SERR:
8022         case GAUDI_EVENT_MME1_SBAB_SERR:
8023         case GAUDI_EVENT_MME2_ACC_SERR:
8024         case GAUDI_EVENT_MME2_SBAB_SERR:
8025         case GAUDI_EVENT_MME3_ACC_SERR:
8026         case GAUDI_EVENT_MME3_SBAB_SERR:
8027         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8028         case GAUDI_EVENT_CPU_IF_ECC_SERR:
8029         case GAUDI_EVENT_PSOC_MEM_SERR:
8030         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8031         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8032         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8033         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8034                 fallthrough;
8035         case GAUDI_EVENT_MMU_SERR:
8036                 gaudi_print_irq_info(hdev, event_type, true);
8037                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8038                 hl_fw_unmask_irq(hdev, event_type);
8039                 break;
8040
8041         case GAUDI_EVENT_PCIE_DEC:
8042         case GAUDI_EVENT_MME0_WBC_RSP:
8043         case GAUDI_EVENT_MME0_SBAB0_RSP:
8044         case GAUDI_EVENT_MME1_WBC_RSP:
8045         case GAUDI_EVENT_MME1_SBAB0_RSP:
8046         case GAUDI_EVENT_MME2_WBC_RSP:
8047         case GAUDI_EVENT_MME2_SBAB0_RSP:
8048         case GAUDI_EVENT_MME3_WBC_RSP:
8049         case GAUDI_EVENT_MME3_SBAB0_RSP:
8050         case GAUDI_EVENT_CPU_AXI_SPLITTER:
8051         case GAUDI_EVENT_PSOC_AXI_DEC:
8052         case GAUDI_EVENT_PSOC_PRSTN_FALL:
8053         case GAUDI_EVENT_MMU_PAGE_FAULT:
8054         case GAUDI_EVENT_MMU_WR_PERM:
8055         case GAUDI_EVENT_RAZWI_OR_ADC:
8056         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8057         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8058         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8059                 fallthrough;
8060         case GAUDI_EVENT_NIC0_QM0:
8061         case GAUDI_EVENT_NIC0_QM1:
8062         case GAUDI_EVENT_NIC1_QM0:
8063         case GAUDI_EVENT_NIC1_QM1:
8064         case GAUDI_EVENT_NIC2_QM0:
8065         case GAUDI_EVENT_NIC2_QM1:
8066         case GAUDI_EVENT_NIC3_QM0:
8067         case GAUDI_EVENT_NIC3_QM1:
8068         case GAUDI_EVENT_NIC4_QM0:
8069         case GAUDI_EVENT_NIC4_QM1:
8070         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8071                 gaudi_print_irq_info(hdev, event_type, true);
8072                 gaudi_handle_qman_err(hdev, event_type);
8073                 hl_fw_unmask_irq(hdev, event_type);
8074                 break;
8075
8076         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8077                 gaudi_print_irq_info(hdev, event_type, true);
8078                 goto reset_device;
8079
8080         case GAUDI_EVENT_TPC0_BMON_SPMU:
8081         case GAUDI_EVENT_TPC1_BMON_SPMU:
8082         case GAUDI_EVENT_TPC2_BMON_SPMU:
8083         case GAUDI_EVENT_TPC3_BMON_SPMU:
8084         case GAUDI_EVENT_TPC4_BMON_SPMU:
8085         case GAUDI_EVENT_TPC5_BMON_SPMU:
8086         case GAUDI_EVENT_TPC6_BMON_SPMU:
8087         case GAUDI_EVENT_TPC7_BMON_SPMU:
8088         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8089                 gaudi_print_irq_info(hdev, event_type, false);
8090                 hl_fw_unmask_irq(hdev, event_type);
8091                 break;
8092
8093         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8094                 gaudi_print_irq_info(hdev, event_type, false);
8095                 gaudi_print_sm_sei_info(hdev, event_type,
8096                                         &eq_entry->sm_sei_data);
8097                 rc = hl_state_dump(hdev);
8098                 if (rc)
8099                         dev_err(hdev->dev,
8100                                 "Error during system state dump %d\n", rc);
8101                 hl_fw_unmask_irq(hdev, event_type);
8102                 break;
8103
8104         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8105                 gaudi_print_clk_change_info(hdev, event_type);
8106                 hl_fw_unmask_irq(hdev, event_type);
8107                 break;
8108
8109         case GAUDI_EVENT_PSOC_GPIO_U16_0:
8110                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8111                 dev_err(hdev->dev,
8112                         "Received high temp H/W interrupt %d (cause %d)\n",
8113                         event_type, cause);
8114                 break;
8115
8116         case GAUDI_EVENT_DEV_RESET_REQ:
8117                 gaudi_print_irq_info(hdev, event_type, false);
8118                 goto reset_device;
8119
8120         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8121                 gaudi_print_irq_info(hdev, event_type, false);
8122                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8123                 goto reset_device;
8124
8125         case GAUDI_EVENT_FW_ALIVE_S:
8126                 gaudi_print_irq_info(hdev, event_type, false);
8127                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8128                 goto reset_device;
8129
8130         default:
8131                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8132                                 event_type);
8133                 break;
8134         }
8135
8136         return;
8137
8138 reset_device:
8139         if (hdev->hard_reset_on_fw_events)
8140                 hl_device_reset(hdev, HL_RESET_HARD);
8141         else
8142                 hl_fw_unmask_irq(hdev, event_type);
8143 }
8144
8145 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8146                                         u32 *size)
8147 {
8148         struct gaudi_device *gaudi = hdev->asic_specific;
8149
8150         if (aggregate) {
8151                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8152                 return gaudi->events_stat_aggregate;
8153         }
8154
8155         *size = (u32) sizeof(gaudi->events_stat);
8156         return gaudi->events_stat;
8157 }
8158
8159 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8160                                         u32 flags)
8161 {
8162         struct gaudi_device *gaudi = hdev->asic_specific;
8163         u32 status, timeout_usec;
8164         int rc;
8165
8166         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8167                 hdev->hard_reset_pending)
8168                 return 0;
8169
8170         if (hdev->pldm)
8171                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8172         else
8173                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8174
8175         /* L0 & L1 invalidation */
8176         WREG32(mmSTLB_INV_PS, 3);
8177         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8178         WREG32(mmSTLB_INV_PS, 2);
8179
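             /* Wait for the invalidation to complete, i.e. STLB_INV_PS reads zero */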
8180         rc = hl_poll_timeout(
8181                 hdev,
8182                 mmSTLB_INV_PS,
8183                 status,
8184                 !status,
8185                 1000,
8186                 timeout_usec);
8187
8188         WREG32(mmSTLB_INV_SET, 0);
8189
8190         if (rc) {
8191                 dev_err_ratelimited(hdev->dev,
8192                                         "MMU cache invalidation timeout\n");
8193                 hl_device_reset(hdev, HL_RESET_HARD);
8194         }
8195
8196         return rc;
8197 }
8198
8199 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8200                                                 bool is_hard, u32 flags,
8201                                                 u32 asid, u64 va, u64 size)
8202 {
8203         /* Treat as invalidate all because there is no range invalidation
8204          * in Gaudi
8205          */
8206         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8207 }
8208
8209 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8210                                         u32 asid, u64 phys_addr)
8211 {
8212         u32 status, timeout_usec;
8213         int rc;
8214
8215         if (hdev->pldm)
8216                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8217         else
8218                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8219
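             /* Program the hop0 physical address for this ASID, set the busy
              * bit and wait for H/W to clear it
              */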
8220         WREG32(MMU_ASID, asid);
8221         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8222         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8223         WREG32(MMU_BUSY, 0x80000000);
8224
8225         rc = hl_poll_timeout(
8226                 hdev,
8227                 MMU_BUSY,
8228                 status,
8229                 !(status & 0x80000000),
8230                 1000,
8231                 timeout_usec);
8232
8233         if (rc) {
8234                 dev_err(hdev->dev,
8235                         "Timeout during MMU hop0 config of asid %d\n", asid);
8236                 return rc;
8237         }
8238
8239         return 0;
8240 }
8241
8242 static int gaudi_send_heartbeat(struct hl_device *hdev)
8243 {
8244         struct gaudi_device *gaudi = hdev->asic_specific;
8245
8246         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8247                 return 0;
8248
8249         return hl_fw_send_heartbeat(hdev);
8250 }
8251
8252 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8253 {
8254         struct gaudi_device *gaudi = hdev->asic_specific;
8255         struct asic_fixed_properties *prop = &hdev->asic_prop;
8256         int rc;
8257
8258         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8259                 return 0;
8260
8261         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8262                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8263                                         mmCPU_BOOT_ERR1);
8264         if (rc)
8265                 return rc;
8266
8267         if (!strlen(prop->cpucp_info.card_name))
8268                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8269                                 CARD_NAME_MAX_LEN);
8270
8271         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8272
8273         set_default_power_values(hdev);
8274
8275         hdev->max_power = prop->max_power_default;
8276
8277         return 0;
8278 }
8279
8280 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8281                                         u8 mask_len, struct seq_file *s)
8282 {
8283         struct gaudi_device *gaudi = hdev->asic_specific;
8284         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8285         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8286         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8287         unsigned long *mask = (unsigned long *)mask_arr;
8288         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8289         bool is_idle = true, is_eng_idle, is_slave;
8290         u64 offset;
8291         int i, dma_id, port;
8292
8293         mutex_lock(&gaudi->clk_gate_mutex);
8294
8295         hdev->asic_funcs->disable_clock_gating(hdev);
8296
8297         if (s)
8298                 seq_puts(s,
8299                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8300                         "---  -------  ------------  ----------  -------------\n");
8301
8302         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8303                 dma_id = gaudi_dma_assignment[i];
8304                 offset = dma_id * DMA_QMAN_OFFSET;
8305
8306                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8307                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8308                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8309                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8310                                 IS_DMA_IDLE(dma_core_sts0);
8311                 is_idle &= is_eng_idle;
8312
8313                 if (mask && !is_eng_idle)
8314                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8315                 if (s)
8316                         seq_printf(s, fmt, dma_id,
8317                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8318                                 qm_cgm_sts, dma_core_sts0);
8319         }
8320
8321         if (s)
8322                 seq_puts(s,
8323                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8324                         "---  -------  ------------  ----------  ----------\n");
8325
8326         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8327                 offset = i * TPC_QMAN_OFFSET;
8328                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8329                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8330                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8331                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8332                                 IS_TPC_IDLE(tpc_cfg_sts);
8333                 is_idle &= is_eng_idle;
8334
8335                 if (mask && !is_eng_idle)
8336                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8337                 if (s)
8338                         seq_printf(s, fmt, i,
8339                                 is_eng_idle ? "Y" : "N",
8340                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8341         }
8342
8343         if (s)
8344                 seq_puts(s,
8345                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8346                         "---  -------  ------------  ----------  -----------\n");
8347
8348         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8349                 offset = i * MME_QMAN_OFFSET;
8350                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8351                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8352
8353                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8354                 is_slave = i % 2;
8355                 if (!is_slave) {
8356                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8357                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8358                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8359                 }
8360
8361                 is_idle &= is_eng_idle;
8362
8363                 if (mask && !is_eng_idle)
8364                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8365                 if (s) {
8366                         if (!is_slave)
8367                                 seq_printf(s, fmt, i,
8368                                         is_eng_idle ? "Y" : "N",
8369                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8370                         else
8371                                 seq_printf(s, mme_slave_fmt, i,
8372                                         is_eng_idle ? "Y" : "N", "-",
8373                                         "-", mme_arch_sts);
8374                 }
8375         }
8376
8377         if (s)
8378                 seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8379                                 "---  -------  ------------  ----------\n");
8380
8381         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8382                 offset = i * NIC_MACRO_QMAN_OFFSET;
8383                 port = 2 * i;
8384                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8385                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8386                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8387                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8388                         is_idle &= is_eng_idle;
8389
8390                         if (mask && !is_eng_idle)
8391                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8392                         if (s)
8393                                 seq_printf(s, nic_fmt, port,
8394                                                 is_eng_idle ? "Y" : "N",
8395                                                 qm_glbl_sts0, qm_cgm_sts);
8396                 }
8397
8398                 port = 2 * i + 1;
8399                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8400                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8401                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8402                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8403                         is_idle &= is_eng_idle;
8404
8405                         if (mask && !is_eng_idle)
8406                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8407                         if (s)
8408                                 seq_printf(s, nic_fmt, port,
8409                                                 is_eng_idle ? "Y" : "N",
8410                                                 qm_glbl_sts0, qm_cgm_sts);
8411                 }
8412         }
8413
8414         if (s)
8415                 seq_puts(s, "\n");
8416
8417         hdev->asic_funcs->set_clock_gating(hdev);
8418
8419         mutex_unlock(&gaudi->clk_gate_mutex);
8420
8421         return is_idle;
8422 }
8423
8424 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8425         __acquires(&gaudi->hw_queues_lock)
8426 {
8427         struct gaudi_device *gaudi = hdev->asic_specific;
8428
8429         spin_lock(&gaudi->hw_queues_lock);
8430 }
8431
8432 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8433         __releases(&gaudi->hw_queues_lock)
8434 {
8435         struct gaudi_device *gaudi = hdev->asic_specific;
8436
8437         spin_unlock(&gaudi->hw_queues_lock);
8438 }
8439
8440 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8441 {
8442         return hdev->pdev->device;
8443 }
8444
8445 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8446                                 size_t max_size)
8447 {
8448         struct gaudi_device *gaudi = hdev->asic_specific;
8449
8450         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8451                 return 0;
8452
8453         return hl_fw_get_eeprom_data(hdev, data, max_size);
8454 }
8455
8456 /*
8457  * this function should be used only during initialization and/or after reset,
8458  * when there are no active users.
8459  */
8460 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8461                                 u32 tpc_id)
8462 {
8463         struct gaudi_device *gaudi = hdev->asic_specific;
8464         u64 kernel_timeout;
8465         u32 status, offset;
8466         int rc;
8467
8468         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8469
8470         if (hdev->pldm)
8471                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8472         else
8473                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8474
8475         mutex_lock(&gaudi->clk_gate_mutex);
8476
8477         hdev->asic_funcs->disable_clock_gating(hdev);
8478
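             /* Point both the QM kernel base and the icache base at the TPC kernel binary */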
8479         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8480                         lower_32_bits(tpc_kernel));
8481         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8482                         upper_32_bits(tpc_kernel));
8483
8484         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8485                         lower_32_bits(tpc_kernel));
8486         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8487                         upper_32_bits(tpc_kernel));
8488         /* set a valid LUT pointer, content is of no significance */
8489         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8490                         lower_32_bits(tpc_kernel));
8491         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8492                         upper_32_bits(tpc_kernel));
8493
8494         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8495                         lower_32_bits(CFG_BASE +
8496                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8497
8498         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8499                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8500                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8501         /* wait a bit for the engine to start executing */
8502         usleep_range(1000, 1500);
8503
8504         /* wait until engine has finished executing */
8505         rc = hl_poll_timeout(
8506                 hdev,
8507                 mmTPC0_CFG_STATUS + offset,
8508                 status,
8509                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8510                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8511                 1000,
8512                 kernel_timeout);
8513
8514         if (rc) {
8515                 dev_err(hdev->dev,
8516                         "Timeout while waiting for TPC%d icache prefetch\n",
8517                         tpc_id);
8518                 hdev->asic_funcs->set_clock_gating(hdev);
8519                 mutex_unlock(&gaudi->clk_gate_mutex);
8520                 return -EIO;
8521         }
8522
8523         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8524                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8525
8526         /* wait a bit for the engine to start executing */
8527         usleep_range(1000, 1500);
8528
8529         /* wait until engine has finished executing */
8530         rc = hl_poll_timeout(
8531                 hdev,
8532                 mmTPC0_CFG_STATUS + offset,
8533                 status,
8534                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8535                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8536                 1000,
8537                 kernel_timeout);
8538
8539         if (rc) {
8540                 dev_err(hdev->dev,
8541                         "Timeout while waiting for TPC%d vector pipe\n",
8542                         tpc_id);
8543                 hdev->asic_funcs->set_clock_gating(hdev);
8544                 mutex_unlock(&gaudi->clk_gate_mutex);
8545                 return -EIO;
8546         }
8547
8548         rc = hl_poll_timeout(
8549                 hdev,
8550                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8551                 status,
8552                 (status == 0),
8553                 1000,
8554                 kernel_timeout);
8555
8556         hdev->asic_funcs->set_clock_gating(hdev);
8557         mutex_unlock(&gaudi->clk_gate_mutex);
8558
8559         if (rc) {
8560                 dev_err(hdev->dev,
8561                         "Timeout while waiting for TPC%d kernel to execute\n",
8562                         tpc_id);
8563                 return -EIO;
8564         }
8565
8566         return 0;
8567 }
8568
8569 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8570                 struct hl_ctx *ctx)
8571 {
8572         struct gaudi_device *gaudi = hdev->asic_specific;
8573         int min_alloc_order, rc, collective_cb_size;
8574
8575         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8576                 return 0;
8577
8578         hdev->internal_cb_pool_virt_addr =
8579                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8580                                         HOST_SPACE_INTERNAL_CB_SZ,
8581                                         &hdev->internal_cb_pool_dma_addr,
8582                                         GFP_KERNEL | __GFP_ZERO);
8583
8584         if (!hdev->internal_cb_pool_virt_addr)
8585                 return -ENOMEM;
8586
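             /* A collective CB consists of five MSG_SHORT packets and a FENCE;
              * use its size to derive the pool's minimal allocation order
              */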
8587         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8588                         sizeof(struct packet_fence);
8589         min_alloc_order = ilog2(collective_cb_size);
8590
8591         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8592         if (!hdev->internal_cb_pool) {
8593                 dev_err(hdev->dev,
8594                         "Failed to create internal CB pool\n");
8595                 rc = -ENOMEM;
8596                 goto free_internal_cb_pool;
8597         }
8598
8599         rc = gen_pool_add(hdev->internal_cb_pool,
8600                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8601                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8602         if (rc) {
8603                 dev_err(hdev->dev,
8604                         "Failed to add memory to internal CB pool\n");
8605                 rc = -EFAULT;
8606                 goto destroy_internal_cb_pool;
8607         }
8608
8609         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8610                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8611                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8612
8613         if (!hdev->internal_cb_va_base) {
8614                 rc = -ENOMEM;
8615                 goto destroy_internal_cb_pool;
8616         }
8617
8618         mutex_lock(&ctx->mmu_lock);
8619         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8620                         hdev->internal_cb_pool_dma_addr,
8621                         HOST_SPACE_INTERNAL_CB_SZ);
8622
8623         hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
8624         mutex_unlock(&ctx->mmu_lock);
8625
8626         if (rc)
8627                 goto unreserve_internal_cb_pool;
8628
8629         return 0;
8630
8631 unreserve_internal_cb_pool:
8632         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8633                         HOST_SPACE_INTERNAL_CB_SZ);
8634 destroy_internal_cb_pool:
8635         gen_pool_destroy(hdev->internal_cb_pool);
8636 free_internal_cb_pool:
8637         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8638                         HOST_SPACE_INTERNAL_CB_SZ,
8639                         hdev->internal_cb_pool_virt_addr,
8640                         hdev->internal_cb_pool_dma_addr);
8641
8642         return rc;
8643 }
8644
8645 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8646                 struct hl_ctx *ctx)
8647 {
8648         struct gaudi_device *gaudi = hdev->asic_specific;
8649
8650         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8651                 return;
8652
8653         mutex_lock(&ctx->mmu_lock);
8654         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8655                         HOST_SPACE_INTERNAL_CB_SZ);
8656         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8657                         HOST_SPACE_INTERNAL_CB_SZ);
8658         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
8659         mutex_unlock(&ctx->mmu_lock);
8660
8661         gen_pool_destroy(hdev->internal_cb_pool);
8662
8663         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8664                         HOST_SPACE_INTERNAL_CB_SZ,
8665                         hdev->internal_cb_pool_virt_addr,
8666                         hdev->internal_cb_pool_dma_addr);
8667 }
8668
8669 static int gaudi_ctx_init(struct hl_ctx *ctx)
8670 {
8671         if (ctx->asid == HL_KERNEL_ASID_ID)
8672                 return 0;
8673
8674         gaudi_mmu_prepare(ctx->hdev, ctx->asid);
8675         return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8676 }
8677
8678 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8679 {
8680         if (ctx->asid == HL_KERNEL_ASID_ID)
8681                 return;
8682
8683         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8684 }
8685
8686 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8687 {
8688         return gaudi_cq_assignment[cq_idx];
8689 }
8690
8691 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8692 {
8693         return sizeof(struct packet_msg_short) +
8694                         sizeof(struct packet_msg_prot) * 2;
8695 }
8696
8697 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8698 {
8699         return sizeof(struct packet_msg_short) * 4 +
8700                         sizeof(struct packet_fence) +
8701                         sizeof(struct packet_msg_prot) * 2;
8702 }
8703
8704 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8705                                 u32 size, bool eb)
8706 {
8707         struct hl_cb *cb = (struct hl_cb *) data;
8708         struct packet_msg_short *pkt;
8709         u32 value, ctl, pkt_size = sizeof(*pkt);
8710
8711         pkt = cb->kernel_address + size;
8712         memset(pkt, 0, pkt_size);
8713
8714         /* Inc by 1, Mode ADD */
8715         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8716         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8717
8718         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8719         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8720         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8721         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8722         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8723         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8724         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8725
8726         pkt->value = cpu_to_le32(value);
8727         pkt->ctl = cpu_to_le32(ctl);
8728
8729         return size + pkt_size;
8730 }
8731
8732 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8733                                         u16 addr)
8734 {
8735         u32 ctl, pkt_size = sizeof(*pkt);
8736
8737         memset(pkt, 0, pkt_size);
8738
8739         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8740         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8741         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8742         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8743         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8744         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8745
8746         pkt->value = cpu_to_le32(value);
8747         pkt->ctl = cpu_to_le32(ctl);
8748
8749         return pkt_size;
8750 }
8751
8752 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8753                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8754                 u16 sob_val, u16 mon_id)
8755 {
8756         u64 monitor_base;
8757         u32 ctl, value, pkt_size = sizeof(*pkt);
8758         u16 msg_addr_offset;
8759         u8 mask;
8760
8761         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8762                 dev_err(hdev->dev,
8763                         "sob_base %u (mask %#x) is not valid\n",
8764                         sob_base, sob_mask);
8765                 return 0;
8766         }
8767
8768         /*
8769          * monitor_base should be the content of the base0 address registers,
8770          * so it will be added to the msg short offsets
8771          */
8772         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8773
8774         msg_addr_offset =
8775                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8776                                 monitor_base;
8777
8778         memset(pkt, 0, pkt_size);
8779
8780         /* Monitor config packet: bind the monitor to a sync object */
8781         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8782         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8783         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8784                         0); /* GREATER OR EQUAL */
8785         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8786
8787         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8788         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8789         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8790         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8791         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8792         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8793         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8794
8795         pkt->value = cpu_to_le32(value);
8796         pkt->ctl = cpu_to_le32(ctl);
8797
8798         return pkt_size;
8799 }
8800
8801 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8802 {
8803         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8804
8805         memset(pkt, 0, pkt_size);
8806
8807         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8808         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8809         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8810
8811         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8812         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8813         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8814         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8815
8816         pkt->cfg = cpu_to_le32(cfg);
8817         pkt->ctl = cpu_to_le32(ctl);
8818
8819         return pkt_size;
8820 }
8821
8822 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8823 {
8824         u32 offset, nic_index;
8825
8826         switch (queue_id) {
8827         case GAUDI_QUEUE_ID_DMA_0_0:
8828                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8829                 break;
8830         case GAUDI_QUEUE_ID_DMA_0_1:
8831                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8832                 break;
8833         case GAUDI_QUEUE_ID_DMA_0_2:
8834                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8835                 break;
8836         case GAUDI_QUEUE_ID_DMA_0_3:
8837                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8838                 break;
8839         case GAUDI_QUEUE_ID_DMA_1_0:
8840                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8841                 break;
8842         case GAUDI_QUEUE_ID_DMA_1_1:
8843                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8844                 break;
8845         case GAUDI_QUEUE_ID_DMA_1_2:
8846                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8847                 break;
8848         case GAUDI_QUEUE_ID_DMA_1_3:
8849                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8850                 break;
8851         case GAUDI_QUEUE_ID_DMA_5_0:
8852                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8853                 break;
8854         case GAUDI_QUEUE_ID_DMA_5_1:
8855                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8856                 break;
8857         case GAUDI_QUEUE_ID_DMA_5_2:
8858                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8859                 break;
8860         case GAUDI_QUEUE_ID_DMA_5_3:
8861                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8862                 break;
8863         case GAUDI_QUEUE_ID_TPC_7_0:
8864                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8865                 break;
8866         case GAUDI_QUEUE_ID_TPC_7_1:
8867                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8868                 break;
8869         case GAUDI_QUEUE_ID_TPC_7_2:
8870                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8871                 break;
8872         case GAUDI_QUEUE_ID_TPC_7_3:
8873                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8874                 break;
8875         case GAUDI_QUEUE_ID_NIC_0_0:
8876         case GAUDI_QUEUE_ID_NIC_1_0:
8877         case GAUDI_QUEUE_ID_NIC_2_0:
8878         case GAUDI_QUEUE_ID_NIC_3_0:
8879         case GAUDI_QUEUE_ID_NIC_4_0:
8880         case GAUDI_QUEUE_ID_NIC_5_0:
8881         case GAUDI_QUEUE_ID_NIC_6_0:
8882         case GAUDI_QUEUE_ID_NIC_7_0:
8883         case GAUDI_QUEUE_ID_NIC_8_0:
8884         case GAUDI_QUEUE_ID_NIC_9_0:
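                     /* Four queues per NIC engine, two engines per NIC macro */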
8885                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8886                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8887                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8888                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8889                 break;
8890         case GAUDI_QUEUE_ID_NIC_0_1:
8891         case GAUDI_QUEUE_ID_NIC_1_1:
8892         case GAUDI_QUEUE_ID_NIC_2_1:
8893         case GAUDI_QUEUE_ID_NIC_3_1:
8894         case GAUDI_QUEUE_ID_NIC_4_1:
8895         case GAUDI_QUEUE_ID_NIC_5_1:
8896         case GAUDI_QUEUE_ID_NIC_6_1:
8897         case GAUDI_QUEUE_ID_NIC_7_1:
8898         case GAUDI_QUEUE_ID_NIC_8_1:
8899         case GAUDI_QUEUE_ID_NIC_9_1:
8900                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8901                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8902                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8903                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8904                 break;
8905         case GAUDI_QUEUE_ID_NIC_0_2:
8906         case GAUDI_QUEUE_ID_NIC_1_2:
8907         case GAUDI_QUEUE_ID_NIC_2_2:
8908         case GAUDI_QUEUE_ID_NIC_3_2:
8909         case GAUDI_QUEUE_ID_NIC_4_2:
8910         case GAUDI_QUEUE_ID_NIC_5_2:
8911         case GAUDI_QUEUE_ID_NIC_6_2:
8912         case GAUDI_QUEUE_ID_NIC_7_2:
8913         case GAUDI_QUEUE_ID_NIC_8_2:
8914         case GAUDI_QUEUE_ID_NIC_9_2:
8915                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8916                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8917                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8918                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8919                 break;
8920         case GAUDI_QUEUE_ID_NIC_0_3:
8921         case GAUDI_QUEUE_ID_NIC_1_3:
8922         case GAUDI_QUEUE_ID_NIC_2_3:
8923         case GAUDI_QUEUE_ID_NIC_3_3:
8924         case GAUDI_QUEUE_ID_NIC_4_3:
8925         case GAUDI_QUEUE_ID_NIC_5_3:
8926         case GAUDI_QUEUE_ID_NIC_6_3:
8927         case GAUDI_QUEUE_ID_NIC_7_3:
8928         case GAUDI_QUEUE_ID_NIC_8_3:
8929         case GAUDI_QUEUE_ID_NIC_9_3:
8930                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8931                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8932                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8933                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8934                 break;
8935         default:
8936                 return -EINVAL;
8937         }
8938
8939         *addr = CFG_BASE + offset;
8940
8941         return 0;
8942 }
8943
8944 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8945 {
8946         u64 monitor_base;
8947         u32 size = 0;
8948         u16 msg_addr_offset;
8949
8950         /*
8951          * monitor_base should be the content of the base0 address registers,
8952          * so it will be added to the msg short offsets
8953          */
8954         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8955
8956         /* First monitor config packet: low address of the sync */
8957         msg_addr_offset =
8958                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8959                                 monitor_base;
8960
8961         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8962                                         msg_addr_offset);
8963
8964         /* Second monitor config packet: high address of the sync */
8965         msg_addr_offset =
8966                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8967                                 monitor_base;
8968
8969         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8970                                         msg_addr_offset);
8971
8972         /*
8973          * Third monitor config packet: the payload, i.e. what to write when the
8974          * sync triggers
8975          */
8976         msg_addr_offset =
8977                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8978                                 monitor_base;
8979
8980         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8981
8982         return size;
8983 }
8984
8985 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8986                                 struct hl_gen_wait_properties *prop)
8987 {
8988         struct hl_cb *cb = (struct hl_cb *) prop->data;
8989         void *buf = cb->kernel_address;
8990         u64 fence_addr = 0;
8991         u32 size = prop->size;
8992
8993         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8994                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8995                                 prop->q_idx);
8996                 return 0;
8997         }
8998
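             /* Wait CB: three monitor setup packets, a monitor arm packet and a fence */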
8999         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
9000         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
9001                         prop->sob_mask, prop->sob_val, prop->mon_id);
9002         size += gaudi_add_fence_pkt(buf + size);
9003
9004         return size;
9005 }
9006
9007 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
9008 {
9009         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
9010         int rc;
9011
9012         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
9013                 hw_sob->sob_id);
9014
9015         rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
9016                         CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9017                         hw_sob->sob_id * 4, 1, 0);
9018         if (rc)
9019                 dev_err(hdev->dev, "failed resetting sob %u\n", hw_sob->sob_id);
9020
9021         kref_init(&hw_sob->kref);
9022 }
9023
9024 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9025 {
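             /* A POWER9 host magic value in this scratch register indicates
              * that a full 64-bit DMA mask can be used
              */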
9026         if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
9027                                                         HL_POWER9_HOST_MAGIC) {
9028                 hdev->power9_64bit_dma_enable = 1;
9029                 hdev->dma_mask = 64;
9030         } else {
9031                 hdev->power9_64bit_dma_enable = 0;
9032                 hdev->dma_mask = 48;
9033         }
9034 }
9035
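/*
 * Compose a 64-bit device timestamp from the upper and lower 32-bit counter
 * registers. Note: the two reads are not protected against the counter
 * wrapping between them, which is presumably acceptable for this use case.
 */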
9036 static u64 gaudi_get_device_time(struct hl_device *hdev)
9037 {
9038         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9039
9040         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9041 }
9042
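/* HW block mapping is not supported on Gaudi, hence the -EPERM stubs below */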
9043 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9044                                 u32 *block_size, u32 *block_id)
9045 {
9046         return -EPERM;
9047 }
9048
9049 static int gaudi_block_mmap(struct hl_device *hdev,
9050                                 struct vm_area_struct *vma,
9051                                 u32 block_id, u32 block_size)
9052 {
9053         return -EPERM;
9054 }
9055
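/*
 * Signal the firmware that the driver is ready to receive events by writing
 * the GAUDI_EVENT_INTS_REGISTER event id to the host-interrupt register:
 * either the GIC SPI register (when GIC interrupts are enabled) or the
 * register published in the dynamic firmware descriptor.
 */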
9056 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9057 {
9058         struct cpu_dyn_regs *dyn_regs =
9059                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9060         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9061                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
9062                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
9063
9064         WREG32(irq_handler_offset,
9065                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
9066 }
9067
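/*
 * Translate the PLL indices used by the driver/uapi (HL_GAUDI_*_PLL) to the
 * firmware's PLL numbering; unknown indices are rejected with -EINVAL.
 */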
9068 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9069 {
9070         switch (pll_idx) {
9071         case HL_GAUDI_CPU_PLL: return CPU_PLL;
9072         case HL_GAUDI_PCI_PLL: return PCI_PLL;
9073         case HL_GAUDI_NIC_PLL: return NIC_PLL;
9074         case HL_GAUDI_DMA_PLL: return DMA_PLL;
9075         case HL_GAUDI_MESH_PLL: return MESH_PLL;
9076         case HL_GAUDI_MME_PLL: return MME_PLL;
9077         case HL_GAUDI_TPC_PLL: return TPC_PLL;
9078         case HL_GAUDI_IF_PLL: return IF_PLL;
9079         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9080         case HL_GAUDI_HBM_PLL: return HBM_PLL;
9081         default: return -EINVAL;
9082         }
9083 }
9084
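/*
 * Add one sync-object-to-engine entry to the state dump hash map, keyed by
 * the CFG-base-relative address of the sync object. Values of 0 or
 * 0xffffffff are treated as invalid and silently skipped.
 */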
9085 static int gaudi_add_sync_to_engine_map_entry(
9086         struct hl_sync_to_engine_map *map, u32 reg_value,
9087         enum hl_sync_engine_type engine_type, u32 engine_id)
9088 {
9089         struct hl_sync_to_engine_map_entry *entry;
9090
9091         /* The register value holds a partial address of the sync object
9092          * and serves as its unique identifier, so the lower 32 bits of
9093          * the CFG base are subtracted from it first.
9094          */
9095         if (reg_value == 0 || reg_value == 0xffffffff)
9096                 return 0;
9097         reg_value -= (u32)CFG_BASE;
9098
9099         /* create a new hash entry */
9100         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9101         if (!entry)
9102                 return -ENOMEM;
9103         entry->engine_type = engine_type;
9104         entry->engine_id = engine_id;
9105         entry->sync_id = reg_value;
9106         hash_add(map->tb, &entry->node, reg_value);
9107
9108         return 0;
9109 }
9110
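/*
 * Build the sync-object-to-engine map for the state dump by sampling the SO
 * configuration register (the *_CFG_SO properties) of every TPC, MME and DMA
 * engine. TPC and MME registers are read with clock gating temporarily
 * disabled.
 */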
9111 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9112                                 struct hl_sync_to_engine_map *map)
9113 {
9114         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9115         struct gaudi_device *gaudi = hdev->asic_specific;
9116         int i, j, rc;
9117         u32 reg_value;
9118
9119         /* Iterate over TPC engines */
9120         for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9121         /* TPC registers must be accessed with clock gating disabled */
9122                 mutex_lock(&gaudi->clk_gate_mutex);
9123                 hdev->asic_funcs->disable_clock_gating(hdev);
9124
9125                 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9126                                         sds->props[SP_NEXT_TPC] * i);
9127
9128                 /* We can re-enable clock gating now */
9129                 hdev->asic_funcs->set_clock_gating(hdev);
9130                 mutex_unlock(&gaudi->clk_gate_mutex);
9131
9132                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9133                                                         ENGINE_TPC, i);
9134                 if (rc)
9135                         goto free_sync_to_engine_map;
9136         }
9137
9138         /* Iterate over MME engines */
9139         for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9140                 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9141                         /* MME registers must be accessed with clock gating
9142                          * disabled
9143                          */
9144                         mutex_lock(&gaudi->clk_gate_mutex);
9145                         hdev->asic_funcs->disable_clock_gating(hdev);
9146
9147                         reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9148                                                 sds->props[SP_NEXT_MME] * i +
9149                                                 j * sizeof(u32));
9150
9151                         /* We can re-enable clock gating now */
9152                         hdev->asic_funcs->set_clock_gating(hdev);
9153                         mutex_unlock(&gaudi->clk_gate_mutex);
9154
9155                         rc = gaudi_add_sync_to_engine_map_entry(
9156                                 map, reg_value, ENGINE_MME,
9157                                 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9158                         if (rc)
9159                                 goto free_sync_to_engine_map;
9160                 }
9161         }
9162
9163         /* Iterate over DMA engines */
9164         for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9165                 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9166                                         sds->props[SP_DMA_QUEUES_OFFSET] * i);
9167                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9168                                                         ENGINE_DMA, i);
9169                 if (rc)
9170                         goto free_sync_to_engine_map;
9171         }
9172
9173         return 0;
9174
9175 free_sync_to_engine_map:
9176         hl_state_dump_free_sync_to_engine_map(map);
9177
9178         return rc;
9179 }
9180
9181 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9182 {
9183         return FIELD_GET(
9184                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9185                 mon->status);
9186 }
9187
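/*
 * Pretty-print one armed monitor: decode the SID, MASK and SOD fields of the
 * ARM register, the payload data/address, and the pending bits from the
 * status register, appending the result to the resizable output buffer.
 */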
9188 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9189                                 struct hl_device *hdev,
9190                                 struct hl_mon_state_dump *mon)
9191 {
9192         const char *name;
9193         char scratch_buf1[BIN_REG_STRING_SIZE],
9194                 scratch_buf2[BIN_REG_STRING_SIZE];
9195
9196         name = hl_state_dump_get_monitor_name(hdev, mon);
9197         if (!name)
9198                 name = "";
9199
9200         return hl_snprintf_resize(
9201                 buf, size, offset,
9202                 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s",
9203                 mon->id, name,
9204                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9205                                 mon->arm_data),
9206                 hl_format_as_binary(
9207                         scratch_buf1, sizeof(scratch_buf1),
9208                         FIELD_GET(
9209                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9210                                 mon->arm_data)),
9211                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9212                                 mon->arm_data),
9213                 mon->wr_data,
9214                 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9215                 hl_format_as_binary(
9216                         scratch_buf2, sizeof(scratch_buf2),
9217                         FIELD_GET(
9218                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9219                                 mon->status)));
9220 }
9221
9222
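/*
 * Dump the fence state of a single engine's QMAN: snapshot the CP status and
 * fence counter registers for all of its streams, then print only the streams
 * that currently have a fence wait in progress. The printed fence_cnt/rdata
 * values are the CFG-space addresses of the corresponding registers.
 */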
9223 static int gaudi_print_fences_single_engine(
9224         struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9225         enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9226         size_t *size, size_t *offset)
9227 {
9228         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9229         int rc = -ENOMEM, i;
9230         u32 *statuses, *fences;
9231
9232         statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9233                         sizeof(*statuses), GFP_KERNEL);
9234         if (!statuses)
9235                 goto out;
9236
9237         fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9238                                 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9239                          sizeof(*fences), GFP_KERNEL);
9240         if (!fences)
9241                 goto free_status;
9242
9243         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9244                 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9245
9246         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9247                                 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9248                 fences[i] = RREG32(base_offset + i * sizeof(u32));
9249
9250         /* The actual print */
9251         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9252                 u32 fence_id;
9253                 u64 fence_cnt, fence_rdata;
9254                 const char *engine_name;
9255
9256                 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9257                         statuses[i]))
9258                         continue;
9259
9260                 fence_id =
9261                         FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9262                 fence_cnt = base_offset + CFG_BASE +
9263                         sizeof(u32) *
9264                         (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9265                 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9266                                 sds->props[SP_FENCE0_RDATA_OFFSET];
9267                 engine_name = hl_sync_engine_to_string(engine_type);
9268
9269                 rc = hl_snprintf_resize(
9270                         buf, size, offset,
9271                         "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9272                         engine_name, engine_id,
9273                         i, fence_id,
9274                         fence_cnt, engine_name, engine_id, fence_id, i,
9275                         fence_rdata, engine_name, engine_id, fence_id, i,
9276                         fences[fence_id],
9277                         statuses[i]);
9278                 if (rc)
9279                         goto free_fences;
9280         }
9281
9282         rc = 0;
9283
9284 free_fences:
9285         kfree(fences);
9286 free_status:
9287         kfree(statuses);
9288 out:
9289         return rc;
9290 }
9291
9292
9293 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9294         .monitor_valid = gaudi_monitor_valid,
9295         .print_single_monitor = gaudi_print_single_monitor,
9296         .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9297         .print_fences_single_engine = gaudi_print_fences_single_engine,
9298 };
9299
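/*
 * Register the Gaudi-specific state dump support: populate the SO-id and
 * monitor-id to name lookup tables and hook up the properties, sync manager
 * names and callbacks used by the common state dump code.
 */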
9300 static void gaudi_state_dump_init(struct hl_device *hdev)
9301 {
9302         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9303         int i;
9304
9305         for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9306                 hash_add(sds->so_id_to_str_tb,
9307                         &gaudi_so_id_to_str[i].node,
9308                         gaudi_so_id_to_str[i].id);
9309
9310         for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9311                 hash_add(sds->monitor_id_to_str_tb,
9312                         &gaudi_monitor_id_to_str[i].node,
9313                         gaudi_monitor_id_to_str[i].id);
9314
9315         sds->props = gaudi_state_dump_specs_props;
9316
9317         sds->sync_namager_names = gaudi_sync_manager_names;
9318
9319         sds->funcs = gaudi_state_dump_funcs;
9320 }
9321
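/*
 * ASIC function table binding the common habanalabs core to the
 * Gaudi-specific implementation
 */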
9322 static const struct hl_asic_funcs gaudi_funcs = {
9323         .early_init = gaudi_early_init,
9324         .early_fini = gaudi_early_fini,
9325         .late_init = gaudi_late_init,
9326         .late_fini = gaudi_late_fini,
9327         .sw_init = gaudi_sw_init,
9328         .sw_fini = gaudi_sw_fini,
9329         .hw_init = gaudi_hw_init,
9330         .hw_fini = gaudi_hw_fini,
9331         .halt_engines = gaudi_halt_engines,
9332         .suspend = gaudi_suspend,
9333         .resume = gaudi_resume,
9334         .mmap = gaudi_mmap,
9335         .ring_doorbell = gaudi_ring_doorbell,
9336         .pqe_write = gaudi_pqe_write,
9337         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9338         .asic_dma_free_coherent = gaudi_dma_free_coherent,
9339         .scrub_device_mem = gaudi_scrub_device_mem,
9340         .get_int_queue_base = gaudi_get_int_queue_base,
9341         .test_queues = gaudi_test_queues,
9342         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9343         .asic_dma_pool_free = gaudi_dma_pool_free,
9344         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9345         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9346         .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9347         .cs_parser = gaudi_cs_parser,
9348         .asic_dma_map_sg = gaudi_dma_map_sg,
9349         .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9350         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9351         .update_eq_ci = gaudi_update_eq_ci,
9352         .context_switch = gaudi_context_switch,
9353         .restore_phase_topology = gaudi_restore_phase_topology,
9354         .debugfs_read32 = gaudi_debugfs_read32,
9355         .debugfs_write32 = gaudi_debugfs_write32,
9356         .debugfs_read64 = gaudi_debugfs_read64,
9357         .debugfs_write64 = gaudi_debugfs_write64,
9358         .debugfs_read_dma = gaudi_debugfs_read_dma,
9359         .add_device_attr = gaudi_add_device_attr,
9360         .handle_eqe = gaudi_handle_eqe,
9361         .set_pll_profile = gaudi_set_pll_profile,
9362         .get_events_stat = gaudi_get_events_stat,
9363         .read_pte = gaudi_read_pte,
9364         .write_pte = gaudi_write_pte,
9365         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9366         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9367         .send_heartbeat = gaudi_send_heartbeat,
9368         .set_clock_gating = gaudi_set_clock_gating,
9369         .disable_clock_gating = gaudi_disable_clock_gating,
9370         .debug_coresight = gaudi_debug_coresight,
9371         .is_device_idle = gaudi_is_device_idle,
9372         .soft_reset_late_init = gaudi_soft_reset_late_init,
9373         .hw_queues_lock = gaudi_hw_queues_lock,
9374         .hw_queues_unlock = gaudi_hw_queues_unlock,
9375         .get_pci_id = gaudi_get_pci_id,
9376         .get_eeprom_data = gaudi_get_eeprom_data,
9377         .send_cpu_message = gaudi_send_cpu_message,
9378         .pci_bars_map = gaudi_pci_bars_map,
9379         .init_iatu = gaudi_init_iatu,
9380         .rreg = hl_rreg,
9381         .wreg = hl_wreg,
9382         .halt_coresight = gaudi_halt_coresight,
9383         .ctx_init = gaudi_ctx_init,
9384         .ctx_fini = gaudi_ctx_fini,
9385         .get_clk_rate = gaudi_get_clk_rate,
9386         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9387         .load_firmware_to_device = gaudi_load_firmware_to_device,
9388         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9389         .get_signal_cb_size = gaudi_get_signal_cb_size,
9390         .get_wait_cb_size = gaudi_get_wait_cb_size,
9391         .gen_signal_cb = gaudi_gen_signal_cb,
9392         .gen_wait_cb = gaudi_gen_wait_cb,
9393         .reset_sob = gaudi_reset_sob,
9394         .reset_sob_group = gaudi_reset_sob_group,
9395         .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9396         .get_device_time = gaudi_get_device_time,
9397         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9398         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9399         .scramble_addr = hl_mmu_scramble_addr,
9400         .descramble_addr = hl_mmu_descramble_addr,
9401         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9402         .get_hw_block_id = gaudi_get_hw_block_id,
9403         .hw_block_mmap = gaudi_block_mmap,
9404         .enable_events_from_fw = gaudi_enable_events_from_fw,
9405         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9406         .init_firmware_loader = gaudi_init_firmware_loader,
9407         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9408         .state_dump_init = gaudi_state_dump_init
9409 };
9410
9411 /**
9412  * gaudi_set_asic_funcs - set GAUDI function pointers
9413  *
9414  * @hdev: pointer to hl_device structure
9415  *
9416  */
9417 void gaudi_set_asic_funcs(struct hl_device *hdev)
9418 {
9419         hdev->asic_funcs = &gaudi_funcs;
9420 }