habanalabs/gaudi: add monitored SOBs to state dump
[linux-2.6-microblaze.git] drivers/misc/habanalabs/gaudi/gaudi.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2020 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43  *                      because of TDMA (tensor DMA). Hence, WREG is never
44  *                      secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000         /* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN            20
86
87 #define GAUDI_CB_POOL_CB_CNT            512
88 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
97
98 #define GAUDI_ARB_WDT_TIMEOUT           0x1000000
99
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
101                 BIT(GAUDI_ENGINE_ID_MME_0) |\
102                 BIT(GAUDI_ENGINE_ID_MME_2) |\
103                 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
104
105 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
106
107 #define GAUDI_PLL_MAX 10
108
109 #define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")
110
111 #define MONITOR_SOB_STRING_SIZE         256
112
113 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
114                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
115                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
116                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
117                 "gaudi cpu eq"
118 };
119
120 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
121         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
122         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
123         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
124         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
125         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
126         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
127         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
128         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
129 };
130
131 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
132         [0] = GAUDI_QUEUE_ID_DMA_0_0,
133         [1] = GAUDI_QUEUE_ID_DMA_0_1,
134         [2] = GAUDI_QUEUE_ID_DMA_0_2,
135         [3] = GAUDI_QUEUE_ID_DMA_0_3,
136         [4] = GAUDI_QUEUE_ID_DMA_1_0,
137         [5] = GAUDI_QUEUE_ID_DMA_1_1,
138         [6] = GAUDI_QUEUE_ID_DMA_1_2,
139         [7] = GAUDI_QUEUE_ID_DMA_1_3,
140 };
141
142 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
143         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
144         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
145         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
146         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
147         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
148         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
149         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
150         [PACKET_FENCE]          = sizeof(struct packet_fence),
151         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
152         [PACKET_NOP]            = sizeof(struct packet_nop),
153         [PACKET_STOP]           = sizeof(struct packet_stop),
154         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
155         [PACKET_WAIT]           = sizeof(struct packet_wait),
156         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
157 };
158
159 static inline bool validate_packet_id(enum packet_id id)
160 {
161         switch (id) {
162         case PACKET_WREG_32:
163         case PACKET_WREG_BULK:
164         case PACKET_MSG_LONG:
165         case PACKET_MSG_SHORT:
166         case PACKET_CP_DMA:
167         case PACKET_REPEAT:
168         case PACKET_MSG_PROT:
169         case PACKET_FENCE:
170         case PACKET_LIN_DMA:
171         case PACKET_NOP:
172         case PACKET_STOP:
173         case PACKET_ARB_POINT:
174         case PACKET_WAIT:
175         case PACKET_LOAD_AND_EXE:
176                 return true;
177         default:
178                 return false;
179         }
180 }
181
182 static const char * const
183 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
184         "tpc_address_exceed_slm",
185         "tpc_div_by_0",
186         "tpc_spu_mac_overflow",
187         "tpc_spu_addsub_overflow",
188         "tpc_spu_abs_overflow",
189         "tpc_spu_fp_dst_nan_inf",
190         "tpc_spu_fp_dst_denorm",
191         "tpc_vpu_mac_overflow",
192         "tpc_vpu_addsub_overflow",
193         "tpc_vpu_abs_overflow",
194         "tpc_vpu_fp_dst_nan_inf",
195         "tpc_vpu_fp_dst_denorm",
196         "tpc_assertions",
197         "tpc_illegal_instruction",
198         "tpc_pc_wrap_around",
199         "tpc_qm_sw_err",
200         "tpc_hbw_rresp_err",
201         "tpc_hbw_bresp_err",
202         "tpc_lbw_rresp_err",
203         "tpc_lbw_bresp_err"
204 };
205
206 static const char * const
207 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
208         "PQ AXI HBW error",
209         "CQ AXI HBW error",
210         "CP AXI HBW error",
211         "CP error due to undefined OPCODE",
212         "CP encountered STOP OPCODE",
213         "CP AXI LBW error",
214         "CP WRREG32 or WRBULK returned error",
215         "N/A",
216         "FENCE 0 inc over max value and clipped",
217         "FENCE 1 inc over max value and clipped",
218         "FENCE 2 inc over max value and clipped",
219         "FENCE 3 inc over max value and clipped",
220         "FENCE 0 dec under min value and clipped",
221         "FENCE 1 dec under min value and clipped",
222         "FENCE 2 dec under min value and clipped",
223         "FENCE 3 dec under min value and clipped"
224 };
225
226 static const char * const
227 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
228         "Choice push while full error",
229         "Choice Q watchdog error",
230         "MSG AXI LBW returned with error"
231 };
232
233 enum gaudi_sm_sei_cause {
234         GAUDI_SM_SEI_SO_OVERFLOW,
235         GAUDI_SM_SEI_LBW_4B_UNALIGNED,
236         GAUDI_SM_SEI_AXI_RESPONSE_ERR
237 };
238
239 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
240         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
241         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
242         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
243         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
244         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
245         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
246         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
247         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
248         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
249         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
250         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
251         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
252         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
253         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
254         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
255         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
256         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
257         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
258         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
259         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
349         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
350         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
351         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
352         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
353 };
354
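/* Human-readable names for the predefined sync objects and (below) monitors.
 * These tables let the state dump print symbolic names instead of raw IDs.
 */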
355 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
356         { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
357         { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
358         { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
359         { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
360         { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
361         { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
362         { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
363         { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
364         { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
365         { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
366         { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
367         { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
368         { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
369         { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
370         { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
371         { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
372         { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
373         { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
374         { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
375         { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
376         { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
377         { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
378         { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
379         { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
380         { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
381         { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
382         { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
383 };
384
385 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
386         { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
387         { .id = 201, .name = "MON_OBJ_DMA_UP_FEADBACK_RESET" },
388         { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
389         { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
390         { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
391         { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
392         { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
393         { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
394         { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
395         { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
396         { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
397 };
398
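/* Per-ASIC properties consumed by the common state dump code: base addresses
 * of the sync manager SOB/monitor registers, engine CMDQ bases and strides,
 * and the number of engines, queues, streams and fences.
 */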
399 static s64 gaudi_state_dump_specs_props[] = {
400         [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
401         [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
402         [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
403         [SP_MON_OBJ_WR_ADDR_LOW] =
404                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
405         [SP_MON_OBJ_WR_ADDR_HIGH] =
406                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
407         [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
408         [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
409         [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
410         [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
411         [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
412         [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
413         [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
414         [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
415         [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
416         [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
417         [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
418         [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
419         [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
420         [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
421         [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
422         [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
423         [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
424         [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
425         [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
426         [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
427         [SP_FENCE0_CNT_OFFSET] =
428                 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
429         [SP_FENCE0_RDATA_OFFSET] =
430                 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
431         [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
432         [SP_NUM_CORES] = 1,
433 };
434
435 /* The order here is opposite to the order of the indexing in the h/w.
436  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
437  */
438 static const char * const gaudi_sync_manager_names[] = {
439         "SYNC_MGR_E_N",
440         "SYNC_MGR_W_N",
441         "SYNC_MGR_E_S",
442         "SYNC_MGR_W_S",
443         NULL
444 };
445
446 struct ecc_info_extract_params {
447         u64 block_address;
448         u32 num_memories;
449         bool derr;
450         bool disable_clock_gating;
451 };
452
453 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
454                                                                 u64 phys_addr);
455 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
456                                         struct hl_cs_job *job);
457 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
458                                         u32 size, u64 val);
459 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
460                                         u32 num_regs, u32 val);
461 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
462                                 u32 tpc_id);
463 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
464 static int gaudi_cpucp_info_get(struct hl_device *hdev);
465 static void gaudi_disable_clock_gating(struct hl_device *hdev);
466 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
467 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
468                                 u32 size, bool eb);
469 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
470                                 struct hl_gen_wait_properties *prop);
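/* External queues serve as collective masters; the DMA5, TPC7 and NIC queues
 * serve as collective slaves. All other queues don't take part in collective
 * operations.
 */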
471 static inline enum hl_collective_mode
472 get_collective_mode(struct hl_device *hdev, u32 queue_id)
473 {
474         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
475                 return HL_COLLECTIVE_MASTER;
476
477         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
478                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
479                 return HL_COLLECTIVE_SLAVE;
480
481         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
482                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
483                 return HL_COLLECTIVE_SLAVE;
484
485         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
486                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
487                 return HL_COLLECTIVE_SLAVE;
488
489         return HL_COLLECTIVE_NOT_SUPPORTED;
490 }
491
492 static inline void set_default_power_values(struct hl_device *hdev)
493 {
494         struct asic_fixed_properties *prop = &hdev->asic_prop;
495
496         if (hdev->card_type == cpucp_card_type_pmc) {
497                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
498                 prop->dc_power_default = DC_POWER_DEFAULT_PMC;
499         } else {
500                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
501                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
502         }
503 }
504
505 static int gaudi_set_fixed_properties(struct hl_device *hdev)
506 {
507         struct asic_fixed_properties *prop = &hdev->asic_prop;
508         u32 num_sync_stream_queues = 0;
509         int i;
510
511         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
512         prop->hw_queues_props = kcalloc(prop->max_queues,
513                         sizeof(struct hw_queue_properties),
514                         GFP_KERNEL);
515
516         if (!prop->hw_queues_props)
517                 return -ENOMEM;
518
519         for (i = 0 ; i < prop->max_queues ; i++) {
520                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
521                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
522                         prop->hw_queues_props[i].driver_only = 0;
523                         prop->hw_queues_props[i].supports_sync_stream = 1;
524                         prop->hw_queues_props[i].cb_alloc_flags =
525                                 CB_ALLOC_KERNEL;
526                         num_sync_stream_queues++;
527                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
528                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
529                         prop->hw_queues_props[i].driver_only = 1;
530                         prop->hw_queues_props[i].supports_sync_stream = 0;
531                         prop->hw_queues_props[i].cb_alloc_flags =
532                                 CB_ALLOC_KERNEL;
533                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
534                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
535                         prop->hw_queues_props[i].driver_only = 0;
536                         prop->hw_queues_props[i].supports_sync_stream = 0;
537                         prop->hw_queues_props[i].cb_alloc_flags =
538                                 CB_ALLOC_USER;
539
540                 }
541                 prop->hw_queues_props[i].collective_mode =
542                                                 get_collective_mode(hdev, i);
543         }
544
545         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
546         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
547         prop->collective_first_sob = 0;
548         prop->collective_first_mon = 0;
549
550         /* 2 SOBs per internal queue stream are reserved for collective */
551         prop->sync_stream_first_sob =
552                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
553                         * QMAN_STREAMS * HL_RSVD_SOBS;
554
555         /* 1 monitor per internal queue stream is reserved for collective
556          * 2 monitors per external queue stream are reserved for collective
557          */
558         prop->sync_stream_first_mon =
559                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
560                         (NUMBER_OF_EXT_HW_QUEUES * 2);
561
562         prop->dram_base_address = DRAM_PHYS_BASE;
563         prop->dram_size = GAUDI_HBM_SIZE_32GB;
564         prop->dram_end_address = prop->dram_base_address +
565                                         prop->dram_size;
566         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
567
568         prop->sram_base_address = SRAM_BASE_ADDR;
569         prop->sram_size = SRAM_SIZE;
570         prop->sram_end_address = prop->sram_base_address +
571                                         prop->sram_size;
572         prop->sram_user_base_address = prop->sram_base_address +
573                                         SRAM_USER_BASE_OFFSET;
574
575         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
576         if (hdev->pldm)
577                 prop->mmu_pgt_size = 0x800000; /* 8MB */
578         else
579                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
580         prop->mmu_pte_size = HL_PTE_SIZE;
581         prop->mmu_hop_table_size = HOP_TABLE_SIZE;
582         prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
583         prop->dram_page_size = PAGE_SIZE_2MB;
584         prop->dram_supports_virtual_memory = false;
585
586         prop->pmmu.hop0_shift = HOP0_SHIFT;
587         prop->pmmu.hop1_shift = HOP1_SHIFT;
588         prop->pmmu.hop2_shift = HOP2_SHIFT;
589         prop->pmmu.hop3_shift = HOP3_SHIFT;
590         prop->pmmu.hop4_shift = HOP4_SHIFT;
591         prop->pmmu.hop0_mask = HOP0_MASK;
592         prop->pmmu.hop1_mask = HOP1_MASK;
593         prop->pmmu.hop2_mask = HOP2_MASK;
594         prop->pmmu.hop3_mask = HOP3_MASK;
595         prop->pmmu.hop4_mask = HOP4_MASK;
596         prop->pmmu.start_addr = VA_HOST_SPACE_START;
597         prop->pmmu.end_addr =
598                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
599         prop->pmmu.page_size = PAGE_SIZE_4KB;
600         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
601
602         /* PMMU and HPMMU are the same except for the page size */
603         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
604         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
605
606         /* shifts and masks are the same in PMMU and DMMU */
607         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
608         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
609         prop->dmmu.end_addr = VA_HOST_SPACE_END;
610         prop->dmmu.page_size = PAGE_SIZE_2MB;
611
612         prop->cfg_size = CFG_SIZE;
613         prop->max_asid = MAX_ASID;
614         prop->num_of_events = GAUDI_EVENT_SIZE;
615         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
616
617         set_default_power_values(hdev);
618
619         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
620         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
621
622         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
623         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
624
625         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
626                                         CARD_NAME_MAX_LEN);
627
628         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
629
630         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
631                         prop->sync_stream_first_sob +
632                         (num_sync_stream_queues * HL_RSVD_SOBS);
633         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
634                         prop->sync_stream_first_mon +
635                         (num_sync_stream_queues * HL_RSVD_MONS);
636
637         prop->first_available_user_msix_interrupt = USHRT_MAX;
638
639         for (i = 0 ; i < HL_MAX_DCORES ; i++)
640                 prop->first_available_cq[i] = USHRT_MAX;
641
642         prop->fw_cpu_boot_dev_sts0_valid = false;
643         prop->fw_cpu_boot_dev_sts1_valid = false;
644         prop->hard_reset_done_by_fw = false;
645         prop->gic_interrupts_enable = true;
646
647         prop->server_type = HL_SERVER_TYPE_UNKNOWN;
648
649         return 0;
650 }
651
652 static int gaudi_pci_bars_map(struct hl_device *hdev)
653 {
654         static const char * const name[] = {"SRAM", "CFG", "HBM"};
655         bool is_wc[3] = {false, false, true};
656         int rc;
657
658         rc = hl_pci_bars_map(hdev, name, is_wc);
659         if (rc)
660                 return rc;
661
662         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
663                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
664
665         return 0;
666 }
667
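/* Re-point the HBM BAR (inbound PCI region 2) at the given device address and
 * return the previously mapped address, or U64_MAX if the region could not be
 * changed (e.g. when the iATU is configured by the firmware).
 */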
668 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
669 {
670         struct gaudi_device *gaudi = hdev->asic_specific;
671         struct hl_inbound_pci_region pci_region;
672         u64 old_addr = addr;
673         int rc;
674
675         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
676                 return old_addr;
677
678         if (hdev->asic_prop.iatu_done_by_fw)
679                 return U64_MAX;
680
681         /* Inbound Region 2 - Bar 4 - Point to HBM */
682         pci_region.mode = PCI_BAR_MATCH_MODE;
683         pci_region.bar = HBM_BAR_ID;
684         pci_region.addr = addr;
685         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
686         if (rc)
687                 return U64_MAX;
688
689         if (gaudi) {
690                 old_addr = gaudi->hbm_bar_cur_addr;
691                 gaudi->hbm_bar_cur_addr = addr;
692         }
693
694         return old_addr;
695 }
696
697 static int gaudi_init_iatu(struct hl_device *hdev)
698 {
699         struct hl_inbound_pci_region inbound_region;
700         struct hl_outbound_pci_region outbound_region;
701         int rc;
702
703         if (hdev->asic_prop.iatu_done_by_fw)
704                 return 0;
705
706         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
707         inbound_region.mode = PCI_BAR_MATCH_MODE;
708         inbound_region.bar = SRAM_BAR_ID;
709         inbound_region.addr = SRAM_BASE_ADDR;
710         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
711         if (rc)
712                 goto done;
713
714         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
715         inbound_region.mode = PCI_BAR_MATCH_MODE;
716         inbound_region.bar = CFG_BAR_ID;
717         inbound_region.addr = SPI_FLASH_BASE_ADDR;
718         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
719         if (rc)
720                 goto done;
721
722         /* Inbound Region 2 - Bar 4 - Point to HBM */
723         inbound_region.mode = PCI_BAR_MATCH_MODE;
724         inbound_region.bar = HBM_BAR_ID;
725         inbound_region.addr = DRAM_PHYS_BASE;
726         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
727         if (rc)
728                 goto done;
729
730         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
731
732         /* Outbound Region 0 - Point to Host */
733         outbound_region.addr = HOST_PHYS_BASE;
734         outbound_region.size = HOST_PHYS_SIZE;
735         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
736
737 done:
738         return rc;
739 }
740
741 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
742 {
743         return RREG32(mmHW_STATE);
744 }
745
746 static int gaudi_early_init(struct hl_device *hdev)
747 {
748         struct asic_fixed_properties *prop = &hdev->asic_prop;
749         struct pci_dev *pdev = hdev->pdev;
750         u32 fw_boot_status;
751         int rc;
752
753         rc = gaudi_set_fixed_properties(hdev);
754         if (rc) {
755                 dev_err(hdev->dev, "Failed setting fixed properties\n");
756                 return rc;
757         }
758
759         /* Check BAR sizes */
760         if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
761                 dev_err(hdev->dev,
762                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
763                         SRAM_BAR_ID,
764                         (unsigned long long) pci_resource_len(pdev,
765                                                         SRAM_BAR_ID),
766                         SRAM_BAR_SIZE);
767                 rc = -ENODEV;
768                 goto free_queue_props;
769         }
770
771         if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
772                 dev_err(hdev->dev,
773                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
774                         CFG_BAR_ID,
775                         (unsigned long long) pci_resource_len(pdev,
776                                                                 CFG_BAR_ID),
777                         CFG_BAR_SIZE);
778                 rc = -ENODEV;
779                 goto free_queue_props;
780         }
781
782         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
783
784         /* If FW security is enabled at this point it means no access to ELBI */
785         if (hdev->asic_prop.fw_security_enabled) {
786                 hdev->asic_prop.iatu_done_by_fw = true;
787
788                 /*
789                  * The GIC security bit can ONLY be set by CPUCP, so at this
790                  * stage the decision can only be based on PCI ID security.
791                  */
792                 hdev->asic_prop.gic_interrupts_enable = false;
793                 goto pci_init;
794         }
795
796         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
797                                 &fw_boot_status);
798         if (rc)
799                 goto free_queue_props;
800
801         /* Check whether FW is configuring iATU */
802         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
803                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
804                 hdev->asic_prop.iatu_done_by_fw = true;
805
806 pci_init:
807         rc = hl_pci_init(hdev);
808         if (rc)
809                 goto free_queue_props;
810
811         /* Before continuing with the initialization, we need to read the preboot
812          * version to determine whether we are running with security-enabled firmware
813          */
814         rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
815                                         mmCPU_BOOT_DEV_STS0,
816                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
817                                         mmCPU_BOOT_ERR1,
818                                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
819         if (rc) {
820                 if (hdev->reset_on_preboot_fail)
821                         hdev->asic_funcs->hw_fini(hdev, true);
822                 goto pci_fini;
823         }
824
825         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
826                 dev_info(hdev->dev,
827                         "H/W state is dirty, must reset before initializing\n");
828                 hdev->asic_funcs->hw_fini(hdev, true);
829         }
830
831         return 0;
832
833 pci_fini:
834         hl_pci_fini(hdev);
835 free_queue_props:
836         kfree(hdev->asic_prop.hw_queues_props);
837         return rc;
838 }
839
840 static int gaudi_early_fini(struct hl_device *hdev)
841 {
842         kfree(hdev->asic_prop.hw_queues_props);
843         hl_pci_fini(hdev);
844
845         return 0;
846 }
847
848 /**
849  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
850  *
851  * @hdev: pointer to hl_device structure
852  *
853  */
854 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
855 {
856         struct asic_fixed_properties *prop = &hdev->asic_prop;
857         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
858         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
859         int rc;
860
861         if (hdev->asic_prop.fw_security_enabled) {
862                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
863
864                 if (rc)
865                         return rc;
866
867                 freq = pll_freq_arr[2];
868         } else {
869                 /* Backward compatibility */
870                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
871                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
872                 nr = RREG32(mmPSOC_CPU_PLL_NR);
873                 nf = RREG32(mmPSOC_CPU_PLL_NF);
874                 od = RREG32(mmPSOC_CPU_PLL_OD);
875
876                 if (div_sel == DIV_SEL_REF_CLK ||
877                                 div_sel == DIV_SEL_DIVIDED_REF) {
878                         if (div_sel == DIV_SEL_REF_CLK)
879                                 freq = PLL_REF_CLK;
880                         else
881                                 freq = PLL_REF_CLK / (div_fctr + 1);
882                 } else if (div_sel == DIV_SEL_PLL_CLK ||
883                         div_sel == DIV_SEL_DIVIDED_PLL) {
884                         pll_clk = PLL_REF_CLK * (nf + 1) /
885                                         ((nr + 1) * (od + 1));
886                         if (div_sel == DIV_SEL_PLL_CLK)
887                                 freq = pll_clk;
888                         else
889                                 freq = pll_clk / (div_fctr + 1);
890                 } else {
891                         dev_warn(hdev->dev,
892                                 "Received invalid div select value: %d",
893                                 div_sel);
894                         freq = 0;
895                 }
896         }
897
898         prop->psoc_timestamp_frequency = freq;
899         prop->psoc_pci_pll_nr = nr;
900         prop->psoc_pci_pll_nf = nf;
901         prop->psoc_pci_pll_od = od;
902         prop->psoc_pci_pll_div_factor = div_fctr;
903
904         return 0;
905 }
906
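/* Build a LIN_DMA packet that copies the TPC kernel image from host memory to
 * the SRAM user area, submit it on QMAN0, and then run the kernel on every TPC
 * engine.
 */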
907 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
908                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
909 {
910         struct asic_fixed_properties *prop = &hdev->asic_prop;
911         struct packet_lin_dma *init_tpc_mem_pkt;
912         struct hl_cs_job *job;
913         struct hl_cb *cb;
914         u64 dst_addr;
915         u32 cb_size, ctl;
916         u8 tpc_id;
917         int rc;
918
919         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
920         if (!cb)
921                 return -EFAULT;
922
923         init_tpc_mem_pkt = cb->kernel_address;
924         cb_size = sizeof(*init_tpc_mem_pkt);
925         memset(init_tpc_mem_pkt, 0, cb_size);
926
927         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
928
929         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
930         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
931         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
932         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
933
934         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
935
936         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
937         dst_addr = (prop->sram_user_base_address &
938                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
939                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
940         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
941
942         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
943         if (!job) {
944                 dev_err(hdev->dev, "Failed to allocate a new job\n");
945                 rc = -ENOMEM;
946                 goto release_cb;
947         }
948
949         job->id = 0;
950         job->user_cb = cb;
951         atomic_inc(&job->user_cb->cs_cnt);
952         job->user_cb_size = cb_size;
953         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
954         job->patched_cb = job->user_cb;
955         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
956
957         hl_debugfs_add_job(hdev, job);
958
959         rc = gaudi_send_job_on_qman0(hdev, job);
960
961         if (rc)
962                 goto free_job;
963
964         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
965                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
966                 if (rc)
967                         break;
968         }
969
970 free_job:
971         hl_userptr_delete_list(hdev, &job->userptr_list);
972         hl_debugfs_remove_job(hdev, job);
973         kfree(job);
974         atomic_dec(&cb->cs_cnt);
975
976 release_cb:
977         hl_cb_put(cb);
978         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
979
980         return rc;
981 }
982
983 /*
984  * gaudi_init_tpc_mem() - Initialize TPC memories.
985  * @hdev: Pointer to hl_device structure.
986  *
987  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
988  *
989  * Return: 0 for success, negative value for error.
990  */
991 static int gaudi_init_tpc_mem(struct hl_device *hdev)
992 {
993         const struct firmware *fw;
994         size_t fw_size;
995         void *cpu_addr;
996         dma_addr_t dma_handle;
997         int rc, count = 5;
998
999 again:
1000         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1001         if (rc == -EINTR && count-- > 0) {
1002                 msleep(50);
1003                 goto again;
1004         }
1005
1006         if (rc) {
1007                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1008                                 GAUDI_TPC_FW_FILE);
1009                 goto out;
1010         }
1011
1012         fw_size = fw->size;
1013         cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1014                         &dma_handle, GFP_KERNEL | __GFP_ZERO);
1015         if (!cpu_addr) {
1016                 dev_err(hdev->dev,
1017                         "Failed to allocate %zu of dma memory for TPC kernel\n",
1018                         fw_size);
1019                 rc = -ENOMEM;
1020                 goto out;
1021         }
1022
1023         memcpy(cpu_addr, fw->data, fw_size);
1024
1025         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1026
1027         hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1028                         dma_handle);
1029
1030 out:
1031         release_firmware(fw);
1032         return rc;
1033 }
1034
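/* Assign the SOBs of the stream's current group to the NIC slave queues and to
 * the DMA5/TPC7 reduction queues of that stream.
 */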
1035 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1036 {
1037         struct gaudi_device *gaudi = hdev->asic_specific;
1038         struct gaudi_collective_properties *prop = &gaudi->collective_props;
1039         struct hl_hw_queue *q;
1040         u32 i, sob_id, sob_group_id, queue_id;
1041
1042         /* Iterate through SOB groups and assign a SOB for each slave queue */
1043         sob_group_id =
1044                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1045         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1046
1047         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1048         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1049                 q = &hdev->kernel_queues[queue_id + (4 * i)];
1050                 q->sync_stream_prop.collective_sob_id = sob_id + i;
1051         }
1052
1053         /* Both DMA5 and TPC7 use the same resources since only a single
1054          * engine needs to participate in the reduction process
1055          */
1056         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1057         q = &hdev->kernel_queues[queue_id];
1058         q->sync_stream_prop.collective_sob_id =
1059                         sob_id + NIC_NUMBER_OF_ENGINES;
1060
1061         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1062         q = &hdev->kernel_queues[queue_id];
1063         q->sync_stream_prop.collective_sob_id =
1064                         sob_id + NIC_NUMBER_OF_ENGINES;
1065 }
1066
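/* Clear all SOBs in the group and re-initialize its kref; used both as the
 * kref release callback and directly during collective init.
 */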
1067 static void gaudi_sob_group_hw_reset(struct kref *ref)
1068 {
1069         struct gaudi_hw_sob_group *hw_sob_group =
1070                 container_of(ref, struct gaudi_hw_sob_group, kref);
1071         struct hl_device *hdev = hw_sob_group->hdev;
1072         int i;
1073
1074         for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1075                 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1076                         (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1077
1078         kref_init(&hw_sob_group->kref);
1079 }
1080
1081 static void gaudi_sob_group_reset_error(struct kref *ref)
1082 {
1083         struct gaudi_hw_sob_group *hw_sob_group =
1084                 container_of(ref, struct gaudi_hw_sob_group, kref);
1085         struct hl_device *hdev = hw_sob_group->hdev;
1086
1087         dev_crit(hdev->dev,
1088                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1089                 hw_sob_group->base_sob_id);
1090 }
1091
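/* Build the master monitor SOB mask: one bit per enabled NIC engine plus one
 * bit for the collective engine.
 */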
1092 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1093 {
1094         struct gaudi_collective_properties *prop;
1095         int i;
1096
1097         prop = &gaudi->collective_props;
1098
1099         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1100
1101         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1102                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1103                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1104                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1105         /* Set collective engine bit */
1106         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1107                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1108 }
1109
1110 static int gaudi_collective_init(struct hl_device *hdev)
1111 {
1112         u32 i, sob_id, reserved_sobs_per_group;
1113         struct gaudi_collective_properties *prop;
1114         struct gaudi_device *gaudi;
1115
1116         gaudi = hdev->asic_specific;
1117         prop = &gaudi->collective_props;
1118         sob_id = hdev->asic_prop.collective_first_sob;
1119
1120         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1121         reserved_sobs_per_group =
1122                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1123
1124         /* Init SOB groups */
1125         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1126                 prop->hw_sob_group[i].hdev = hdev;
1127                 prop->hw_sob_group[i].base_sob_id = sob_id;
1128                 sob_id += reserved_sobs_per_group;
1129                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1130         }
1131
1132         for (i = 0 ; i < QMAN_STREAMS; i++) {
1133                 prop->next_sob_group_val[i] = 1;
1134                 prop->curr_sob_group_idx[i] = 0;
1135                 gaudi_collective_map_sobs(hdev, i);
1136         }
1137
1138         gaudi_collective_mstr_sob_mask_set(gaudi);
1139
1140         return 0;
1141 }
1142
1143 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1144 {
1145         struct gaudi_device *gaudi = hdev->asic_specific;
1146         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1147
1148         kref_put(&cprop->hw_sob_group[sob_group].kref,
1149                                         gaudi_sob_group_hw_reset);
1150 }
1151
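/* Generate the wait CBs for the collective master queue: two monitors, each
 * covering up to HL_MAX_SOBS_PER_MONITOR SOBs of the group, both waiting for
 * the stream's current group value.
 */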
1152 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1153                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1154 {
1155         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1156         struct gaudi_collective_properties *cprop;
1157         struct hl_gen_wait_properties wait_prop;
1158         struct hl_sync_stream_properties *prop;
1159         struct gaudi_device *gaudi;
1160
1161         gaudi = hdev->asic_specific;
1162         cprop = &gaudi->collective_props;
1163         queue_id = job->hw_queue_id;
1164         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1165
1166         master_sob_base =
1167                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1168         master_monitor = prop->collective_mstr_mon_id[0];
1169
1170         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1171
1172         dev_dbg(hdev->dev,
1173                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1174                 master_sob_base, cprop->mstr_sob_mask[0],
1175                 cprop->next_sob_group_val[stream],
1176                 master_monitor, queue_id);
1177
1178         wait_prop.data = (void *) job->patched_cb;
1179         wait_prop.sob_base = master_sob_base;
1180         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1181         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1182         wait_prop.mon_id = master_monitor;
1183         wait_prop.q_idx = queue_id;
1184         wait_prop.size = cb_size;
1185         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1186
1187         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1188         master_monitor = prop->collective_mstr_mon_id[1];
1189
1190         dev_dbg(hdev->dev,
1191                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1192                 master_sob_base, cprop->mstr_sob_mask[1],
1193                 cprop->next_sob_group_val[stream],
1194                 master_monitor, queue_id);
1195
1196         wait_prop.sob_base = master_sob_base;
1197         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1198         wait_prop.mon_id = master_monitor;
1199         wait_prop.size = cb_size;
1200         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1201 }
1202
1203 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1204                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1205 {
1206         struct hl_gen_wait_properties wait_prop;
1207         struct hl_sync_stream_properties *prop;
1208         u32 queue_id, cb_size = 0;
1209
1210         queue_id = job->hw_queue_id;
1211         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1212
1213         if (job->cs->encaps_signals) {
1214                 /* use the encaps signal handle stored earlier in the flow
1215                  * and set the SOB information from the encaps
1216                  * signals handle
1217                  */
1218                 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1219                                                 cs_cmpl);
1220
1221                 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1222                                 job->cs->sequence,
1223                                 cs_cmpl->hw_sob->sob_id,
1224                                 cs_cmpl->sob_val);
1225         }
1226
1227         /* Add to wait CBs using slave monitor */
1228         wait_prop.data = (void *) job->user_cb;
1229         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1230         wait_prop.sob_mask = 0x1;
1231         wait_prop.sob_val = cs_cmpl->sob_val;
1232         wait_prop.mon_id = prop->collective_slave_mon_id;
1233         wait_prop.q_idx = queue_id;
1234         wait_prop.size = cb_size;
1235
1236         dev_dbg(hdev->dev,
1237                 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1238                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1239                 prop->collective_slave_mon_id, queue_id);
1240
1241         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1242
1243         dev_dbg(hdev->dev,
1244                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1245                 prop->collective_sob_id, queue_id);
1246
1247         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1248                         prop->collective_sob_id, cb_size, false);
1249 }
1250
1251 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1252 {
1253         struct hl_cs_compl *signal_cs_cmpl =
1254                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1255         struct hl_cs_compl *cs_cmpl =
1256                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1257         struct gaudi_collective_properties *cprop;
1258         u32 stream, queue_id, sob_group_offset;
1259         struct gaudi_device *gaudi;
1260         struct hl_device *hdev;
1261         struct hl_cs_job *job;
1262         struct hl_ctx *ctx;
1263
1264         ctx = cs->ctx;
1265         hdev = ctx->hdev;
1266         gaudi = hdev->asic_specific;
1267         cprop = &gaudi->collective_props;
1268
1269         /* In the encaps signals case, the SOB info will be retrieved from
1270          * the handle in gaudi_collective_slave_init_job.
1271          */
1272         if (!cs->encaps_signals) {
1273                 /* copy the SOB id and value of the signal CS */
1274                 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1275                 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1276         }
1277
1278         /* Check again whether the signal CS has already completed.
1279          * If it has, don't send any wait CS, since the hw_sob
1280          * could already be in reset. If the signal has not completed,
1281          * take a refcount on the hw_sob to prevent the SOB from being
1282          * reset while the wait CS has not yet been submitted.
1283          * Note that this check is protected by two locks:
1284          * the hw_queue lock and the completion object lock.
1285          * The completion object lock also protects
1286          * the hw_sob reset handler function.
1287          * The hw_queue lock prevents the hw_sob refcount value,
1288          * which is changed by the signal/wait flows, from going out of sync.
1289          */
1290         spin_lock(&signal_cs_cmpl->lock);
1291
1292         if (completion_done(&cs->signal_fence->completion)) {
1293                 spin_unlock(&signal_cs_cmpl->lock);
1294                 return -EINVAL;
1295         }
1296         /* Increment kref since all slave queues are now waiting on it */
1297         kref_get(&cs_cmpl->hw_sob->kref);
1298
1299         spin_unlock(&signal_cs_cmpl->lock);
1300
1301         /* Calculate the stream from collective master queue (1st job) */
1302         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1303         stream = job->hw_queue_id % 4;
1304         sob_group_offset =
1305                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1306
1307         list_for_each_entry(job, &cs->job_list, cs_node) {
1308                 queue_id = job->hw_queue_id;
1309
1310                 if (hdev->kernel_queues[queue_id].collective_mode ==
1311                                 HL_COLLECTIVE_MASTER)
1312                         gaudi_collective_master_init_job(hdev, job, stream,
1313                                                 sob_group_offset);
1314                 else
1315                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1316         }
1317
1318         cs_cmpl->sob_group = sob_group_offset;
1319
1320         /* Handle sob group kref and wraparound */
1321         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1322         cprop->next_sob_group_val[stream]++;
1323
1324         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1325                 /*
1326                  * Decrement as we reached the max value.
1327                  * The release function won't be called here as we've
1328                  * just incremented the refcount.
1329                  */
1330                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1331                                 gaudi_sob_group_reset_error);
1332                 cprop->next_sob_group_val[stream] = 1;
1333                 /* only two SOBs are currently in use */
1334                 cprop->curr_sob_group_idx[stream] =
1335                         (cprop->curr_sob_group_idx[stream] + 1) &
1336                                                         (HL_RSVD_SOBS - 1);
1337
1338                 gaudi_collective_map_sobs(hdev, stream);
1339
1340                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1341                                 cprop->curr_sob_group_idx[stream], stream);
1342         }
1343
1344         mb();
1345         hl_fence_put(cs->signal_fence);
1346         cs->signal_fence = NULL;
1347
1348         return 0;
1349 }
1350
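/*
 * Allocate a single collective wait job and its kernel CB, according to the
 * queue's collective mode (master or slave), and add it to the CS job list.
 * For master queues the CB is treated as an already-patched CB; for slave
 * queues an internal mapped CB is used.
 */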
1351 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1352                 struct hl_ctx *ctx, struct hl_cs *cs,
1353                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1354                 u32 encaps_signal_offset)
1355 {
1356         struct hw_queue_properties *hw_queue_prop;
1357         struct hl_cs_counters_atomic *cntr;
1358         struct hl_cs_job *job;
1359         struct hl_cb *cb;
1360         u32 cb_size;
1361         bool patched_cb;
1362
1363         cntr = &hdev->aggregated_cs_counters;
1364
1365         if (mode == HL_COLLECTIVE_MASTER) {
1366                 /* CB size of collective master queue contains
1367                  * 4 msg short packets for monitor 1 configuration
1368                  * 1 fence packet
1369                  * 4 msg short packets for monitor 2 configuration
1370                  * 1 fence packet
1371                  * 2 msg prot packets for completion and MSI-X
1372                  */
1373                 cb_size = sizeof(struct packet_msg_short) * 8 +
1374                                 sizeof(struct packet_fence) * 2 +
1375                                 sizeof(struct packet_msg_prot) * 2;
1376                 patched_cb = true;
1377         } else {
1378                 /* CB size of collective slave queues contains
1379                  * 4 msg short packets for monitor configuration
1380                  * 1 fence packet
1381                  * 1 additional msg short packet for sob signal
1382                  */
1383                 cb_size = sizeof(struct packet_msg_short) * 5 +
1384                                 sizeof(struct packet_fence);
1385                 patched_cb = false;
1386         }
1387
1388         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1389         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1390         if (!job) {
1391                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1392                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1393                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1394                 return -ENOMEM;
1395         }
1396
1397         /* Allocate an internal mapped CB for non-patched CBs */
1398         cb = hl_cb_kernel_create(hdev, cb_size,
1399                         hdev->mmu_enable && !patched_cb);
1400         if (!cb) {
1401                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1402                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1403                 kfree(job);
1404                 return -EFAULT;
1405         }
1406
1407         job->id = 0;
1408         job->cs = cs;
1409         job->user_cb = cb;
1410         atomic_inc(&job->user_cb->cs_cnt);
1411         job->user_cb_size = cb_size;
1412         job->hw_queue_id = queue_id;
1413
1414         /* Since the collective wait CS is guaranteed to have only one chunk,
1415          * we can use this chunk to set the encapsulated signal offset
1416          * in the jobs.
1417          */
1418         if (cs->encaps_signals)
1419                 job->encaps_sig_wait_offset = encaps_signal_offset;
1420
1421         /*
1422          * No need for parsing, the user CB is the patched CB.
1423          * We call hl_cb_destroy() for two reasons: we don't need
1424          * the CB in the CB IDR anymore, and we need to decrement its
1425          * refcount as it was incremented inside hl_cb_kernel_create().
1426          */
1427         if (patched_cb)
1428                 job->patched_cb = job->user_cb;
1429         else
1430                 job->patched_cb = NULL;
1431
1432         job->job_cb_size = job->user_cb_size;
1433         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1434
1435         /* Increment refcount since we get a completion for external queues */
1436         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1437                 cs_get(cs);
1438
1439         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1440
1441         list_add_tail(&job->cs_node, &cs->job_list);
1442
1443         hl_debugfs_add_job(hdev, job);
1444
1445         return 0;
1446 }
1447
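/*
 * Create all jobs of a collective wait CS: one job on the collective master
 * queue, plus one job per enabled NIC queue and one on the reduction engine
 * queue (DMA5 or TPC7), all on the stream derived from the wait queue id.
 */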
1448 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1449                 struct hl_ctx *ctx, struct hl_cs *cs,
1450                 u32 wait_queue_id, u32 collective_engine_id,
1451                 u32 encaps_signal_offset)
1452 {
1453         struct gaudi_device *gaudi = hdev->asic_specific;
1454         struct hw_queue_properties *hw_queue_prop;
1455         u32 queue_id, collective_queue, num_jobs;
1456         u32 stream, nic_queue, nic_idx = 0;
1457         bool skip;
1458         int i, rc = 0;
1459
1460         /* Verify wait queue id is configured as master */
1461         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1462         if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1463                 dev_err(hdev->dev,
1464                         "Queue %d is not configured as collective master\n",
1465                         wait_queue_id);
1466                 return -EINVAL;
1467         }
1468
1469         /* Verify engine id is supported */
1470         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1471                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1472                 dev_err(hdev->dev,
1473                         "Collective wait does not support engine %u\n",
1474                         collective_engine_id);
1475                 return -EINVAL;
1476         }
1477
1478         stream = wait_queue_id % 4;
1479
1480         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1481                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1482         else
1483                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1484
1485         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1486         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1487
1488         /* The first job goes to the collective master queue; it will wait
1489          * for the collective slave queues to finish execution.
1490          * The synchronization is done using two monitors:
1491          * the first monitor for NICs 0-7, the second monitor for NICs 8-9
1492          * and the reduction engine (DMA5/TPC7).
1493          *
1494          * The rest of the jobs go to the collective slave queues, which will
1495          * all wait for the user to signal SOB 'cs_cmpl->sob_val'.
1496          */
1497         for (i = 0 ; i < num_jobs ; i++) {
1498                 if (i == 0) {
1499                         queue_id = wait_queue_id;
1500                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1501                                 HL_COLLECTIVE_MASTER, queue_id,
1502                                 wait_queue_id, encaps_signal_offset);
1503                 } else {
1504                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1505                                 if (gaudi->hw_cap_initialized &
1506                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1507                                         skip = false;
1508                                 else
1509                                         skip = true;
1510
1511                                 queue_id = nic_queue;
1512                                 nic_queue += 4;
1513                                 nic_idx++;
1514
1515                                 if (skip)
1516                                         continue;
1517                         } else {
1518                                 queue_id = collective_queue;
1519                         }
1520
1521                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1522                                 HL_COLLECTIVE_SLAVE, queue_id,
1523                                 wait_queue_id, encaps_signal_offset);
1524                 }
1525
1526                 if (rc)
1527                         return rc;
1528         }
1529
1530         return rc;
1531 }
1532
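/*
 * Late initialization: fetch cpucp info, disable unused NIC QMANs on PCI
 * cards, enable PCI access from the device CPU, scrub SRAM/DRAM, fetch the
 * PSOC frequency, clear the MMU page tables range, initialize the TPC
 * memories and the collective infrastructure, and prepare the MMU for the
 * single supported user ASID.
 */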
1533 static int gaudi_late_init(struct hl_device *hdev)
1534 {
1535         struct gaudi_device *gaudi = hdev->asic_specific;
1536         int rc;
1537
1538         rc = gaudi->cpucp_info_get(hdev);
1539         if (rc) {
1540                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1541                 return rc;
1542         }
1543
1544         if ((hdev->card_type == cpucp_card_type_pci) &&
1545                         (hdev->nic_ports_mask & 0x3)) {
1546                 dev_info(hdev->dev,
1547                         "PCI card detected, only 8 ports are enabled\n");
1548                 hdev->nic_ports_mask &= ~0x3;
1549
1550                 /* Stop and disable unused NIC QMANs */
1551                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1552                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1553                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1554
1555                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1556                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1557                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1558
1559                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1560                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1561
1562                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1563         }
1564
1565         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1566         if (rc) {
1567                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1568                 return rc;
1569         }
1570
1571         /* Scrub both SRAM and DRAM */
1572         rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
1573         if (rc)
1574                 goto disable_pci_access;
1575
1576         rc = gaudi_fetch_psoc_frequency(hdev);
1577         if (rc) {
1578                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1579                 goto disable_pci_access;
1580         }
1581
1582         rc = gaudi_mmu_clear_pgt_range(hdev);
1583         if (rc) {
1584                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1585                 goto disable_pci_access;
1586         }
1587
1588         rc = gaudi_init_tpc_mem(hdev);
1589         if (rc) {
1590                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1591                 goto disable_pci_access;
1592         }
1593
1594         rc = gaudi_collective_init(hdev);
1595         if (rc) {
1596                 dev_err(hdev->dev, "Failed to init collective\n");
1597                 goto disable_pci_access;
1598         }
1599
1600         /* We only support a single ASID for the user, so for the sake of optimization, just
1601          * initialize the ASID one time during device initialization with the fixed value of 1
1602          */
1603         gaudi_mmu_prepare(hdev, 1);
1604
1605         return 0;
1606
1607 disable_pci_access:
1608         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1609
1610         return rc;
1611 }
1612
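/* Free the hwmon channel info array that was allocated for this device */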
1613 static void gaudi_late_fini(struct hl_device *hdev)
1614 {
1615         const struct hwmon_channel_info **channel_info_arr;
1616         int i = 0;
1617
1618         if (!hdev->hl_chip_info->info)
1619                 return;
1620
1621         channel_info_arr = hdev->hl_chip_info->info;
1622
1623         while (channel_info_arr[i]) {
1624                 kfree(channel_info_arr[i]->config);
1625                 kfree(channel_info_arr[i]);
1626                 i++;
1627         }
1628
1629         kfree(channel_info_arr);
1630
1631         hdev->hl_chip_info->info = NULL;
1632 }
1633
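/*
 * Allocate the CPU-accessible DMA memory region, retrying a few times until
 * an allocation is found whose MSB extension bits (see the comment below) are
 * identical across the whole range.
 */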
1634 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1635 {
1636         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1637         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1638         int i, j, rc = 0;
1639
1640         /*
1641          * The device CPU works with 40-bit addresses, and bit 39 must be set
1642          * to '1' when accessing the host.
1643          * Bits 49:39 of the full host address are saved for a later
1644          * configuration of the HW to perform the extension to 50 bits.
1645          * Because there is a single HW register that holds the extension bits,
1646          * these bits must be identical across the entire allocated range.
1647          */
1648
1649         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1650                 virt_addr_arr[i] =
1651                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1652                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1653                                                 &dma_addr_arr[i],
1654                                                 GFP_KERNEL | __GFP_ZERO);
1655                 if (!virt_addr_arr[i]) {
1656                         rc = -ENOMEM;
1657                         goto free_dma_mem_arr;
1658                 }
1659
1660                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1661                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1662                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1663                         break;
1664         }
1665
1666         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1667                 dev_err(hdev->dev,
1668                         "MSBs of CPU accessible DMA memory are not identical across the allocated range\n");
1669                 rc = -EFAULT;
1670                 goto free_dma_mem_arr;
1671         }
1672
1673         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1674         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1675         hdev->cpu_pci_msb_addr =
1676                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1677
1678         if (!hdev->asic_prop.fw_security_enabled)
1679                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1680
1681 free_dma_mem_arr:
1682         for (j = 0 ; j < i ; j++)
1683                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1684                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1685                                                 virt_addr_arr[j],
1686                                                 dma_addr_arr[j]);
1687
1688         return rc;
1689 }
1690
1691 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1692 {
1693         struct gaudi_device *gaudi = hdev->asic_specific;
1694         struct gaudi_internal_qman_info *q;
1695         u32 i;
1696
1697         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1698                 q = &gaudi->internal_qmans[i];
1699                 if (!q->pq_kernel_addr)
1700                         continue;
1701                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1702                                                         q->pq_kernel_addr,
1703                                                         q->pq_dma_addr);
1704         }
1705 }
1706
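/*
 * Allocate a PQ buffer for every internal queue (HBM DMA, MME, TPC and NIC
 * QMANs), sized per engine type. On failure, free whatever was allocated.
 */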
1707 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1708 {
1709         struct gaudi_device *gaudi = hdev->asic_specific;
1710         struct gaudi_internal_qman_info *q;
1711         int rc, i;
1712
1713         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1714                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1715                         continue;
1716
1717                 q = &gaudi->internal_qmans[i];
1718
1719                 switch (i) {
1720                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1721                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1722                         break;
1723                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1724                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1725                         break;
1726                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1727                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1728                         break;
1729                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1730                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1731                         break;
1732                 default:
1733                         dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1734                         rc = -EINVAL;
1735                         goto free_internal_qmans_pq_mem;
1736                 }
1737
1738                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1739                                                 hdev, q->pq_size,
1740                                                 &q->pq_dma_addr,
1741                                                 GFP_KERNEL | __GFP_ZERO);
1742                 if (!q->pq_kernel_addr) {
1743                         rc = -ENOMEM;
1744                         goto free_internal_qmans_pq_mem;
1745                 }
1746         }
1747
1748         return 0;
1749
1750 free_internal_qmans_pq_mem:
1751         gaudi_free_internal_qmans_pq_mem(hdev);
1752         return rc;
1753 }
1754
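/*
 * Describe the PCI-accessible memory regions (CFG, SRAM, DRAM and SP SRAM):
 * region base/size, the BAR that exposes them and the offset inside that BAR.
 */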
1755 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1756 {
1757         struct asic_fixed_properties *prop = &hdev->asic_prop;
1758         struct pci_mem_region *region;
1759
1760         /* CFG */
1761         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1762         region->region_base = CFG_BASE;
1763         region->region_size = CFG_SIZE;
1764         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1765         region->bar_size = CFG_BAR_SIZE;
1766         region->bar_id = CFG_BAR_ID;
1767         region->used = 1;
1768
1769         /* SRAM */
1770         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1771         region->region_base = SRAM_BASE_ADDR;
1772         region->region_size = SRAM_SIZE;
1773         region->offset_in_bar = 0;
1774         region->bar_size = SRAM_BAR_SIZE;
1775         region->bar_id = SRAM_BAR_ID;
1776         region->used = 1;
1777
1778         /* DRAM */
1779         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1780         region->region_base = DRAM_PHYS_BASE;
1781         region->region_size = hdev->asic_prop.dram_size;
1782         region->offset_in_bar = 0;
1783         region->bar_size = prop->dram_pci_bar_size;
1784         region->bar_id = HBM_BAR_ID;
1785         region->used = 1;
1786
1787         /* SP SRAM */
1788         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1789         region->region_base = PSOC_SCRATCHPAD_ADDR;
1790         region->region_size = PSOC_SCRATCHPAD_SIZE;
1791         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1792         region->bar_size = CFG_BAR_SIZE;
1793         region->bar_id = CFG_BAR_ID;
1794         region->used = 1;
1795 }
1796
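/*
 * SW initialization: build the event id mapping, create the DMA pools and the
 * CPU-accessible DMA region, allocate the internal QMAN PQ buffers and set
 * the driver capability flags, undoing everything on any failure.
 */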
1797 static int gaudi_sw_init(struct hl_device *hdev)
1798 {
1799         struct gaudi_device *gaudi;
1800         u32 i, event_id = 0;
1801         int rc;
1802
1803         /* Allocate device structure */
1804         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1805         if (!gaudi)
1806                 return -ENOMEM;
1807
1808         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1809                 if (gaudi_irq_map_table[i].valid) {
1810                         if (event_id == GAUDI_EVENT_SIZE) {
1811                                 dev_err(hdev->dev,
1812                                         "Event array exceeds the limit of %u events\n",
1813                                         GAUDI_EVENT_SIZE);
1814                                 rc = -EINVAL;
1815                                 goto free_gaudi_device;
1816                         }
1817
1818                         gaudi->events[event_id++] =
1819                                         gaudi_irq_map_table[i].fc_id;
1820                 }
1821         }
1822
1823         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1824
1825         gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1826
1827         hdev->asic_specific = gaudi;
1828
1829         /* Create DMA pool for small allocations */
1830         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1831                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1832         if (!hdev->dma_pool) {
1833                 dev_err(hdev->dev, "failed to create DMA pool\n");
1834                 rc = -ENOMEM;
1835                 goto free_gaudi_device;
1836         }
1837
1838         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1839         if (rc)
1840                 goto free_dma_pool;
1841
1842         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1843         if (!hdev->cpu_accessible_dma_pool) {
1844                 dev_err(hdev->dev,
1845                         "Failed to create CPU accessible DMA pool\n");
1846                 rc = -ENOMEM;
1847                 goto free_cpu_dma_mem;
1848         }
1849
1850         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1851                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1852                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1853         if (rc) {
1854                 dev_err(hdev->dev,
1855                         "Failed to add memory to CPU accessible DMA pool\n");
1856                 rc = -EFAULT;
1857                 goto free_cpu_accessible_dma_pool;
1858         }
1859
1860         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1861         if (rc)
1862                 goto free_cpu_accessible_dma_pool;
1863
1864         spin_lock_init(&gaudi->hw_queues_lock);
1865         mutex_init(&gaudi->clk_gate_mutex);
1866
1867         hdev->supports_sync_stream = true;
1868         hdev->supports_coresight = true;
1869         hdev->supports_staged_submission = true;
1870         hdev->supports_wait_for_multi_cs = true;
1871
1872         hdev->asic_funcs->set_pci_memory_regions(hdev);
1873
1874         return 0;
1875
1876 free_cpu_accessible_dma_pool:
1877         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1878 free_cpu_dma_mem:
1879         if (!hdev->asic_prop.fw_security_enabled)
1880                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1881                                         hdev->cpu_pci_msb_addr);
1882         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1883                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1884                         hdev->cpu_accessible_dma_mem,
1885                         hdev->cpu_accessible_dma_address);
1886 free_dma_pool:
1887         dma_pool_destroy(hdev->dma_pool);
1888 free_gaudi_device:
1889         kfree(gaudi);
1890         return rc;
1891 }
1892
1893 static int gaudi_sw_fini(struct hl_device *hdev)
1894 {
1895         struct gaudi_device *gaudi = hdev->asic_specific;
1896
1897         gaudi_free_internal_qmans_pq_mem(hdev);
1898
1899         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1900
1901         if (!hdev->asic_prop.fw_security_enabled)
1902                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1903                                         hdev->cpu_pci_msb_addr);
1904
1905         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1906                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1907                         hdev->cpu_accessible_dma_mem,
1908                         hdev->cpu_accessible_dma_address);
1909
1910         dma_pool_destroy(hdev->dma_pool);
1911
1912         mutex_destroy(&gaudi->clk_gate_mutex);
1913
1914         kfree(gaudi);
1915
1916         return 0;
1917 }
1918
1919 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1920 {
1921         struct hl_device *hdev = arg;
1922         int i;
1923
1924         if (hdev->disabled)
1925                 return IRQ_HANDLED;
1926
1927         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1928                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1929
1930         hl_irq_handler_eq(irq, &hdev->event_queue);
1931
1932         return IRQ_HANDLED;
1933 }
1934
1935 /*
1936  * For backward compatibility, new MSI interrupts should be set after the
1937  * existing CPU and NIC interrupts.
1938  */
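/*
 * For example: CQ interrupts (nr < GAUDI_EVENT_QUEUE_MSI_IDX) map 1:1 to MSI
 * vectors, the CPU event queue uses vector GAUDI_EVENT_QUEUE_MSI_IDX, and any
 * other interrupt is pushed up by NIC_NUMBER_OF_ENGINES + 1 vectors.
 */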
1939 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1940                                 bool cpu_eq)
1941 {
1942         int msi_vec;
1943
1944         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1945                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1946                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1947
1948         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1949                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1950
1951         return pci_irq_vector(hdev->pdev, msi_vec);
1952 }
1953
1954 static int gaudi_enable_msi_single(struct hl_device *hdev)
1955 {
1956         int rc, irq;
1957
1958         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1959
1960         irq = gaudi_pci_irq_vector(hdev, 0, false);
1961         rc = request_irq(irq, gaudi_irq_handler_single, 0,
1962                         "gaudi single msi", hdev);
1963         if (rc)
1964                 dev_err(hdev->dev,
1965                         "Failed to request single MSI IRQ\n");
1966
1967         return rc;
1968 }
1969
1970 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1971 {
1972         int cq_cnt = hdev->asic_prop.completion_queues_count;
1973         int rc, i, irq_cnt_init, irq;
1974
1975         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1976                 irq = gaudi_pci_irq_vector(hdev, i, false);
1977                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1978                                 &hdev->completion_queue[i]);
1979                 if (rc) {
1980                         dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1981                         goto free_irqs;
1982                 }
1983         }
1984
1985         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1986         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1987                                 &hdev->event_queue);
1988         if (rc) {
1989                 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1990                 goto free_irqs;
1991         }
1992
1993         return 0;
1994
1995 free_irqs:
1996         for (i = 0 ; i < irq_cnt_init ; i++)
1997                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1998                                 &hdev->completion_queue[i]);
1999         return rc;
2000 }
2001
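/*
 * Enable MSI interrupts: allocate the MSI vector(s) and request either a
 * single shared IRQ or one IRQ per completion/event queue, depending on how
 * many vectors were granted.
 */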
2002 static int gaudi_enable_msi(struct hl_device *hdev)
2003 {
2004         struct gaudi_device *gaudi = hdev->asic_specific;
2005         int rc;
2006
2007         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2008                 return 0;
2009
2010         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2011         if (rc < 0) {
2012                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2013                 return rc;
2014         }
2015
2016         if (rc < NUMBER_OF_INTERRUPTS) {
2017                 gaudi->multi_msi_mode = false;
2018                 rc = gaudi_enable_msi_single(hdev);
2019         } else {
2020                 gaudi->multi_msi_mode = true;
2021                 rc = gaudi_enable_msi_multi(hdev);
2022         }
2023
2024         if (rc)
2025                 goto free_pci_irq_vectors;
2026
2027         gaudi->hw_cap_initialized |= HW_CAP_MSI;
2028
2029         return 0;
2030
2031 free_pci_irq_vectors:
2032         pci_free_irq_vectors(hdev->pdev);
2033         return rc;
2034 }
2035
2036 static void gaudi_sync_irqs(struct hl_device *hdev)
2037 {
2038         struct gaudi_device *gaudi = hdev->asic_specific;
2039         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2040
2041         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2042                 return;
2043
2044         /* Wait for all pending IRQs to finish */
2045         if (gaudi->multi_msi_mode) {
2046                 for (i = 0 ; i < cq_cnt ; i++)
2047                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2048
2049                 synchronize_irq(gaudi_pci_irq_vector(hdev,
2050                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
2051                                                 true));
2052         } else {
2053                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2054         }
2055 }
2056
2057 static void gaudi_disable_msi(struct hl_device *hdev)
2058 {
2059         struct gaudi_device *gaudi = hdev->asic_specific;
2060         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2061
2062         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2063                 return;
2064
2065         gaudi_sync_irqs(hdev);
2066
2067         if (gaudi->multi_msi_mode) {
2068                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2069                                                 true);
2070                 free_irq(irq, &hdev->event_queue);
2071
2072                 for (i = 0 ; i < cq_cnt ; i++) {
2073                         irq = gaudi_pci_irq_vector(hdev, i, false);
2074                         free_irq(irq, &hdev->completion_queue[i]);
2075                 }
2076         } else {
2077                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2078         }
2079
2080         pci_free_irq_vectors(hdev->pdev);
2081
2082         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2083 }
2084
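/*
 * Enable the SRAM scrambler in all SIF/NIF routers and DMA interfaces, unless
 * the FW is secured, the FW already enabled it, it was already initialized,
 * or SRAM scrambling is disabled for this device.
 */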
2085 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2086 {
2087         struct gaudi_device *gaudi = hdev->asic_specific;
2088
2089         if (hdev->asic_prop.fw_security_enabled)
2090                 return;
2091
2092         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2093                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2094                 return;
2095
2096         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2097                 return;
2098
2099         if (!hdev->sram_scrambler_enable)
2100                 return;
2101
2102         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2103                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2104         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2105                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2106         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2107                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2108         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2109                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2110         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2111                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2112         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2113                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2114         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2115                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2116         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2117                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2118
2119         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2120                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2121         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2122                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2123         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2124                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2125         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2126                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2127         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2128                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2129         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2130                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2131         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2132                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2133         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2134                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2135
2136         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2137                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2138         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2139                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2140         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2141                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2142         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2143                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2144         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2145                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2146         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2147                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2148         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2149                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2150         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2151                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2152
2153         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2154 }
2155
2156 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2157 {
2158         struct gaudi_device *gaudi = hdev->asic_specific;
2159
2160         if (hdev->asic_prop.fw_security_enabled)
2161                 return;
2162
2163         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2164                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2165                 return;
2166
2167         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2168                 return;
2169
2170         if (!hdev->dram_scrambler_enable)
2171                 return;
2172
2173         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2174                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2175         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2176                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2177         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2178                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2179         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2180                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2181         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2182                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2183         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2184                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2185         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2186                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2187         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2188                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2189
2190         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2191                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2192         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2193                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2194         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2195                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2196         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2197                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2198         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2199                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2200         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2201                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2202         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2203                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2204         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2205                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2206
2207         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2208                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2209         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2210                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2211         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2212                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2213         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2214                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2215         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2216                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2217         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2218                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2219         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2220                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2221         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2222                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2223
2224         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2225 }
2226
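/*
 * Configure the end-to-end (E2E) credits of the SIF/NIF routers and the DMA
 * interfaces for HBM and PCI traffic, then enable E2E credits everywhere.
 * Skipped when the FW is secured or the FW already configured the credits.
 */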
2227 static void gaudi_init_e2e(struct hl_device *hdev)
2228 {
2229         if (hdev->asic_prop.fw_security_enabled)
2230                 return;
2231
2232         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2233                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2234                 return;
2235
2236         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2237         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2238         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2239         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2240
2241         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2242         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2243         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2244         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2245
2246         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2247         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2248         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2249         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2250
2251         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2252         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2253         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2254         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2255
2256         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2257         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2258         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2259         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2260
2261         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2262         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2263         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2264         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2265
2266         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2267         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2268         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2269         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2270
2271         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2272         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2273         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2274         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2275
2276         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2277         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2278         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2279         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2280
2281         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2282         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2283         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2284         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2285
2286         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2287         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2288         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2289         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2290
2291         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2292         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2293         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2294         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2295
2296         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2297         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2298         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2299         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2300
2301         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2302         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2303         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2304         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2305
2306         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2307         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2308         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2309         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2310
2311         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2312         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2313         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2314         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2315
2316         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2317         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2318         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2319         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2320
2321         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2322         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2323         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2324         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2325
2326         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2327         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2328         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2329         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2330
2331         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2332         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2333         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2334         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2335
2336         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2337         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2338         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2339         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2340
2341         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2342         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2343         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2344         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2345
2346         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2347         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2348         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2349         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2350
2351         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2352         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2353         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2354         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2355
2356         if (!hdev->dram_scrambler_enable) {
2357                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2358                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2359                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2360                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2361
2362                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2363                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2364                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2365                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2366
2367                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2368                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2369                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2370                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2371
2372                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2373                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2374                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2375                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2376
2377                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2378                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2379                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2380                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2381
2382                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2383                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2384                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2385                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2386
2387                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2388                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2389                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2390                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2391
2392                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2393                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2394                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2395                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2396
2397                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2398                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2399                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2400                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2401
2402                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2403                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2404                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2405                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2406
2407                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2408                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2409                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2410                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2411
2412                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2413                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2414                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2415                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2416
2417                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2418                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2419                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2420                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2421
2422                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2423                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2424                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2425                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2426
2427                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2428                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2429                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2430                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2431
2432                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2433                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2434                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2435                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2436
2437                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2438                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2439                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2440                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2441
2442                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2443                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2444                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2445                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2446
2447                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2448                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2449                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2450                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2451
2452                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2453                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2454                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2455                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2456
2457                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2458                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2459                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2460                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2461
2462                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2463                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2464                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2465                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2466
2467                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2468                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2469                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2470                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2471
2472                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2473                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2474                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2475                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2476         }
2477
2478         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2479                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2480         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2481                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2482
2483         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2484                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2485         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2486                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2487
2488         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2489                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2490         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2491                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2492
2493         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2494                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2495         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2496                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2497
2498         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2499                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2500         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2501                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2502
2503         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2504                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2505         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2506                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2507
2508         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2509                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2510         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2511                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2512
2513         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2514                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2515         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2516                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2517
2518         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2519                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2520         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2521                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2522
2523         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2524                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2525         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2526                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2527
2528         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2529                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2530         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2531                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2532
2533         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2534                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2535         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2536                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2537
2538         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2539                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2540         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2541                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2542
2543         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2544                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2545         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2546                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2547
2548         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2549                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2550         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2551                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2552
2553         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2554                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2555         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2556                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2557
2558         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2559                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2560         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2561                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2562
2563         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2564                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2565         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2566                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2567
2568         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2569                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2570         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2571                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2572
2573         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2574                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2575         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2576                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2577
2578         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2579                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2580         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2581                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2582
2583         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2584                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2585         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2586                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2587
2588         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2589                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2590         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2591                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2592
2593         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2594                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2595         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2596                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2597 }
2598
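     /*
      * Program the HBM read/write credit counters in the DMA_IF blocks.
      * Skipped when firmware security is enabled, or when the firmware boot
      * status already has CPU_BOOT_DEV_STS0_HBM_CRED_EN set (presumably
      * meaning the firmware configured the credits itself).
      */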
2599 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2600 {
2601         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2602
2603         if (hdev->asic_prop.fw_security_enabled)
2604                 return;
2605
2606         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2607                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2608                 return;
2609
2610         hbm0_wr = 0x33333333;
2611         hbm0_rd = 0x77777777;
2612         hbm1_wr = 0x55555555;
2613         hbm1_rd = 0xDDDDDDDD;
2614
2615         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2616         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2617         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2618         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2619
2620         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2621         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2622         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2623         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2624
2625         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2626         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2627         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2628         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2629
2630         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2631         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2632         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2633         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2634
2635         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2636                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2637                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2638         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2639                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2640                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2641         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2642                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2643                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2644         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2645                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2646                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2647
2648         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2649                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2650                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2651         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2652                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2653                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2654         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2655                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2656                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2657         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2658                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2659                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2660 }
2661
2662 static void gaudi_init_golden_registers(struct hl_device *hdev)
2663 {
2664         u32 tpc_offset;
2665         int tpc_id, i;
2666
2667         gaudi_init_e2e(hdev);
2668         gaudi_init_hbm_cred(hdev);
2669
2670         for (tpc_id = 0, tpc_offset = 0;
2671                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2672                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2673                 /* Mask all arithmetic interrupts from TPC */
2674                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2675                 /* Set 16 cache lines */
2676                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2677                                 ICACHE_FETCH_LINE_NUM, 2);
2678         }
2679
2680         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2681         for (i = 0 ; i < 128 ; i += 8)
2682                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2683
2684         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2685         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2686         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2687         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2688 }
2689
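     /*
      * Configure a single stream (qman_id) of a PCI DMA QMAN: program its PQ
      * base/size, the LDMA offsets and the CP_MSG_BASE sync-manager
      * addresses. The error/RAZWI IRQ routing and arbitration settings are
      * programmed only once per QMAN, on stream 0.
      */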
2690 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2691                                         int qman_id, dma_addr_t qman_pq_addr)
2692 {
2693         struct cpu_dyn_regs *dyn_regs =
2694                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2695         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2696         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2697         u32 q_off, dma_qm_offset;
2698         u32 dma_qm_err_cfg, irq_handler_offset;
2699
2700         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2701
2702         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2703                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2704         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2705                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2706         so_base_en_lo = lower_32_bits(CFG_BASE +
2707                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2708         so_base_en_hi = upper_32_bits(CFG_BASE +
2709                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2710         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2711                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2712         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2713                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2714         so_base_ws_lo = lower_32_bits(CFG_BASE +
2715                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2716         so_base_ws_hi = upper_32_bits(CFG_BASE +
2717                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2718
2719         q_off = dma_qm_offset + qman_id * 4;
2720
2721         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2722         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2723
2724         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2725         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2726         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2727
2728         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2729         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2730                                                         QMAN_LDMA_SRC_OFFSET);
2731         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2732                                                         QMAN_LDMA_DST_OFFSET);
2733
2734         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2735         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2736         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2737         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2738         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2739         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2740         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2741         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2742
2743         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2744
2745         /* The following configuration is needed only once per QMAN */
2746         if (qman_id == 0) {
2747                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2748                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2749                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2750
2751                 /* Configure RAZWI IRQ */
2752                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2753                 if (hdev->stop_on_err)
2754                         dma_qm_err_cfg |=
2755                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2756
2757                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2758
2759                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2760                         lower_32_bits(CFG_BASE + irq_handler_offset));
2761                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2762                         upper_32_bits(CFG_BASE + irq_handler_offset));
2763
2764                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2765                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2766                                                                         dma_id);
2767
2768                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2769                                 QM_ARB_ERR_MSG_EN_MASK);
2770
2771                 /* Increase ARB WDT to support streams architecture */
2772                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2773                                 GAUDI_ARB_WDT_TIMEOUT);
2774
2775                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2776                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2777
2778                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2779         }
2780 }
2781
2782 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2783 {
2784         struct cpu_dyn_regs *dyn_regs =
2785                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2786         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2787         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2788         u32 irq_handler_offset;
2789
2790         /* Set to maximum possible according to physical size */
2791         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2792         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2793
2794         /* WA for H/W bug H3-2116 */
2795         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2796
2797         /* STOP_ON bit implies no completion for the operation in case of RAZWI */
2798         if (hdev->stop_on_err)
2799                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2800
2801         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2802
2803         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2804                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2805                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2806
2807         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2808                 lower_32_bits(CFG_BASE + irq_handler_offset));
2809         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2810                 upper_32_bits(CFG_BASE + irq_handler_offset));
2811
2812         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2813                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2814         WREG32(mmDMA0_CORE_PROT + dma_offset,
2815                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2816         /* If the channel is secured, it should be in MMU bypass mode */
2817         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2818                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2819         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2820 }
2821
2822 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2823                                 u32 enable_mask)
2824 {
2825         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2826
2827         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2828 }
2829
2830 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2831 {
2832         struct gaudi_device *gaudi = hdev->asic_specific;
2833         struct hl_hw_queue *q;
2834         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2835
2836         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2837                 return;
2838
2839         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2840                 dma_id = gaudi_dma_assignment[i];
2841                 /*
2842                  * For queues after the CPU Q, we need to add 1 to get the
2843                  * correct queue index. We also must account for the CPU EQ
2844                  * and NIC IRQs to get the correct MSI register.
2845                  */
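                     /*
                      * For example, assuming DMA channel 5 serves as the third
                      * PCI DMA engine here, its stream 0 lands on kernel queue
                      * index 4 * 5 + 0 + 1 = 21, i.e. one slot further than it
                      * would be without the CPU queue in between.
                      */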
2846                 if (dma_id > 1) {
2847                         cpu_skip = 1;
2848                         nic_skip = NIC_NUMBER_OF_ENGINES;
2849                 } else {
2850                         cpu_skip = 0;
2851                         nic_skip = 0;
2852                 }
2853
2854                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2855                         q_idx = 4 * dma_id + j + cpu_skip;
2856                         q = &hdev->kernel_queues[q_idx];
2857                         q->cq_id = cq_id++;
2858                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2859                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2860                                                 q->bus_address);
2861                 }
2862
2863                 gaudi_init_dma_core(hdev, dma_id);
2864
2865                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2866         }
2867
2868         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2869 }
2870
2871 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2872                                         int qman_id, u64 qman_base_addr)
2873 {
2874         struct cpu_dyn_regs *dyn_regs =
2875                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2876         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2877         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2878         u32 dma_qm_err_cfg, irq_handler_offset;
2879         u32 q_off, dma_qm_offset;
2880
2881         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2882
2883         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2884                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2885         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2886                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2887         so_base_en_lo = lower_32_bits(CFG_BASE +
2888                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2889         so_base_en_hi = upper_32_bits(CFG_BASE +
2890                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2891         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2892                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2893         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2894                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2895         so_base_ws_lo = lower_32_bits(CFG_BASE +
2896                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2897         so_base_ws_hi = upper_32_bits(CFG_BASE +
2898                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2899
2900         q_off = dma_qm_offset + qman_id * 4;
2901
2902         if (qman_id < 4) {
2903                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2904                                         lower_32_bits(qman_base_addr));
2905                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2906                                         upper_32_bits(qman_base_addr));
2907
2908                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2909                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2910                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2911
2912                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2913                                                         QMAN_CPDMA_SIZE_OFFSET);
2914                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2915                                                         QMAN_CPDMA_SRC_OFFSET);
2916                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2917                                                         QMAN_CPDMA_DST_OFFSET);
2918         } else {
2919                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2920                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2921                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2922
2923                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2924                                                         QMAN_LDMA_SIZE_OFFSET);
2925                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2926                                                         QMAN_LDMA_SRC_OFFSET);
2927                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2928                                                         QMAN_LDMA_DST_OFFSET);
2929
2930                 /* Configure RAZWI IRQ */
2931                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2932                 if (hdev->stop_on_err)
2933                         dma_qm_err_cfg |=
2934                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2935
2936                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2937
2938                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2939                         lower_32_bits(CFG_BASE + irq_handler_offset));
2940                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2941                         upper_32_bits(CFG_BASE + irq_handler_offset));
2942
2943                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2944                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2945                                                                         dma_id);
2946
2947                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2948                                 QM_ARB_ERR_MSG_EN_MASK);
2949
2950                 /* Increase ARB WDT to support streams architecture */
2951                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2952                                 GAUDI_ARB_WDT_TIMEOUT);
2953
2954                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2955                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2956                                 QMAN_INTERNAL_MAKE_TRUSTED);
2957         }
2958
2959         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2960         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2961         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2962         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2963
2964         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2965         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2966                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2967                                 mtr_base_ws_lo);
2968                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2969                                 mtr_base_ws_hi);
2970                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2971                                 so_base_ws_lo);
2972                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2973                                 so_base_ws_hi);
2974         }
2975 }
2976
2977 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2978 {
2979         struct gaudi_device *gaudi = hdev->asic_specific;
2980         struct gaudi_internal_qman_info *q;
2981         u64 qman_base_addr;
2982         int i, j, dma_id, internal_q_index;
2983
2984         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2985                 return;
2986
2987         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2988                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2989
2990                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2991                          /*
2992                           * Add the CPU queue in order to get the correct queue
2993                           * number, as all internal queues are placed after it
2994                           */
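                          /*
                           * For example, for DMA engine 2, stream 0 this gives
                           * internal queue index 2 * 4 + 0 + 1 = 9 (assuming
                           * QMAN_STREAMS is 4), i.e. the first slot after the
                           * CPU queue.
                           */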
2995                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2996
2997                         q = &gaudi->internal_qmans[internal_q_index];
2998                         qman_base_addr = (u64) q->pq_dma_addr;
2999                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
3000                                                 qman_base_addr);
3001                 }
3002
3003                 /* Initializing lower CP for HBM DMA QMAN */
3004                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
3005
3006                 gaudi_init_dma_core(hdev, dma_id);
3007
3008                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
3009         }
3010
3011         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
3012 }
3013
3014 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
3015                                         int qman_id, u64 qman_base_addr)
3016 {
3017         struct cpu_dyn_regs *dyn_regs =
3018                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3019         u32 mtr_base_lo, mtr_base_hi;
3020         u32 so_base_lo, so_base_hi;
3021         u32 irq_handler_offset;
3022         u32 q_off, mme_id;
3023         u32 mme_qm_err_cfg;
3024
3025         mtr_base_lo = lower_32_bits(CFG_BASE +
3026                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3027         mtr_base_hi = upper_32_bits(CFG_BASE +
3028                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3029         so_base_lo = lower_32_bits(CFG_BASE +
3030                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3031         so_base_hi = upper_32_bits(CFG_BASE +
3032                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3033
3034         q_off = mme_offset + qman_id * 4;
3035
3036         if (qman_id < 4) {
3037                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3038                                         lower_32_bits(qman_base_addr));
3039                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3040                                         upper_32_bits(qman_base_addr));
3041
3042                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3043                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3044                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3045
3046                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3047                                                         QMAN_CPDMA_SIZE_OFFSET);
3048                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3049                                                         QMAN_CPDMA_SRC_OFFSET);
3050                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3051                                                         QMAN_CPDMA_DST_OFFSET);
3052         } else {
3053                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3054                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3055                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3056
3057                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3058                                                         QMAN_LDMA_SIZE_OFFSET);
3059                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3060                                                         QMAN_LDMA_SRC_OFFSET);
3061                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3062                                                         QMAN_LDMA_DST_OFFSET);
3063
3064                 /* Configure RAZWI IRQ */
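                     /*
                      * mme_offset is either 0 (MME0) or, assuming evenly spaced
                      * QMAN blocks, two QMAN strides (MME2), so mme_id resolves
                      * to 0 or 1 and is added to the GAUDI_EVENT_MME0_QM cpu_id
                      * below.
                      */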
3065                 mme_id = mme_offset /
3066                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3067
3068                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3069                 if (hdev->stop_on_err)
3070                         mme_qm_err_cfg |=
3071                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3072
3073                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3074
3075                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3076                         lower_32_bits(CFG_BASE + irq_handler_offset));
3077                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3078                         upper_32_bits(CFG_BASE + irq_handler_offset));
3079
3080                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3081                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3082                                                                         mme_id);
3083
3084                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3085                                 QM_ARB_ERR_MSG_EN_MASK);
3086
3087                 /* Increase ARB WDT to support streams architecture */
3088                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3089                                 GAUDI_ARB_WDT_TIMEOUT);
3090
3091                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3092                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3093                                 QMAN_INTERNAL_MAKE_TRUSTED);
3094         }
3095
3096         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3097         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3098         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3099         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3100 }
3101
3102 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3103 {
3104         struct gaudi_device *gaudi = hdev->asic_specific;
3105         struct gaudi_internal_qman_info *q;
3106         u64 qman_base_addr;
3107         u32 mme_offset;
3108         int i, internal_q_index;
3109
3110         if (gaudi->hw_cap_initialized & HW_CAP_MME)
3111                 return;
3112
3113         /*
3114          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3115          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3116          */
3117
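             /*
              * The first four iterations program the streams of the N_W_MME
              * QMAN; once i == 3 resets mme_offset to 0, the remaining
              * iterations program the S_W_MME QMAN.
              */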
3118         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3119
3120         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3121                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3122                 q = &gaudi->internal_qmans[internal_q_index];
3123                 qman_base_addr = (u64) q->pq_dma_addr;
3124                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3125                                         qman_base_addr);
3126                 if (i == 3)
3127                         mme_offset = 0;
3128         }
3129
3130         /* Initializing lower CP for MME QMANs */
3131         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3132         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3133         gaudi_init_mme_qman(hdev, 0, 4, 0);
3134
3135         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3136         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3137
3138         gaudi->hw_cap_initialized |= HW_CAP_MME;
3139 }
3140
3141 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3142                                 int qman_id, u64 qman_base_addr)
3143 {
3144         struct cpu_dyn_regs *dyn_regs =
3145                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3146         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3147         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3148         u32 tpc_qm_err_cfg, irq_handler_offset;
3149         u32 q_off, tpc_id;
3150
3151         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3152                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3153         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3154                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3155         so_base_en_lo = lower_32_bits(CFG_BASE +
3156                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3157         so_base_en_hi = upper_32_bits(CFG_BASE +
3158                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3159         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3160                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3161         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3162                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3163         so_base_ws_lo = lower_32_bits(CFG_BASE +
3164                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3165         so_base_ws_hi = upper_32_bits(CFG_BASE +
3166                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3167
3168         q_off = tpc_offset + qman_id * 4;
3169
3170         tpc_id = tpc_offset /
3171                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3172
3173         if (qman_id < 4) {
3174                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3175                                         lower_32_bits(qman_base_addr));
3176                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3177                                         upper_32_bits(qman_base_addr));
3178
3179                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3180                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3181                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3182
3183                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3184                                                         QMAN_CPDMA_SIZE_OFFSET);
3185                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3186                                                         QMAN_CPDMA_SRC_OFFSET);
3187                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3188                                                         QMAN_CPDMA_DST_OFFSET);
3189         } else {
3190                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3191                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3192                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3193
3194                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3195                                                         QMAN_LDMA_SIZE_OFFSET);
3196                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3197                                                         QMAN_LDMA_SRC_OFFSET);
3198                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3199                                                         QMAN_LDMA_DST_OFFSET);
3200
3201                 /* Configure RAZWI IRQ */
3202                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3203                 if (hdev->stop_on_err)
3204                         tpc_qm_err_cfg |=
3205                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3206
3207                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3208
3209                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3210                         lower_32_bits(CFG_BASE + irq_handler_offset));
3211                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3212                         upper_32_bits(CFG_BASE + irq_handler_offset));
3213
3214                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3215                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3216                                                                         tpc_id);
3217
3218                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3219                                 QM_ARB_ERR_MSG_EN_MASK);
3220
3221                 /* Increase ARB WDT to support streams architecture */
3222                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3223                                 GAUDI_ARB_WDT_TIMEOUT);
3224
3225                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3226                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3227                                 QMAN_INTERNAL_MAKE_TRUSTED);
3228         }
3229
3230         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3231         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3232         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3233         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3234
3235         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3236         if (tpc_id == 6) {
3237                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3238                                 mtr_base_ws_lo);
3239                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3240                                 mtr_base_ws_hi);
3241                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3242                                 so_base_ws_lo);
3243                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3244                                 so_base_ws_hi);
3245         }
3246 }
3247
3248 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3249 {
3250         struct gaudi_device *gaudi = hdev->asic_specific;
3251         struct gaudi_internal_qman_info *q;
3252         u64 qman_base_addr;
3253         u32 so_base_hi, tpc_offset = 0;
3254         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3255                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3256         int i, tpc_id, internal_q_index;
3257
3258         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3259                 return;
3260
3261         so_base_hi = upper_32_bits(CFG_BASE +
3262                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3263
3264         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3265                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3266                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3267                                                 tpc_id * QMAN_STREAMS + i;
3268                         q = &gaudi->internal_qmans[internal_q_index];
3269                         qman_base_addr = (u64) q->pq_dma_addr;
3270                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3271                                                 qman_base_addr);
3272
3273                         if (i == 3) {
3274                                 /* Initializing lower CP for TPC QMAN */
3275                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3276
3277                                 /* Enable the QMAN and TPC channel */
3278                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3279                                                 QMAN_TPC_ENABLE);
3280                         }
3281                 }
3282
3283                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3284                                 so_base_hi);
3285
3286                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3287
3288                 gaudi->hw_cap_initialized |=
3289                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3290         }
3291 }
3292
3293 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3294                                 int qman_id, u64 qman_base_addr, int nic_id)
3295 {
3296         struct cpu_dyn_regs *dyn_regs =
3297                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3298         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3299         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3300         u32 nic_qm_err_cfg, irq_handler_offset;
3301         u32 q_off;
3302
3303         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3304                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3305         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3306                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3307         so_base_en_lo = lower_32_bits(CFG_BASE +
3308                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3309         so_base_en_hi = upper_32_bits(CFG_BASE +
3310                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3311         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3312                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3313         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3314                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3315         so_base_ws_lo = lower_32_bits(CFG_BASE +
3316                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3317         so_base_ws_hi = upper_32_bits(CFG_BASE +
3318                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3319
3320         q_off = nic_offset + qman_id * 4;
3321
3322         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3323         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3324
3325         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3326         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3327         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3328
3329         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3330                                                         QMAN_LDMA_SIZE_OFFSET);
3331         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3332                                                         QMAN_LDMA_SRC_OFFSET);
3333         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3334                                                         QMAN_LDMA_DST_OFFSET);
3335
3336         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3337         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3338         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3339         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3340
3341         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3342         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3343         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3344         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3345         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3346
3347         if (qman_id == 0) {
3348                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3349                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3350                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3351
3352                 /* Configure RAZWI IRQ */
3353                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3354                 if (hdev->stop_on_err)
3355                         nic_qm_err_cfg |=
3356                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3357
3358                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3359
3360                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3361                         lower_32_bits(CFG_BASE + irq_handler_offset));
3362                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3363                         upper_32_bits(CFG_BASE + irq_handler_offset));
3364
3365                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3366                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3367                                                                         nic_id);
3368
3369                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3370                                 QM_ARB_ERR_MSG_EN_MASK);
3371
3372                 /* Increase ARB WDT to support streams architecture */
3373                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3374                                 GAUDI_ARB_WDT_TIMEOUT);
3375
3376                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3377                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3378                                 QMAN_INTERNAL_MAKE_TRUSTED);
3379         }
3380 }
3381
3382 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3383 {
3384         struct gaudi_device *gaudi = hdev->asic_specific;
3385         struct gaudi_internal_qman_info *q;
3386         u64 qman_base_addr;
3387         u32 nic_offset = 0;
3388         u32 nic_delta_between_qmans =
3389                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3390         u32 nic_delta_between_nics =
3391                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3392         int i, nic_id, internal_q_index;
3393
3394         if (!hdev->nic_ports_mask)
3395                 return;
3396
3397         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3398                 return;
3399
3400         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3401
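             /*
              * Each NIC macro hosts two QMANs, so nic_offset normally advances
              * by one QMAN stride; after every odd nic_id it is rewound by two
              * QMAN strides and advanced by one NIC stride to reach the next
              * NIC block.
              */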
3402         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3403                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3404                         nic_offset += nic_delta_between_qmans;
3405                         if (nic_id & 1) {
3406                                 nic_offset -= (nic_delta_between_qmans * 2);
3407                                 nic_offset += nic_delta_between_nics;
3408                         }
3409                         continue;
3410                 }
3411
3412                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3413                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3414                                                 nic_id * QMAN_STREAMS + i;
3415                         q = &gaudi->internal_qmans[internal_q_index];
3416                         qman_base_addr = (u64) q->pq_dma_addr;
3417                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3418                                                 qman_base_addr, nic_id);
3419                 }
3420
3421                 /* Enable the QMAN */
3422                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3423
3424                 nic_offset += nic_delta_between_qmans;
3425                 if (nic_id & 1) {
3426                         nic_offset -= (nic_delta_between_qmans * 2);
3427                         nic_offset += nic_delta_between_nics;
3428                 }
3429
3430                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3431         }
3432 }
3433
3434 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3435 {
3436         struct gaudi_device *gaudi = hdev->asic_specific;
3437
3438         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3439                 return;
3440
3441         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3442         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3443         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3444 }
3445
3446 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3447 {
3448         struct gaudi_device *gaudi = hdev->asic_specific;
3449
3450         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3451                 return;
3452
3453         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3454         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3455         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3456         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3457         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3458 }
3459
3460 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3461 {
3462         struct gaudi_device *gaudi = hdev->asic_specific;
3463
3464         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3465                 return;
3466
3467         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3468         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3469 }
3470
3471 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3472 {
3473         struct gaudi_device *gaudi = hdev->asic_specific;
3474         u32 tpc_offset = 0;
3475         int tpc_id;
3476
3477         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3478                 return;
3479
3480         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3481                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3482                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3483         }
3484 }
3485
3486 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3487 {
3488         struct gaudi_device *gaudi = hdev->asic_specific;
3489         u32 nic_mask, nic_offset = 0;
3490         u32 nic_delta_between_qmans =
3491                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3492         u32 nic_delta_between_nics =
3493                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3494         int nic_id;
3495
3496         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3497                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3498
3499                 if (gaudi->hw_cap_initialized & nic_mask)
3500                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3501
3502                 nic_offset += nic_delta_between_qmans;
3503                 if (nic_id & 1) {
3504                         nic_offset -= (nic_delta_between_qmans * 2);
3505                         nic_offset += nic_delta_between_nics;
3506                 }
3507         }
3508 }
3509
3510 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3511 {
3512         struct gaudi_device *gaudi = hdev->asic_specific;
3513
3514         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3515                 return;
3516
3517         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3518         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3519         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3520         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3521 }
3522
3523 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3524 {
3525         struct gaudi_device *gaudi = hdev->asic_specific;
3526
3527         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3528                 return;
3529
3530         /* Stop CPs of HBM DMA QMANs */
3531
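             /*
              * 0x1F presumably covers all five CPs of each QMAN: the four
              * upper (stream) CPs plus the lower CP that was set up with
              * qman_id 4.
              */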
3532         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3533         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3534         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3535         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3536         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3537 }
3538
3539 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3540 {
3541         struct gaudi_device *gaudi = hdev->asic_specific;
3542
3543         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3544                 return;
3545
3546         /* Stop CPs of MME QMANs */
3547         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3548         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3549 }
3550
3551 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3552 {
3553         struct gaudi_device *gaudi = hdev->asic_specific;
3554
3555         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3556                 return;
3557
3558         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3559         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3560         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3561         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3562         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3563         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3564         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3565         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3566 }
3567
3568 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3569 {
3570         struct gaudi_device *gaudi = hdev->asic_specific;
3571
3572         /* Stop upper CPs of QMANs */
3573
3574         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3575                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3576                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3577                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3578                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3579
3580         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3581                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3582                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3583                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3584                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3585
3586         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3587                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3588                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3589                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3590                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3591
3592         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3593                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3594                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3595                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3596                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3597
3598         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3599                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3600                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3601                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3602                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3603
3604         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3605                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3606                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3607                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3608                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3609
3610         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3611                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3612                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3613                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3614                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3615
3616         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3617                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3618                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3619                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3620                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3621
3622         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3623                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3624                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3625                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3626                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3627
3628         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3629                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3630                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3631                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3632                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3633 }
3634
3635 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3636 {
3637         struct gaudi_device *gaudi = hdev->asic_specific;
3638
3639         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3640                 return;
3641
3642         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3643         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3644         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3645 }
3646
3647 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3648 {
3649         struct gaudi_device *gaudi = hdev->asic_specific;
3650
3651         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3652                 return;
3653
3654         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3655         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3656         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3657         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3658         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3659 }
3660
3661 static void gaudi_mme_stall(struct hl_device *hdev)
3662 {
3663         struct gaudi_device *gaudi = hdev->asic_specific;
3664
3665         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3666                 return;
3667
3668         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3669         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3670         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3671         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3672         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3673         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3674         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3675         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3676         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3677         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3678         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3679         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3680         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3681         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3682         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3683         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3684         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3685 }
3686
3687 static void gaudi_tpc_stall(struct hl_device *hdev)
3688 {
3689         struct gaudi_device *gaudi = hdev->asic_specific;
3690
3691         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3692                 return;
3693
3694         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3695         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3696         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3697         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3698         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3699         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3700         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3701         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3702 }
3703
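     /*
      * Enable or disable clock gating per engine according to
      * hdev->clock_gating_mask. Skipped entirely during a debug session or
      * when firmware security is enabled.
      */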
3704 static void gaudi_set_clock_gating(struct hl_device *hdev)
3705 {
3706         struct gaudi_device *gaudi = hdev->asic_specific;
3707         u32 qman_offset;
3708         bool enable;
3709         int i;
3710
3711         /* If we are in the middle of a debug session, don't enable clock
3712          * gating as it may interfere
3713          */
3714         if (hdev->in_debug)
3715                 return;
3716
3717         if (hdev->asic_prop.fw_security_enabled)
3718                 return;
3719
3720         for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3721                 enable = !!(hdev->clock_gating_mask &
3722                                 (BIT_ULL(gaudi_dma_assignment[i])));
3723
3724                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3725                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3726                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3727                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3728                                 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3729         }
3730
3731         for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3732                 enable = !!(hdev->clock_gating_mask &
3733                                 (BIT_ULL(gaudi_dma_assignment[i])));
3734
3735                 /* GC sends work to the DMA engine through the Upper CP in
3736                  * DMA5, so we must not enable clock gating in that DMA
3737                  */
3738                 if (i == GAUDI_HBM_DMA_4)
3739                         enable = 0;
3740
3741                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3742                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3743                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3744                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3745                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3746         }
3747
3748         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3749         WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3750         WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3751
3752         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3753         WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3754         WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3755
3756         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3757                 enable = !!(hdev->clock_gating_mask &
3758                                 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3759
3760                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3761                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3762                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3763                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3764
3765                 qman_offset += TPC_QMAN_OFFSET;
3766         }
3767
3768         gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3769 }
3770
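/*
 * Illustrative sketch only (hypothetical usage, not part of the driver):
 * hdev->clock_gating_mask holds one bit per engine ID, and gating is
 * enabled for an engine only when its bit is set. A mask that gates only
 * MME0 and TPC3 could be built like this:
 *
 *	u64 mask = BIT_ULL(GAUDI_ENGINE_ID_MME_0) |
 *			BIT_ULL(GAUDI_ENGINE_ID_TPC_3);
 *	hdev->clock_gating_mask = mask;
 *	hdev->asic_funcs->set_clock_gating(hdev);
 */
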
3771 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3772 {
3773         struct gaudi_device *gaudi = hdev->asic_specific;
3774         u32 qman_offset;
3775         int i;
3776
3777         if (hdev->asic_prop.fw_security_enabled)
3778                 return;
3779
3780         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3781                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3782                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3783
3784                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3785         }
3786
3787         WREG32(mmMME0_QM_CGM_CFG, 0);
3788         WREG32(mmMME0_QM_CGM_CFG1, 0);
3789         WREG32(mmMME2_QM_CGM_CFG, 0);
3790         WREG32(mmMME2_QM_CGM_CFG1, 0);
3791
3792         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3793                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3794                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3795
3796                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3797         }
3798
3799         gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3800 }
3801
3802 static void gaudi_enable_timestamp(struct hl_device *hdev)
3803 {
3804         /* Disable the timestamp counter */
3805         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3806
3807         /* Zero the lower/upper parts of the 64-bit counter */
3808         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3809         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3810
3811         /* Enable the counter */
3812         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3813 }
3814
3815 static void gaudi_disable_timestamp(struct hl_device *hdev)
3816 {
3817         /* Disable the timestamp counter */
3818         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3819 }
3820
3821 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3822 {
3823         u32 wait_timeout_ms;
3824
3825         dev_info(hdev->dev,
3826                 "Halting compute engines and disabling interrupts\n");
3827
3828         if (hdev->pldm)
3829                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3830         else
3831                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3832
3833         gaudi_stop_nic_qmans(hdev);
3834         gaudi_stop_mme_qmans(hdev);
3835         gaudi_stop_tpc_qmans(hdev);
3836         gaudi_stop_hbm_dma_qmans(hdev);
3837         gaudi_stop_pci_dma_qmans(hdev);
3838
3839         hdev->asic_funcs->disable_clock_gating(hdev);
3840
3841         msleep(wait_timeout_ms);
3842
3843         gaudi_pci_dma_stall(hdev);
3844         gaudi_hbm_dma_stall(hdev);
3845         gaudi_tpc_stall(hdev);
3846         gaudi_mme_stall(hdev);
3847
3848         msleep(wait_timeout_ms);
3849
3850         gaudi_disable_nic_qmans(hdev);
3851         gaudi_disable_mme_qmans(hdev);
3852         gaudi_disable_tpc_qmans(hdev);
3853         gaudi_disable_hbm_dma_qmans(hdev);
3854         gaudi_disable_pci_dma_qmans(hdev);
3855
3856         gaudi_disable_timestamp(hdev);
3857
3858         gaudi_disable_msi(hdev);
3859 }
3860
3861 static int gaudi_mmu_init(struct hl_device *hdev)
3862 {
3863         struct asic_fixed_properties *prop = &hdev->asic_prop;
3864         struct gaudi_device *gaudi = hdev->asic_specific;
3865         u64 hop0_addr;
3866         int rc, i;
3867
3868         if (!hdev->mmu_enable)
3869                 return 0;
3870
3871         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3872                 return 0;
3873
3874         for (i = 0 ; i < prop->max_asid ; i++) {
3875                 hop0_addr = prop->mmu_pgt_addr +
3876                                 (i * prop->mmu_hop_table_size);
3877
3878                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3879                 if (rc) {
3880                         dev_err(hdev->dev,
3881                                 "failed to set hop0 addr for asid %d\n", i);
3882                         goto err;
3883                 }
3884         }
3885
3886         /* init MMU cache management page */
3887         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3888         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3889
3890         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3891
3892         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3893         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3894
3895         WREG32(mmSTLB_HOP_CONFIGURATION,
3896                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3897
3898         /*
3899          * The H/W expects the first PI after init to be 1. After wraparound
3900          * we'll write 0.
3901          */
3902         gaudi->mmu_cache_inv_pi = 1;
3903
3904         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3905
3906         return 0;
3907
3908 err:
3909         return rc;
3910 }
3911
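/*
 * Illustrative arithmetic only (addresses are hypothetical). Each ASID
 * owns its own hop0 table inside the page-table pool, so for ASID i the
 * hop0 address computed above is simply:
 *
 *	hop0_addr = prop->mmu_pgt_addr + i * prop->mmu_hop_table_size;
 *
 * e.g. with mmu_pgt_addr = 0x80000000 and a 4KB hop table, ASID 3 gets
 * its hop0 at 0x80003000.
 */
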
3912 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3913 {
3914         void __iomem *dst;
3915
3916         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3917
3918         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3919 }
3920
3921 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3922 {
3923         void __iomem *dst;
3924
3925         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3926
3927         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3928 }
3929
3930 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3931 {
3932         struct dynamic_fw_load_mgr *dynamic_loader;
3933         struct cpu_dyn_regs *dyn_regs;
3934
3935         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3936
3937         /*
3938          * Here we set initial values for a few specific dynamic registers;
3939          * before the first descriptor is read from the FW, these values must
3940          * be hard-coded. In later stages of the protocol they are updated
3941          * automatically by reading the FW descriptor, so the data there is
3942          * always up-to-date.
3943          */
3944         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3945         dyn_regs->kmd_msg_to_cpu =
3946                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3947         dyn_regs->cpu_cmd_status_to_host =
3948                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3949
3950         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3951 }
3952
3953 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3954 {
3955         struct static_fw_load_mgr *static_loader;
3956
3957         static_loader = &hdev->fw_loader.static_loader;
3958
3959         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3960         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3961         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3962         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3963         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3964         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3965         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3966         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3967         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3968         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3969         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3970         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3971         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3972                         GAUDI_PLDM_RESET_WAIT_MSEC :
3973                         GAUDI_CPU_RESET_WAIT_MSEC;
3974 }
3975
3976 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3977 {
3978         struct asic_fixed_properties *prop = &hdev->asic_prop;
3979         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3980
3981         /* fill common fields */
3982         fw_loader->linux_loaded = false;
3983         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3984         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3985         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3986         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3987         fw_loader->skip_bmc = !hdev->bmc_enable;
3988         fw_loader->sram_bar_id = SRAM_BAR_ID;
3989         fw_loader->dram_bar_id = HBM_BAR_ID;
3990
3991         if (prop->dynamic_fw_load)
3992                 gaudi_init_dynamic_firmware_loader(hdev);
3993         else
3994                 gaudi_init_static_firmware_loader(hdev);
3995 }
3996
3997 static int gaudi_init_cpu(struct hl_device *hdev)
3998 {
3999         struct gaudi_device *gaudi = hdev->asic_specific;
4000         int rc;
4001
4002         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4003                 return 0;
4004
4005         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
4006                 return 0;
4007
4008         /*
4009          * The device CPU works with 40-bit addresses.
4010          * This register sets the extension to 50 bits.
4011          */
4012         if (!hdev->asic_prop.fw_security_enabled)
4013                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
4014
4015         rc = hl_fw_init_cpu(hdev);
4016
4017         if (rc)
4018                 return rc;
4019
4020         gaudi->hw_cap_initialized |= HW_CAP_CPU;
4021
4022         return 0;
4023 }
4024
4025 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4026 {
4027         struct cpu_dyn_regs *dyn_regs =
4028                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4029         struct asic_fixed_properties *prop = &hdev->asic_prop;
4030         struct gaudi_device *gaudi = hdev->asic_specific;
4031         u32 status, irq_handler_offset;
4032         struct hl_eq *eq;
4033         struct hl_hw_queue *cpu_pq =
4034                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4035         int err;
4036
4037         if (!hdev->cpu_queues_enable)
4038                 return 0;
4039
4040         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4041                 return 0;
4042
4043         eq = &hdev->event_queue;
4044
4045         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4046         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4047
4048         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4049         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4050
4051         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4052                         lower_32_bits(hdev->cpu_accessible_dma_address));
4053         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4054                         upper_32_bits(hdev->cpu_accessible_dma_address));
4055
4056         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4057         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4058         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4059
4060         /* Used for EQ CI */
4061         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4062
4063         WREG32(mmCPU_IF_PF_PQ_PI, 0);
4064
4065         if (gaudi->multi_msi_mode)
4066                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4067         else
4068                 WREG32(mmCPU_IF_QUEUE_INIT,
4069                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4070
4071         irq_handler_offset = prop->gic_interrupts_enable ?
4072                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4073                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4074
4075         WREG32(irq_handler_offset,
4076                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4077
4078         err = hl_poll_timeout(
4079                 hdev,
4080                 mmCPU_IF_QUEUE_INIT,
4081                 status,
4082                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4083                 1000,
4084                 cpu_timeout);
4085
4086         if (err) {
4087                 dev_err(hdev->dev,
4088                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4089                 return -EIO;
4090         }
4091
4092         /* update FW application security bits */
4093         if (prop->fw_cpu_boot_dev_sts0_valid)
4094                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4095         if (prop->fw_cpu_boot_dev_sts1_valid)
4096                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4097
4098         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4099         return 0;
4100 }
4101
4102 static void gaudi_pre_hw_init(struct hl_device *hdev)
4103 {
4104         /* Perform read from the device to make sure device is up */
4105         RREG32(mmHW_STATE);
4106
4107         if (!hdev->asic_prop.fw_security_enabled) {
4108                 /* Set the access through PCI bars (Linux driver only) as
4109                  * secured
4110                  */
4111                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4112                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4113                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4114
4115                 /* Perform a read to flush the pending writes and ensure the
4116                  * configuration was set in the device
4117                  */
4118                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4119         }
4120
4121         /*
4122          * Let's mark in the H/W that we have reached this point. We check
4123          * this value in the reset_before_init function to understand whether
4124          * we need to reset the chip before doing H/W init. This register is
4125          * cleared by the H/W upon H/W reset
4126          */
4127         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4128 }
4129
4130 static int gaudi_hw_init(struct hl_device *hdev)
4131 {
4132         struct gaudi_device *gaudi = hdev->asic_specific;
4133         int rc;
4134
4135         gaudi_pre_hw_init(hdev);
4136
4137         /* If the iATU is configured by FW, the HBM bar ALWAYS points to
4138          * DRAM_PHYS_BASE. Record that here, so any later attempt to move it
4139          * to a different address will be treated as an error
4140          */
4141         if (hdev->asic_prop.iatu_done_by_fw)
4142                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4143
4144         /*
4145          * Before pushing u-boot/Linux to the device, we need to set the HBM
4146          * bar to the base address of DRAM
4147          */
4148         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4149                 dev_err(hdev->dev,
4150                         "failed to map HBM bar to DRAM base address\n");
4151                 return -EIO;
4152         }
4153
4154         rc = gaudi_init_cpu(hdev);
4155         if (rc) {
4156                 dev_err(hdev->dev, "failed to initialize CPU\n");
4157                 return rc;
4158         }
4159
4160         /* If clock gating was enabled in preboot, we need to disable it
4161          * here before touching the MME/TPC registers.
4162          * There is no need to take the clock gating mutex because no other
4163          * relevant code can run while this function runs
4164          */
4165         hdev->asic_funcs->disable_clock_gating(hdev);
4166
4167         /* SRAM scrambler must be initialized after CPU is running from HBM */
4168         gaudi_init_scrambler_sram(hdev);
4169
4170         /* This is here just in case we are working without CPU */
4171         gaudi_init_scrambler_hbm(hdev);
4172
4173         gaudi_init_golden_registers(hdev);
4174
4175         rc = gaudi_mmu_init(hdev);
4176         if (rc)
4177                 return rc;
4178
4179         gaudi_init_security(hdev);
4180
4181         gaudi_init_pci_dma_qmans(hdev);
4182
4183         gaudi_init_hbm_dma_qmans(hdev);
4184
4185         gaudi_init_mme_qmans(hdev);
4186
4187         gaudi_init_tpc_qmans(hdev);
4188
4189         gaudi_init_nic_qmans(hdev);
4190
4191         hdev->asic_funcs->set_clock_gating(hdev);
4192
4193         gaudi_enable_timestamp(hdev);
4194
4195         /* MSI must be enabled before CPU queues and NIC are initialized */
4196         rc = gaudi_enable_msi(hdev);
4197         if (rc)
4198                 goto disable_queues;
4199
4200         /* must be called after MSI was enabled */
4201         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4202         if (rc) {
4203                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4204                         rc);
4205                 goto disable_msi;
4206         }
4207
4208         /* Perform read from the device to flush all configuration */
4209         RREG32(mmHW_STATE);
4210
4211         return 0;
4212
4213 disable_msi:
4214         gaudi_disable_msi(hdev);
4215 disable_queues:
4216         gaudi_disable_mme_qmans(hdev);
4217         gaudi_disable_pci_dma_qmans(hdev);
4218
4219         return rc;
4220 }
4221
4222 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
4223 {
4224         struct cpu_dyn_regs *dyn_regs =
4225                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4226         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4227         struct gaudi_device *gaudi = hdev->asic_specific;
4228         bool driver_performs_reset;
4229
4230         if (!hard_reset) {
4231                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4232                 return;
4233         }
4234
4235         if (hdev->pldm) {
4236                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4237                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4238         } else {
4239                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4240                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4241         }
4242
4243         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4244                                         !hdev->asic_prop.hard_reset_done_by_fw);
4245
4246         /* Set the device to handle FLR by H/W, as we are going to put the
4247          * device CPU into halt mode
4248          */
4249         if (driver_performs_reset)
4250                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4251                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4252
4253         /* If Linux is loaded on the device CPU we need to communicate with
4254          * it via the GIC. Otherwise, we use COMMS, or the MSG_TO_CPU
4255          * registers in the case of old F/W
4256          */
4257         if (hdev->fw_loader.linux_loaded) {
4258                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4259                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4260                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4261
4262                 WREG32(irq_handler_offset,
4263                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4264         } else {
4265                 if (hdev->asic_prop.hard_reset_done_by_fw)
4266                         hl_fw_ask_hard_reset_without_linux(hdev);
4267                 else
4268                         hl_fw_ask_halt_machine_without_linux(hdev);
4269         }
4270
4271         if (driver_performs_reset) {
4272
4273                 /* Configure the reset registers. Must be done as early as
4274                  * possible in case we fail during H/W initialization
4275                  */
4276                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4277                                                 (CFG_RST_H_DMA_MASK |
4278                                                 CFG_RST_H_MME_MASK |
4279                                                 CFG_RST_H_SM_MASK |
4280                                                 CFG_RST_H_TPC_7_MASK));
4281
4282                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4283
4284                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4285                                                 (CFG_RST_H_HBM_MASK |
4286                                                 CFG_RST_H_TPC_7_MASK |
4287                                                 CFG_RST_H_NIC_MASK |
4288                                                 CFG_RST_H_SM_MASK |
4289                                                 CFG_RST_H_DMA_MASK |
4290                                                 CFG_RST_H_MME_MASK |
4291                                                 CFG_RST_H_CPU_MASK |
4292                                                 CFG_RST_H_MMU_MASK));
4293
4294                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4295                                                 (CFG_RST_L_IF_MASK |
4296                                                 CFG_RST_L_PSOC_MASK |
4297                                                 CFG_RST_L_TPC_MASK));
4298
4299                 msleep(cpu_timeout_ms);
4300
4301                 /* Tell ASIC not to re-initialize PCIe */
4302                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4303
4304                 /* Restart BTL/BLR upon hard-reset */
4305                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4306
4307                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4308                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4309
4310                 dev_info(hdev->dev,
4311                         "Issued HARD reset command, going to wait %dms\n",
4312                         reset_timeout_ms);
4313         } else {
4314                 dev_info(hdev->dev,
4315                         "Firmware performs HARD reset, going to wait %dms\n",
4316                         reset_timeout_ms);
4317         }
4318
4319         /*
4320          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4321          * itself is in reset. We need to wait until the reset is deasserted
4322          */
4323         msleep(reset_timeout_ms);
4324
4325         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4326         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4327                 dev_err(hdev->dev,
4328                         "Timeout while waiting for device to reset 0x%x\n",
4329                         status);
4330
4331         if (gaudi) {
4332                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4333                                 HW_CAP_HBM | HW_CAP_PCI_DMA |
4334                                 HW_CAP_MME | HW_CAP_TPC_MASK |
4335                                 HW_CAP_HBM_DMA | HW_CAP_PLL |
4336                                 HW_CAP_NIC_MASK | HW_CAP_MMU |
4337                                 HW_CAP_SRAM_SCRAMBLER |
4338                                 HW_CAP_HBM_SCRAMBLER |
4339                                 HW_CAP_CLK_GATE);
4340
4341                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4342
4343                 hdev->device_cpu_is_halted = false;
4344         }
4345 }
4346
4347 static int gaudi_suspend(struct hl_device *hdev)
4348 {
4349         int rc;
4350
4351         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4352         if (rc)
4353                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4354
4355         return rc;
4356 }
4357
4358 static int gaudi_resume(struct hl_device *hdev)
4359 {
4360         return gaudi_init_iatu(hdev);
4361 }
4362
4363 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4364                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4365 {
4366         int rc;
4367
4368         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4369                         VM_DONTCOPY | VM_NORESERVE;
4370
4371         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4372                                 (dma_addr - HOST_PHYS_BASE), size);
4373         if (rc)
4374                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4375
4376         return rc;
4377 }
4378
4379 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4380 {
4381         struct cpu_dyn_regs *dyn_regs =
4382                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4383         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4384         struct gaudi_device *gaudi = hdev->asic_specific;
4385         bool invalid_queue = false;
4386         int dma_id;
4387
4388         switch (hw_queue_id) {
4389         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4390                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4391                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4392                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4393                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4394                 break;
4395
4396         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4397                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4398                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4399                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4400                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4401                 break;
4402
4403         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4404                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4405                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4406                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4407                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4408                 break;
4409
4410         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4411                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4412                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4413                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4414                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4415                 break;
4416
4417         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4418                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4419                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4420                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4421                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4422                 break;
4423
4424         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4425                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4426                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4427                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4428                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4429                 break;
4430
4431         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4432                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4433                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4434                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4435                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4436                 break;
4437
4438         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4439                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4440                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4441                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4442                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4443                 break;
4444
4445         case GAUDI_QUEUE_ID_CPU_PQ:
4446                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4447                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4448                 else
4449                         invalid_queue = true;
4450                 break;
4451
4452         case GAUDI_QUEUE_ID_MME_0_0:
4453                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4454                 break;
4455
4456         case GAUDI_QUEUE_ID_MME_0_1:
4457                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4458                 break;
4459
4460         case GAUDI_QUEUE_ID_MME_0_2:
4461                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4462                 break;
4463
4464         case GAUDI_QUEUE_ID_MME_0_3:
4465                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4466                 break;
4467
4468         case GAUDI_QUEUE_ID_MME_1_0:
4469                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4470                 break;
4471
4472         case GAUDI_QUEUE_ID_MME_1_1:
4473                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4474                 break;
4475
4476         case GAUDI_QUEUE_ID_MME_1_2:
4477                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4478                 break;
4479
4480         case GAUDI_QUEUE_ID_MME_1_3:
4481                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4482                 break;
4483
4484         case GAUDI_QUEUE_ID_TPC_0_0:
4485                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4486                 break;
4487
4488         case GAUDI_QUEUE_ID_TPC_0_1:
4489                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4490                 break;
4491
4492         case GAUDI_QUEUE_ID_TPC_0_2:
4493                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4494                 break;
4495
4496         case GAUDI_QUEUE_ID_TPC_0_3:
4497                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4498                 break;
4499
4500         case GAUDI_QUEUE_ID_TPC_1_0:
4501                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4502                 break;
4503
4504         case GAUDI_QUEUE_ID_TPC_1_1:
4505                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4506                 break;
4507
4508         case GAUDI_QUEUE_ID_TPC_1_2:
4509                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4510                 break;
4511
4512         case GAUDI_QUEUE_ID_TPC_1_3:
4513                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4514                 break;
4515
4516         case GAUDI_QUEUE_ID_TPC_2_0:
4517                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4518                 break;
4519
4520         case GAUDI_QUEUE_ID_TPC_2_1:
4521                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4522                 break;
4523
4524         case GAUDI_QUEUE_ID_TPC_2_2:
4525                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4526                 break;
4527
4528         case GAUDI_QUEUE_ID_TPC_2_3:
4529                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4530                 break;
4531
4532         case GAUDI_QUEUE_ID_TPC_3_0:
4533                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4534                 break;
4535
4536         case GAUDI_QUEUE_ID_TPC_3_1:
4537                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4538                 break;
4539
4540         case GAUDI_QUEUE_ID_TPC_3_2:
4541                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4542                 break;
4543
4544         case GAUDI_QUEUE_ID_TPC_3_3:
4545                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4546                 break;
4547
4548         case GAUDI_QUEUE_ID_TPC_4_0:
4549                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4550                 break;
4551
4552         case GAUDI_QUEUE_ID_TPC_4_1:
4553                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4554                 break;
4555
4556         case GAUDI_QUEUE_ID_TPC_4_2:
4557                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4558                 break;
4559
4560         case GAUDI_QUEUE_ID_TPC_4_3:
4561                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4562                 break;
4563
4564         case GAUDI_QUEUE_ID_TPC_5_0:
4565                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4566                 break;
4567
4568         case GAUDI_QUEUE_ID_TPC_5_1:
4569                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4570                 break;
4571
4572         case GAUDI_QUEUE_ID_TPC_5_2:
4573                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4574                 break;
4575
4576         case GAUDI_QUEUE_ID_TPC_5_3:
4577                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4578                 break;
4579
4580         case GAUDI_QUEUE_ID_TPC_6_0:
4581                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4582                 break;
4583
4584         case GAUDI_QUEUE_ID_TPC_6_1:
4585                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4586                 break;
4587
4588         case GAUDI_QUEUE_ID_TPC_6_2:
4589                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4590                 break;
4591
4592         case GAUDI_QUEUE_ID_TPC_6_3:
4593                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4594                 break;
4595
4596         case GAUDI_QUEUE_ID_TPC_7_0:
4597                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4598                 break;
4599
4600         case GAUDI_QUEUE_ID_TPC_7_1:
4601                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4602                 break;
4603
4604         case GAUDI_QUEUE_ID_TPC_7_2:
4605                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4606                 break;
4607
4608         case GAUDI_QUEUE_ID_TPC_7_3:
4609                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4610                 break;
4611
4612         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4613                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4614                         invalid_queue = true;
4615
4616                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4617                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4618                 break;
4619
4620         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4621                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4622                         invalid_queue = true;
4623
4624                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4625                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4626                 break;
4627
4628         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4629                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4630                         invalid_queue = true;
4631
4632                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4633                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4634                 break;
4635
4636         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4637                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4638                         invalid_queue = true;
4639
4640                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4641                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4642                 break;
4643
4644         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4645                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4646                         invalid_queue = true;
4647
4648                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4649                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4650                 break;
4651
4652         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4653                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4654                         invalid_queue = true;
4655
4656                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4657                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4658                 break;
4659
4660         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4661                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4662                         invalid_queue = true;
4663
4664                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4665                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4666                 break;
4667
4668         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4669                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4670                         invalid_queue = true;
4671
4672                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4673                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4674                 break;
4675
4676         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4677                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4678                         invalid_queue = true;
4679
4680                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4681                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4682                 break;
4683
4684         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4685                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4686                         invalid_queue = true;
4687
4688                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4689                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4690                 break;
4691
4692         default:
4693                 invalid_queue = true;
4694         }
4695
4696         if (invalid_queue) {
4697                 /* Should never get here */
4698                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4699                         hw_queue_id);
4700                 return;
4701         }
4702
4703         db_value = pi;
4704
4705         /* ring the doorbell */
4706         WREG32(db_reg_offset, db_value);
4707
4708         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4709                 /* make sure device CPU will read latest data from host */
4710                 mb();
4711
4712                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4713                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4714                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4715
4716                 WREG32(irq_handler_offset,
4717                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4718         }
4719 }
4720
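/*
 * Hypothetical example of the offset arithmetic used above. For the
 * external DMA queues the doorbell is one of the four PQ_PI_n registers
 * of that engine's QMAN, e.g. for GAUDI_QUEUE_ID_DMA_0_2:
 *
 *	q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
 *	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, pi);	// lands on PQ_PI_2
 */
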
4721 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4722                                 struct hl_bd *bd)
4723 {
4724         __le64 *pbd = (__le64 *) bd;
4725
4726         /* The QMANs are in host memory so a simple copy suffices */
4727         pqe[0] = pbd[0];
4728         pqe[1] = pbd[1];
4729 }
4730
4731 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4732                                         dma_addr_t *dma_handle, gfp_t flags)
4733 {
4734         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4735                                                 dma_handle, flags);
4736
4737         /* Shift to the device's base physical address of host memory */
4738         if (kernel_addr)
4739                 *dma_handle += HOST_PHYS_BASE;
4740
4741         return kernel_addr;
4742 }
4743
4744 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4745                 void *cpu_addr, dma_addr_t dma_handle)
4746 {
4747         /* Cancel the device's base physical address of host memory */
4748         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4749
4750         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4751 }
4752
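/*
 * Minimal illustrative sketch (hypothetical caller, not driver code).
 * Host allocations are presented to the device behind HOST_PHYS_BASE,
 * so the two helpers above apply the offset on allocation and remove it
 * again on free:
 *
 *	dma_addr_t bus;
 *	void *va = gaudi_dma_alloc_coherent(hdev, SZ_4K, &bus, GFP_KERNEL);
 *	// bus == host DMA address + HOST_PHYS_BASE (device-visible)
 *	gaudi_dma_free_coherent(hdev, SZ_4K, va, bus);
 */
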
4753 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4754 {
4755         struct asic_fixed_properties *prop = &hdev->asic_prop;
4756         u64  cur_addr = DRAM_BASE_ADDR_USER;
4757         u32 val;
4758         u32 chunk_size;
4759         int rc, dma_id;
4760
4761         while (cur_addr < prop->dram_end_address) {
4762                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4763                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4764
4765                         chunk_size =
4766                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4767
4768                         dev_dbg(hdev->dev,
4769                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4770                                 cur_addr, cur_addr + chunk_size);
4771
4772                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
4773                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
4774                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4775                                                 lower_32_bits(cur_addr));
4776                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4777                                                 upper_32_bits(cur_addr));
4778                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4779                                         chunk_size);
4780                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4781                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4782                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4783
4784                         cur_addr += chunk_size;
4785
4786                         if (cur_addr == prop->dram_end_address)
4787                                 break;
4788                 }
4789
4790                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4791                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4792
4793                         rc = hl_poll_timeout(
4794                                 hdev,
4795                                 mmDMA0_CORE_STS0 + dma_offset,
4796                                 val,
4797                                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4798                                 1000,
4799                                 HBM_SCRUBBING_TIMEOUT_US);
4800
4801                         if (rc) {
4802                                 dev_err(hdev->dev,
4803                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4804                                         dma_id);
4805                                 return -EIO;
4806                         }
4807                 }
4808         }
4809
4810         return 0;
4811 }
4812
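/*
 * Illustrative numbers only: the scrub above walks HBM in chunks of at
 * most 2GB, programming one chunk per DMA channel per pass of the outer
 * loop (with 8 channels a single pass covers up to 16GB) and only then
 * polling all channels for idle; the final chunk is trimmed to
 * dram_end_address - cur_addr.
 */
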
4813 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4814 {
4815         struct asic_fixed_properties *prop = &hdev->asic_prop;
4816         struct gaudi_device *gaudi = hdev->asic_specific;
4817         int rc = 0;
4818         u64 val = 0;
4819
4820         if (!hdev->memory_scrub)
4821                 return 0;
4822
4823         if (!addr && !size) {
4824                 /* Wait till device is idle */
4825                 rc = hl_poll_timeout(
4826                                 hdev,
4827                                 mmDMA0_CORE_STS0/* dummy */,
4828                                 val/* dummy */,
4829                                 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4830                                                 0, NULL)),
4831                                                 1000,
4832                                                 HBM_SCRUBBING_TIMEOUT_US);
4833                 if (rc) {
4834                         dev_err(hdev->dev, "waiting for idle timeout\n");
4835                         return -EIO;
4836                 }
4837
4838                 /* Scrub SRAM */
4839                 addr = prop->sram_user_base_address;
4840                 size = hdev->pldm ? 0x10000 :
4841                                 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4842                 val = 0x7777777777777777ull;
4843
4844                 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4845                 if (rc) {
4846                         dev_err(hdev->dev,
4847                                 "Failed to clear SRAM in mem scrub all\n");
4848                         return rc;
4849                 }
4850
4851                 mutex_lock(&gaudi->clk_gate_mutex);
4852                 hdev->asic_funcs->disable_clock_gating(hdev);
4853
4854                 /* Scrub HBM using all DMA channels in parallel */
4855                 rc = gaudi_hbm_scrubbing(hdev);
4856                 if (rc)
4857                         dev_err(hdev->dev,
4858                                 "Failed to clear HBM in mem scrub all\n");
4859
4860                 hdev->asic_funcs->set_clock_gating(hdev);
4861                 mutex_unlock(&gaudi->clk_gate_mutex);
4862         }
4863
4864         return rc;
4865 }
4866
4867 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4868                                 u32 queue_id, dma_addr_t *dma_handle,
4869                                 u16 *queue_len)
4870 {
4871         struct gaudi_device *gaudi = hdev->asic_specific;
4872         struct gaudi_internal_qman_info *q;
4873
4874         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4875                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4876                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4877                 return NULL;
4878         }
4879
4880         q = &gaudi->internal_qmans[queue_id];
4881         *dma_handle = q->pq_dma_addr;
4882         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4883
4884         return q->pq_kernel_addr;
4885 }
4886
4887 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4888                                 u16 len, u32 timeout, u64 *result)
4889 {
4890         struct gaudi_device *gaudi = hdev->asic_specific;
4891
4892         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4893                 if (result)
4894                         *result = 0;
4895                 return 0;
4896         }
4897
4898         if (!timeout)
4899                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4900
4901         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4902                                                 timeout, result);
4903 }
4904
4905 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4906 {
4907         struct packet_msg_prot *fence_pkt;
4908         dma_addr_t pkt_dma_addr;
4909         u32 fence_val, tmp, timeout_usec;
4910         dma_addr_t fence_dma_addr;
4911         u32 *fence_ptr;
4912         int rc;
4913
4914         if (hdev->pldm)
4915                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4916         else
4917                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4918
4919         fence_val = GAUDI_QMAN0_FENCE_VAL;
4920
4921         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4922                                                         &fence_dma_addr);
4923         if (!fence_ptr) {
4924                 dev_err(hdev->dev,
4925                         "Failed to allocate memory for H/W queue %d testing\n",
4926                         hw_queue_id);
4927                 return -ENOMEM;
4928         }
4929
4930         *fence_ptr = 0;
4931
4932         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4933                                         sizeof(struct packet_msg_prot),
4934                                         GFP_KERNEL, &pkt_dma_addr);
4935         if (!fence_pkt) {
4936                 dev_err(hdev->dev,
4937                         "Failed to allocate packet for H/W queue %d testing\n",
4938                         hw_queue_id);
4939                 rc = -ENOMEM;
4940                 goto free_fence_ptr;
4941         }
4942
4943         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4944         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4945         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4946
4947         fence_pkt->ctl = cpu_to_le32(tmp);
4948         fence_pkt->value = cpu_to_le32(fence_val);
4949         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4950
4951         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4952                                         sizeof(struct packet_msg_prot),
4953                                         pkt_dma_addr);
4954         if (rc) {
4955                 dev_err(hdev->dev,
4956                         "Failed to send fence packet to H/W queue %d\n",
4957                         hw_queue_id);
4958                 goto free_pkt;
4959         }
4960
4961         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4962                                         1000, timeout_usec, true);
4963
4964         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4965
4966         if (rc == -ETIMEDOUT) {
4967                 dev_err(hdev->dev,
4968                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4969                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4970                 rc = -EIO;
4971         }
4972
4973 free_pkt:
4974         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4975                                         pkt_dma_addr);
4976 free_fence_ptr:
4977         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4978                                         fence_dma_addr);
4979         return rc;
4980 }
4981
4982 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4983 {
4984         struct gaudi_device *gaudi = hdev->asic_specific;
4985
4986         /*
4987          * Check the capability here, as send_cpu_message() won't update the
4988          * result value if the capability isn't set
4989          */
4990         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4991                 return 0;
4992
4993         return hl_fw_test_cpu_queue(hdev);
4994 }
4995
4996 static int gaudi_test_queues(struct hl_device *hdev)
4997 {
4998         int i, rc, ret_val = 0;
4999
5000         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
5001                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
5002                         rc = gaudi_test_queue(hdev, i);
5003                         if (rc)
5004                                 ret_val = -EINVAL;
5005                 }
5006         }
5007
5008         rc = gaudi_test_cpu_queue(hdev);
5009         if (rc)
5010                 ret_val = -EINVAL;
5011
5012         return ret_val;
5013 }
5014
5015 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5016                 gfp_t mem_flags, dma_addr_t *dma_handle)
5017 {
5018         void *kernel_addr;
5019
5020         if (size > GAUDI_DMA_POOL_BLK_SIZE)
5021                 return NULL;
5022
5023         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5024
5025         /* Shift to the device's base physical address of host memory */
5026         if (kernel_addr)
5027                 *dma_handle += HOST_PHYS_BASE;
5028
5029         return kernel_addr;
5030 }
5031
5032 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
5033                         dma_addr_t dma_addr)
5034 {
5035         /* Cancel the device's base physical address of host memory */
5036         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5037
5038         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5039 }
5040
5041 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
5042                                         size_t size, dma_addr_t *dma_handle)
5043 {
5044         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5045 }
5046
5047 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
5048                                                 size_t size, void *vaddr)
5049 {
5050         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5051 }
5052
5053 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5054                         int nents, enum dma_data_direction dir)
5055 {
5056         struct scatterlist *sg;
5057         int i;
5058
5059         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5060                 return -ENOMEM;
5061
5062         /* Shift to the device's base physical address of host memory */
5063         for_each_sg(sgl, sg, nents, i)
5064                 sg->dma_address += HOST_PHYS_BASE;
5065
5066         return 0;
5067 }
5068
5069 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5070                         int nents, enum dma_data_direction dir)
5071 {
5072         struct scatterlist *sg;
5073         int i;
5074
5075         /* Cancel the device's base physical address of host memory */
5076         for_each_sg(sgl, sg, nents, i)
5077                 sg->dma_address -= HOST_PHYS_BASE;
5078
5079         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5080 }
5081
5082 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5083                                         struct sg_table *sgt)
5084 {
5085         struct scatterlist *sg, *sg_next_iter;
5086         u32 count, dma_desc_cnt;
5087         u64 len, len_next;
5088         dma_addr_t addr, addr_next;
5089
5090         dma_desc_cnt = 0;
5091
5092         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5093
5094                 len = sg_dma_len(sg);
5095                 addr = sg_dma_address(sg);
5096
5097                 if (len == 0)
5098                         break;
5099
5100                 while ((count + 1) < sgt->nents) {
5101                         sg_next_iter = sg_next(sg);
5102                         len_next = sg_dma_len(sg_next_iter);
5103                         addr_next = sg_dma_address(sg_next_iter);
5104
5105                         if (len_next == 0)
5106                                 break;
5107
5108                         if ((addr + len == addr_next) &&
5109                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5110                                 len += len_next;
5111                                 count++;
5112                                 sg = sg_next_iter;
5113                         } else {
5114                                 break;
5115                         }
5116                 }
5117
5118                 dma_desc_cnt++;
5119         }
5120
5121         return dma_desc_cnt * sizeof(struct packet_lin_dma);
5122 }
5123
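/*
 * Worked example with hypothetical addresses for the coalescing above.
 * Adjacent SG entries that are physically contiguous and whose combined
 * length fits in DMA_MAX_TRANSFER_SIZE collapse into one LIN_DMA
 * descriptor:
 *
 *	entry 0: addr 0x1000, len 0x1000
 *	entry 1: addr 0x2000, len 0x2000	-> merged with entry 0
 *	entry 2: addr 0x9000, len 0x1000	-> not contiguous, new descriptor
 *
 * so the function returns 2 * sizeof(struct packet_lin_dma).
 */
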
5124 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5125                                 struct hl_cs_parser *parser,
5126                                 struct packet_lin_dma *user_dma_pkt,
5127                                 u64 addr, enum dma_data_direction dir)
5128 {
5129         struct hl_userptr *userptr;
5130         int rc;
5131
5132         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5133                         parser->job_userptr_list, &userptr))
5134                 goto already_pinned;
5135
5136         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5137         if (!userptr)
5138                 return -ENOMEM;
5139
5140         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5141                                 userptr);
5142         if (rc)
5143                 goto free_userptr;
5144
5145         list_add_tail(&userptr->job_node, parser->job_userptr_list);
5146
5147         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5148                                         userptr->sgt->nents, dir);
5149         if (rc) {
5150                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5151                 goto unpin_memory;
5152         }
5153
5154         userptr->dma_mapped = true;
5155         userptr->dir = dir;
5156
5157 already_pinned:
5158         parser->patched_cb_size +=
5159                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5160
5161         return 0;
5162
5163 unpin_memory:
5164         list_del(&userptr->job_node);
5165         hl_unpin_host_memory(hdev, userptr);
5166 free_userptr:
5167         kfree(userptr);
5168         return rc;
5169 }
5170
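/*
 * Validate a LIN_DMA packet that involves host memory. The caller indicates
 * whether the source side resides on the host; a host-side memset skips
 * pinning because its "source" is an immediate fill value rather than a
 * buffer (the value travels in src_addr, as gaudi_memset_device_memory()
 * below demonstrates).
 */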
5171 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5172                                 struct hl_cs_parser *parser,
5173                                 struct packet_lin_dma *user_dma_pkt,
5174                                 bool src_in_host)
5175 {
5176         enum dma_data_direction dir;
5177         bool skip_host_mem_pin = false, user_memset;
5178         u64 addr;
5179         int rc = 0;
5180
5181         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5182                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5183                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5184
5185         if (src_in_host) {
5186                 if (user_memset)
5187                         skip_host_mem_pin = true;
5188
5189                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5190                 dir = DMA_TO_DEVICE;
5191                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5192         } else {
5193                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5194                 dir = DMA_FROM_DEVICE;
5195                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5196                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5197                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5198         }
5199
5200         if (skip_host_mem_pin)
5201                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5202         else
5203                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5204                                                 addr, dir);
5205
5206         return rc;
5207 }
5208
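/*
 * Pre-MMU validation of a user LIN_DMA packet. Zero-sized transfers bypass
 * all checks since nothing but the WR_COMP write would be performed. For the
 * rest, the source is treated as host-resident only when the packet was
 * queued on one of the first DMA queues, matching the identical check in
 * gaudi_patch_dma_packet().
 */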
5209 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5210                                 struct hl_cs_parser *parser,
5211                                 struct packet_lin_dma *user_dma_pkt)
5212 {
5213         bool src_in_host = false;
5214         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5215                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5216                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5217
5218         dev_dbg(hdev->dev, "DMA packet details:\n");
5219         dev_dbg(hdev->dev, "source == 0x%llx\n",
5220                                 le64_to_cpu(user_dma_pkt->src_addr));
5221         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5222         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5223
5224         /*
5225          * Special handling for DMA with size 0. Bypass all validations
5226          * because no transactions will be done except for WR_COMP, which
5227          * is not a security issue
5228          */
5229         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5230                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5231                 return 0;
5232         }
5233
5234         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5235                 src_in_host = true;
5236
5237         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5238                                                 src_in_host);
5239 }
5240
5241 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5242                                         struct hl_cs_parser *parser,
5243                                         struct packet_load_and_exe *user_pkt)
5244 {
5245         u32 cfg;
5246
5247         cfg = le32_to_cpu(user_pkt->cfg);
5248
5249         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5250                 dev_err(hdev->dev,
5251                         "User not allowed to use Load and Execute\n");
5252                 return -EPERM;
5253         }
5254
5255         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5256
5257         return 0;
5258 }
5259
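/*
 * First pass over a user CB: verify every packet ID, reject packets user
 * space may not submit (MSG_PROT, CP_DMA, STOP, WREG_BULK and LOAD_AND_EXE
 * with its CFG DST bit set) and accumulate the size the patched CB will
 * need. With the MMU enabled, LIN_DMA packets are counted verbatim; without
 * it, each is sized according to its pinned SG list. Room for the two
 * trailing MSG_PROT packets is added only when the CS carries a completion.
 */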
5260 static int gaudi_validate_cb(struct hl_device *hdev,
5261                         struct hl_cs_parser *parser, bool is_mmu)
5262 {
5263         u32 cb_parsed_length = 0;
5264         int rc = 0;
5265
5266         parser->patched_cb_size = 0;
5267
5268         /* user_cb_size is more than 0 so loop will always be executed */
5269         while (cb_parsed_length < parser->user_cb_size) {
5270                 enum packet_id pkt_id;
5271                 u16 pkt_size;
5272                 struct gaudi_packet *user_pkt;
5273
5274                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5275
5276                 pkt_id = (enum packet_id) (
5277                                 (le64_to_cpu(user_pkt->header) &
5278                                 PACKET_HEADER_PACKET_ID_MASK) >>
5279                                         PACKET_HEADER_PACKET_ID_SHIFT);
5280
5281                 if (!validate_packet_id(pkt_id)) {
5282                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5283                         rc = -EINVAL;
5284                         break;
5285                 }
5286
5287                 pkt_size = gaudi_packet_sizes[pkt_id];
5288                 cb_parsed_length += pkt_size;
5289                 if (cb_parsed_length > parser->user_cb_size) {
5290                         dev_err(hdev->dev,
5291                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5292                         rc = -EINVAL;
5293                         break;
5294                 }
5295
5296                 switch (pkt_id) {
5297                 case PACKET_MSG_PROT:
5298                         dev_err(hdev->dev,
5299                                 "User not allowed to use MSG_PROT\n");
5300                         rc = -EPERM;
5301                         break;
5302
5303                 case PACKET_CP_DMA:
5304                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5305                         rc = -EPERM;
5306                         break;
5307
5308                 case PACKET_STOP:
5309                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5310                         rc = -EPERM;
5311                         break;
5312
5313                 case PACKET_WREG_BULK:
5314                         dev_err(hdev->dev,
5315                                 "User not allowed to use WREG_BULK\n");
5316                         rc = -EPERM;
5317                         break;
5318
5319                 case PACKET_LOAD_AND_EXE:
5320                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5321                                 (struct packet_load_and_exe *) user_pkt);
5322                         break;
5323
5324                 case PACKET_LIN_DMA:
5325                         parser->contains_dma_pkt = true;
5326                         if (is_mmu)
5327                                 parser->patched_cb_size += pkt_size;
5328                         else
5329                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5330                                         (struct packet_lin_dma *) user_pkt);
5331                         break;
5332
5333                 case PACKET_WREG_32:
5334                 case PACKET_MSG_LONG:
5335                 case PACKET_MSG_SHORT:
5336                 case PACKET_REPEAT:
5337                 case PACKET_FENCE:
5338                 case PACKET_NOP:
5339                 case PACKET_ARB_POINT:
5340                         parser->patched_cb_size += pkt_size;
5341                         break;
5342
5343                 default:
5344                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5345                                 pkt_id);
5346                         rc = -EINVAL;
5347                         break;
5348                 }
5349
5350                 if (rc)
5351                         break;
5352         }
5353
5354         /*
5355          * The new CB should have space at the end for two MSG_PROT packets:
5356          * 1. A packet that will act as a completion packet
5357          * 2. A packet that will generate MSI interrupt
5358          */
5359         if (parser->completion)
5360                 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5361
5362         return rc;
5363 }
5364
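/*
 * Expand one user LIN_DMA packet into a LIN_DMA packet per merged SG run of
 * the pinned host buffer. The user's engine-barrier bit is kept only on the
 * first generated packet and the WR_COMP enable is restored only on the last
 * one, so the expanded sequence raises at most one completion, like the
 * original packet. A host-to-device memset is copied through unchanged.
 */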
5365 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5366                                 struct hl_cs_parser *parser,
5367                                 struct packet_lin_dma *user_dma_pkt,
5368                                 struct packet_lin_dma *new_dma_pkt,
5369                                 u32 *new_dma_pkt_size)
5370 {
5371         struct hl_userptr *userptr;
5372         struct scatterlist *sg, *sg_next_iter;
5373         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5374         u64 len, len_next;
5375         dma_addr_t dma_addr, dma_addr_next;
5376         u64 device_memory_addr, addr;
5377         enum dma_data_direction dir;
5378         struct sg_table *sgt;
5379         bool src_in_host = false;
5380         bool skip_host_mem_pin = false;
5381         bool user_memset;
5382
5383         ctl = le32_to_cpu(user_dma_pkt->ctl);
5384
5385         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5386                 src_in_host = true;
5387
5388         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5389                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5390
5391         if (src_in_host) {
5392                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5393                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5394                 dir = DMA_TO_DEVICE;
5395                 if (user_memset)
5396                         skip_host_mem_pin = true;
5397         } else {
5398                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5399                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5400                 dir = DMA_FROM_DEVICE;
5401         }
5402
5403         if ((!skip_host_mem_pin) &&
5404                 (!hl_userptr_is_pinned(hdev, addr,
5405                                         le32_to_cpu(user_dma_pkt->tsize),
5406                                         parser->job_userptr_list, &userptr))) {
5407                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5408                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5409                 return -EFAULT;
5410         }
5411
5412         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5413                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5414                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5415                 return 0;
5416         }
5417
5418         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5419
5420         sgt = userptr->sgt;
5421         dma_desc_cnt = 0;
5422
5423         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5424                 len = sg_dma_len(sg);
5425                 dma_addr = sg_dma_address(sg);
5426
5427                 if (len == 0)
5428                         break;
5429
5430                 while ((count + 1) < sgt->nents) {
5431                         sg_next_iter = sg_next(sg);
5432                         len_next = sg_dma_len(sg_next_iter);
5433                         dma_addr_next = sg_dma_address(sg_next_iter);
5434
5435                         if (len_next == 0)
5436                                 break;
5437
5438                         if ((dma_addr + len == dma_addr_next) &&
5439                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5440                                 len += len_next;
5441                                 count++;
5442                                 sg = sg_next_iter;
5443                         } else {
5444                                 break;
5445                         }
5446                 }
5447
5448                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5449                 if (likely(dma_desc_cnt))
5450                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5451                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5452                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5453                 new_dma_pkt->tsize = cpu_to_le32(len);
5454
5455                 if (dir == DMA_TO_DEVICE) {
5456                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5457                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5458                 } else {
5459                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5460                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5461                 }
5462
5463                 if (!user_memset)
5464                         device_memory_addr += len;
5465                 dma_desc_cnt++;
5466                 new_dma_pkt++;
5467         }
5468
5469         if (!dma_desc_cnt) {
5470                 dev_err(hdev->dev,
5471                         "No SG entries found when patching DMA packet\n");
5472                 return -EFAULT;
5473         }
5474
5475         /* Fix the last dma packet - wrcomp must be as user set it */
5476         new_dma_pkt--;
5477         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5478
5479         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5480
5481         return 0;
5482 }
5483
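/*
 * Second pass: copy the user CB into the kernel-owned patched CB. Most
 * packets are copied verbatim; LIN_DMA packets are rewritten by
 * gaudi_patch_dma_packet() to target the DMA-mapped host pages. The
 * forbidden packet types are rejected here as well, even though
 * gaudi_validate_cb() has already screened them.
 */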
5484 static int gaudi_patch_cb(struct hl_device *hdev,
5485                                 struct hl_cs_parser *parser)
5486 {
5487         u32 cb_parsed_length = 0;
5488         u32 cb_patched_cur_length = 0;
5489         int rc = 0;
5490
5491         /* user_cb_size is more than 0 so loop will always be executed */
5492         while (cb_parsed_length < parser->user_cb_size) {
5493                 enum packet_id pkt_id;
5494                 u16 pkt_size;
5495                 u32 new_pkt_size = 0;
5496                 struct gaudi_packet *user_pkt, *kernel_pkt;
5497
5498                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5499                 kernel_pkt = parser->patched_cb->kernel_address +
5500                                         cb_patched_cur_length;
5501
5502                 pkt_id = (enum packet_id) (
5503                                 (le64_to_cpu(user_pkt->header) &
5504                                 PACKET_HEADER_PACKET_ID_MASK) >>
5505                                         PACKET_HEADER_PACKET_ID_SHIFT);
5506
5507                 if (!validate_packet_id(pkt_id)) {
5508                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5509                         rc = -EINVAL;
5510                         break;
5511                 }
5512
5513                 pkt_size = gaudi_packet_sizes[pkt_id];
5514                 cb_parsed_length += pkt_size;
5515                 if (cb_parsed_length > parser->user_cb_size) {
5516                         dev_err(hdev->dev,
5517                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5518                         rc = -EINVAL;
5519                         break;
5520                 }
5521
5522                 switch (pkt_id) {
5523                 case PACKET_LIN_DMA:
5524                         rc = gaudi_patch_dma_packet(hdev, parser,
5525                                         (struct packet_lin_dma *) user_pkt,
5526                                         (struct packet_lin_dma *) kernel_pkt,
5527                                         &new_pkt_size);
5528                         cb_patched_cur_length += new_pkt_size;
5529                         break;
5530
5531                 case PACKET_MSG_PROT:
5532                         dev_err(hdev->dev,
5533                                 "User not allowed to use MSG_PROT\n");
5534                         rc = -EPERM;
5535                         break;
5536
5537                 case PACKET_CP_DMA:
5538                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5539                         rc = -EPERM;
5540                         break;
5541
5542                 case PACKET_STOP:
5543                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5544                         rc = -EPERM;
5545                         break;
5546
5547                 case PACKET_WREG_32:
5548                 case PACKET_WREG_BULK:
5549                 case PACKET_MSG_LONG:
5550                 case PACKET_MSG_SHORT:
5551                 case PACKET_REPEAT:
5552                 case PACKET_FENCE:
5553                 case PACKET_NOP:
5554                 case PACKET_ARB_POINT:
5555                 case PACKET_LOAD_AND_EXE:
5556                         memcpy(kernel_pkt, user_pkt, pkt_size);
5557                         cb_patched_cur_length += pkt_size;
5558                         break;
5559
5560                 default:
5561                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5562                                 pkt_id);
5563                         rc = -EINVAL;
5564                         break;
5565                 }
5566
5567                 if (rc)
5568                         break;
5569         }
5570
5571         return rc;
5572 }
5573
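/*
 * MMU path: the patched CB is a verbatim copy of the user CB plus room for
 * the two trailing MSG_PROT packets; no LIN_DMA rewriting is needed since
 * the MMU translates the user addresses. Validation runs on the copy rather
 * than on the user CB, and the size it computes must match the size that was
 * reserved, otherwise the CS is rejected.
 */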
5574 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5575                 struct hl_cs_parser *parser)
5576 {
5577         u64 patched_cb_handle;
5578         u32 patched_cb_size;
5579         struct hl_cb *user_cb;
5580         int rc;
5581
5582         /*
5583          * The new CB should have space at the end for two MSG_PROT packets:
5584          * 1. A packet that will act as a completion packet
5585          * 2. A packet that will generate MSI interrupt
5586          */
5587         if (parser->completion)
5588                 parser->patched_cb_size = parser->user_cb_size +
5589                                 sizeof(struct packet_msg_prot) * 2;
5590         else
5591                 parser->patched_cb_size = parser->user_cb_size;
5592
5593         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5594                                 parser->patched_cb_size, false, false,
5595                                 &patched_cb_handle);
5596
5597         if (rc) {
5598                 dev_err(hdev->dev,
5599                         "Failed to allocate patched CB for DMA CS %d\n",
5600                         rc);
5601                 return rc;
5602         }
5603
5604         patched_cb_handle >>= PAGE_SHIFT;
5605         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5606                                 (u32) patched_cb_handle);
5607         /* hl_cb_get should never fail */
5608         if (!parser->patched_cb) {
5609                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5610                         (u32) patched_cb_handle);
5611                 rc = -EFAULT;
5612                 goto out;
5613         }
5614
5615         /*
5616          * The check that parser->user_cb_size <= parser->user_cb->size was done
5617          * in validate_queue_index().
5618          */
5619         memcpy(parser->patched_cb->kernel_address,
5620                 parser->user_cb->kernel_address,
5621                 parser->user_cb_size);
5622
5623         patched_cb_size = parser->patched_cb_size;
5624
5625         /* Validate patched CB instead of user CB */
5626         user_cb = parser->user_cb;
5627         parser->user_cb = parser->patched_cb;
5628         rc = gaudi_validate_cb(hdev, parser, true);
5629         parser->user_cb = user_cb;
5630
5631         if (rc) {
5632                 hl_cb_put(parser->patched_cb);
5633                 goto out;
5634         }
5635
5636         if (patched_cb_size != parser->patched_cb_size) {
5637                 dev_err(hdev->dev, "user CB size mismatch\n");
5638                 hl_cb_put(parser->patched_cb);
5639                 rc = -EINVAL;
5640                 goto out;
5641         }
5642
5643 out:
5644         /*
5645          * Always call cb destroy here because we still have one reference
5646          * to it, taken by cb_get earlier. Once the job completes, cb_put
5647          * will release that reference, but here we want to remove the CB
5648          * from the idr
5649          */
5650         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5651                                         patched_cb_handle << PAGE_SHIFT);
5652
5653         return rc;
5654 }
5655
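/*
 * Non-MMU path: validate the user CB, create a kernel CB large enough for
 * the patched contents and let gaudi_patch_cb() fill it. The handle is
 * always destroyed at the end so the only remaining reference is the one
 * taken by hl_cb_get(); that reference is dropped here on failure, or by
 * cb_put once the job completes.
 */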
5656 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5657                 struct hl_cs_parser *parser)
5658 {
5659         u64 patched_cb_handle;
5660         int rc;
5661
5662         rc = gaudi_validate_cb(hdev, parser, false);
5663
5664         if (rc)
5665                 goto free_userptr;
5666
5667         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5668                                 parser->patched_cb_size, false, false,
5669                                 &patched_cb_handle);
5670         if (rc) {
5671                 dev_err(hdev->dev,
5672                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5673                 goto free_userptr;
5674         }
5675
5676         patched_cb_handle >>= PAGE_SHIFT;
5677         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5678                                 (u32) patched_cb_handle);
5679         /* hl_cb_get should never fail here */
5680         if (!parser->patched_cb) {
5681                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5682                                 (u32) patched_cb_handle);
5683                 rc = -EFAULT;
5684                 goto out;
5685         }
5686
5687         rc = gaudi_patch_cb(hdev, parser);
5688
5689         if (rc)
5690                 hl_cb_put(parser->patched_cb);
5691
5692 out:
5693         /*
5694          * Always call cb destroy here because we still have one reference
5695          * to it, taken by cb_get earlier. Once the job completes, cb_put
5696          * will release that reference, but here we want to remove the CB
5697          * from the idr
5698          */
5699         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5700                                 patched_cb_handle << PAGE_SHIFT);
5701
5702 free_userptr:
5703         if (rc)
5704                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5705         return rc;
5706 }
5707
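/*
 * Jobs on internal queues reference their CB by a device-visible address
 * rather than by a CB handle, so there is nothing to patch: only verify that
 * a NIC queue is actually enabled and that the CB range lies entirely inside
 * SRAM, DRAM or the PMMU virtual range.
 */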
5708 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5709                                         struct hl_cs_parser *parser)
5710 {
5711         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5712         struct gaudi_device *gaudi = hdev->asic_specific;
5713         u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5714                 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5715
5716         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5717                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5718                         (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5719                 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5720                                 parser->hw_queue_id);
5721                 return -EINVAL;
5722         }
5723
5724         /* For internal queue jobs just check if CB address is valid */
5725         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5726                                         parser->user_cb_size,
5727                                         asic_prop->sram_user_base_address,
5728                                         asic_prop->sram_end_address))
5729                 return 0;
5730
5731         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5732                                         parser->user_cb_size,
5733                                         asic_prop->dram_user_base_address,
5734                                         asic_prop->dram_end_address))
5735                 return 0;
5736
5737         /* PMMU and HPMMU addresses are equal, check only one of them */
5738         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5739                                         parser->user_cb_size,
5740                                         asic_prop->pmmu.start_addr,
5741                                         asic_prop->pmmu.end_addr))
5742                 return 0;
5743
5744         dev_err(hdev->dev,
5745                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5746                 parser->user_cb, parser->user_cb_size);
5747
5748         return -EFAULT;
5749 }
5750
5751 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5752 {
5753         struct gaudi_device *gaudi = hdev->asic_specific;
5754
5755         if (parser->queue_type == QUEUE_TYPE_INT)
5756                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5757
5758         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5759                 return gaudi_parse_cb_mmu(hdev, parser);
5760         else
5761                 return gaudi_parse_cb_no_mmu(hdev, parser);
5762 }
5763
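/*
 * Append the two MSG_PROT packets that every external-queue CB reserves
 * space for: the first writes cq_val to cq_addr (the completion queue
 * entry), optionally with an engine barrier; the second writes 1 to
 * mmPCIE_MSI_INTR_0[msi_vec] so the host gets an interrupt. When multi-MSI
 * mode is disabled, vector 0 is always used.
 */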
5764 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5765                                         void *kernel_address, u32 len,
5766                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
5767                                         bool eb)
5768 {
5769         struct gaudi_device *gaudi = hdev->asic_specific;
5770         struct packet_msg_prot *cq_pkt;
5771         u32 tmp;
5772
5773         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5774
5775         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5776         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5777
5778         if (eb)
5779                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5780
5781         cq_pkt->ctl = cpu_to_le32(tmp);
5782         cq_pkt->value = cpu_to_le32(cq_val);
5783         cq_pkt->addr = cpu_to_le64(cq_addr);
5784
5785         cq_pkt++;
5786
5787         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5788         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5789         cq_pkt->ctl = cpu_to_le32(tmp);
5790         cq_pkt->value = cpu_to_le32(1);
5791
5792         if (!gaudi->multi_msi_mode)
5793                 msi_vec = 0;
5794
5795         cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5796 }
5797
5798 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5799 {
5800         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5801 }
5802
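/*
 * Fill a device memory range with a 64-bit value by building a single memset
 * LIN_DMA packet in a kernel CB and sending it through QMAN0 on DMA channel
 * 0. The DMA error-cause register is cleared beforehand (only while the
 * device is still initializing) and checked afterwards to detect a failed
 * fill.
 */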
5803 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5804                                         u32 size, u64 val)
5805 {
5806         struct packet_lin_dma *lin_dma_pkt;
5807         struct hl_cs_job *job;
5808         u32 cb_size, ctl, err_cause;
5809         struct hl_cb *cb;
5810         u64 id;
5811         int rc;
5812
5813         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5814         if (!cb)
5815                 return -EFAULT;
5816
5817         lin_dma_pkt = cb->kernel_address;
5818         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5819         cb_size = sizeof(*lin_dma_pkt);
5820
5821         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5822         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5823         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5824         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5825         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5826
5827         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5828         lin_dma_pkt->src_addr = cpu_to_le64(val);
5829         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5830         lin_dma_pkt->tsize = cpu_to_le32(size);
5831
5832         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5833         if (!job) {
5834                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5835                 rc = -ENOMEM;
5836                 goto release_cb;
5837         }
5838
5839         /* Verify DMA is OK */
5840         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5841         if (err_cause && !hdev->init_done) {
5842                 dev_dbg(hdev->dev,
5843                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5844                         err_cause);
5845                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5846         }
5847
5848         job->id = 0;
5849         job->user_cb = cb;
5850         atomic_inc(&job->user_cb->cs_cnt);
5851         job->user_cb_size = cb_size;
5852         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5853         job->patched_cb = job->user_cb;
5854         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5855
5856         hl_debugfs_add_job(hdev, job);
5857
5858         rc = gaudi_send_job_on_qman0(hdev, job);
5859         hl_debugfs_remove_job(hdev, job);
5860         kfree(job);
5861         atomic_dec(&cb->cs_cnt);
5862
5863         /* Verify DMA is OK */
5864         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5865         if (err_cause) {
5866                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5867                 rc = -EIO;
5868                 if (!hdev->init_done) {
5869                         dev_dbg(hdev->dev,
5870                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5871                                 err_cause);
5872                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5873                 }
5874         }
5875
5876 release_cb:
5877         id = cb->id;
5878         hl_cb_put(cb);
5879         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5880
5881         return rc;
5882 }
5883
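/*
 * Write the same value to num_regs consecutive registers by emitting one
 * MSG_LONG packet per register from a kernel CB on QMAN0, with room reserved
 * for the trailing MSG_PROT. The CB is capped at 2MB, which bounds how many
 * registers a single call can cover.
 */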
5884 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5885                                         u32 num_regs, u32 val)
5886 {
5887         struct packet_msg_long *pkt;
5888         struct hl_cs_job *job;
5889         u32 cb_size, ctl;
5890         struct hl_cb *cb;
5891         int i, rc;
5892
5893         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5894
5895         if (cb_size > SZ_2M) {
5896                 dev_err(hdev->dev, "CB size must not exceed %uMB\n", SZ_2M >> 20);
5897                 return -ENOMEM;
5898         }
5899
5900         cb = hl_cb_kernel_create(hdev, cb_size, false);
5901         if (!cb)
5902                 return -EFAULT;
5903
5904         pkt = cb->kernel_address;
5905
5906         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5907         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5908         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5909         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5910         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5911
5912         for (i = 0; i < num_regs ; i++, pkt++) {
5913                 pkt->ctl = cpu_to_le32(ctl);
5914                 pkt->value = cpu_to_le32(val);
5915                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5916         }
5917
5918         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5919         if (!job) {
5920                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5921                 rc = -ENOMEM;
5922                 goto release_cb;
5923         }
5924
5925         job->id = 0;
5926         job->user_cb = cb;
5927         atomic_inc(&job->user_cb->cs_cnt);
5928         job->user_cb_size = cb_size;
5929         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5930         job->patched_cb = job->user_cb;
5931         job->job_cb_size = cb_size;
5932
5933         hl_debugfs_add_job(hdev, job);
5934
5935         rc = gaudi_send_job_on_qman0(hdev, job);
5936         hl_debugfs_remove_job(hdev, job);
5937         kfree(job);
5938         atomic_dec(&cb->cs_cnt);
5939
5940 release_cb:
5941         hl_cb_put(cb);
5942         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5943
5944         return rc;
5945 }
5946
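/*
 * Clear the sync manager objects and monitors user space may have touched:
 * all SOBs and monitor status registers of the E_N, E_S and W_N blocks, and
 * those of the W_S block beyond the first entries the driver reserves for
 * its own use.
 */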
5947 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5948 {
5949         u64 base_addr;
5950         u32 num_regs;
5951         int rc;
5952
5953         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5954         num_regs = NUM_OF_SOB_IN_BLOCK;
5955         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5956         if (rc) {
5957                 dev_err(hdev->dev, "failed resetting SM registers\n");
5958                 return -ENOMEM;
5959         }
5960
5961         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5962         num_regs = NUM_OF_SOB_IN_BLOCK;
5963         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5964         if (rc) {
5965                 dev_err(hdev->dev, "failed resetting SM registers\n");
5966                 return -ENOMEM;
5967         }
5968
5969         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5970         num_regs = NUM_OF_SOB_IN_BLOCK;
5971         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5972         if (rc) {
5973                 dev_err(hdev->dev, "failed resetting SM registers\n");
5974                 return -ENOMEM;
5975         }
5976
5977         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5978         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5979         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5980         if (rc) {
5981                 dev_err(hdev->dev, "failed resetting SM registers\n");
5982                 return -ENOMEM;
5983         }
5984
5985         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5986         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5987         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5988         if (rc) {
5989                 dev_err(hdev->dev, "failed resetting SM registers\n");
5990                 return -ENOMEM;
5991         }
5992
5993         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5994         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5995         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5996         if (rc) {
5997                 dev_err(hdev->dev, "failed resetting SM registers\n");
5998                 return -ENOMEM;
5999         }
6000
6001         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6002                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6003         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6004         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6005         if (rc) {
6006                 dev_err(hdev->dev, "failed resetting SM registers\n");
6007                 return -ENOMEM;
6008         }
6009
6010         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6011                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6012         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6013         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6014         if (rc) {
6015                 dev_err(hdev->dev, "failed resetting SM registers\n");
6016                 return -ENOMEM;
6017         }
6018
6019         return 0;
6020 }
6021
6022 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6023 {
6024         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6025                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6026         int i;
6027
6028         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6029                 u64 sob_addr = CFG_BASE +
6030                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6031                                 (i * sob_delta);
6032                 u32 dma_offset = i * DMA_CORE_OFFSET;
6033
6034                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6035                                 lower_32_bits(sob_addr));
6036                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6037                                 upper_32_bits(sob_addr));
6038                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6039
6040                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6041                  * modified by the user for SRAM reduction
6042                  */
6043                 if (i > 1)
6044                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6045                                                                 0x00000001);
6046         }
6047 }
6048
6049 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6050 {
6051         u32 qman_offset;
6052         int i;
6053
6054         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6055                 qman_offset = i * DMA_QMAN_OFFSET;
6056                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6057         }
6058
6059         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6060                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6061                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6062         }
6063
6064         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6065                 qman_offset = i * TPC_QMAN_OFFSET;
6066                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6067         }
6068
6069         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6070                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6071                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6072                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6073         }
6074 }
6075
6076 static int gaudi_restore_user_registers(struct hl_device *hdev)
6077 {
6078         int rc;
6079
6080         rc = gaudi_restore_sm_registers(hdev);
6081         if (rc)
6082                 return rc;
6083
6084         gaudi_restore_dma_registers(hdev);
6085         gaudi_restore_qm_registers(hdev);
6086
6087         return 0;
6088 }
6089
6090 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6091 {
6092         return 0;
6093 }
6094
6095 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6096 {
6097         struct asic_fixed_properties *prop = &hdev->asic_prop;
6098         struct gaudi_device *gaudi = hdev->asic_specific;
6099         u64 addr = prop->mmu_pgt_addr;
6100         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6101
6102         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6103                 return 0;
6104
6105         return gaudi_memset_device_memory(hdev, addr, size, 0);
6106 }
6107
6108 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6109 {
6110
6111 }
6112
6113 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6114                         bool user_address, u32 *val)
6115 {
6116         struct asic_fixed_properties *prop = &hdev->asic_prop;
6117         struct gaudi_device *gaudi = hdev->asic_specific;
6118         u64 hbm_bar_addr, host_phys_end;
6119         int rc = 0;
6120
6121         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6122
6123         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6124
6125                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6126                                 (hdev->clock_gating_mask &
6127                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6128
6129                         dev_err_ratelimited(hdev->dev,
6130                                 "Can't read register - clock gating is enabled!\n");
6131                         rc = -EFAULT;
6132                 } else {
6133                         *val = RREG32(addr - CFG_BASE);
6134                 }
6135
6136         } else if ((addr >= SRAM_BASE_ADDR) &&
6137                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6138                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6139                                 (addr - SRAM_BASE_ADDR));
6140         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6141                 u64 bar_base_addr = DRAM_PHYS_BASE +
6142                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6143
6144                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6145                 if (hbm_bar_addr != U64_MAX) {
6146                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6147                                                 (addr - bar_base_addr));
6148
6149                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6150                                                 hbm_bar_addr);
6151                 }
6152                 if (hbm_bar_addr == U64_MAX)
6153                         rc = -EIO;
6154         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6155                         user_address && !iommu_present(&pci_bus_type)) {
6156                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6157         } else {
6158                 rc = -EFAULT;
6159         }
6160
6161         return rc;
6162 }
6163
6164 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6165                         bool user_address, u32 val)
6166 {
6167         struct asic_fixed_properties *prop = &hdev->asic_prop;
6168         struct gaudi_device *gaudi = hdev->asic_specific;
6169         u64 hbm_bar_addr, host_phys_end;
6170         int rc = 0;
6171
6172         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6173
6174         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6175
6176                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6177                                 (hdev->clock_gating_mask &
6178                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6179
6180                         dev_err_ratelimited(hdev->dev,
6181                                 "Can't write register - clock gating is enabled!\n");
6182                         rc = -EFAULT;
6183                 } else {
6184                         WREG32(addr - CFG_BASE, val);
6185                 }
6186
6187         } else if ((addr >= SRAM_BASE_ADDR) &&
6188                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6189                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6190                                         (addr - SRAM_BASE_ADDR));
6191         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6192                 u64 bar_base_addr = DRAM_PHYS_BASE +
6193                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6194
6195                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6196                 if (hbm_bar_addr != U64_MAX) {
6197                         writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6198                                                 (addr - bar_base_addr));
6199
6200                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6201                                                 hbm_bar_addr);
6202                 }
6203                 if (hbm_bar_addr == U64_MAX)
6204                         rc = -EIO;
6205         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6206                         user_address && !iommu_present(&pci_bus_type)) {
6207                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6208         } else {
6209                 rc = -EFAULT;
6210         }
6211
6212         return rc;
6213 }
6214
6215 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6216                                 bool user_address, u64 *val)
6217 {
6218         struct asic_fixed_properties *prop = &hdev->asic_prop;
6219         struct gaudi_device *gaudi = hdev->asic_specific;
6220         u64 hbm_bar_addr, host_phys_end;
6221         int rc = 0;
6222
6223         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6224
6225         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6226
6227                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6228                                 (hdev->clock_gating_mask &
6229                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6230
6231                         dev_err_ratelimited(hdev->dev,
6232                                 "Can't read register - clock gating is enabled!\n");
6233                         rc = -EFAULT;
6234                 } else {
6235                         u32 val_l = RREG32(addr - CFG_BASE);
6236                         u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6237
6238                         *val = (((u64) val_h) << 32) | val_l;
6239                 }
6240
6241         } else if ((addr >= SRAM_BASE_ADDR) &&
6242                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6243                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6244                                 (addr - SRAM_BASE_ADDR));
6245         } else if (addr <=
6246                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6247                 u64 bar_base_addr = DRAM_PHYS_BASE +
6248                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6249
6250                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6251                 if (hbm_bar_addr != U64_MAX) {
6252                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6253                                                 (addr - bar_base_addr));
6254
6255                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6256                                                 hbm_bar_addr);
6257                 }
6258                 if (hbm_bar_addr == U64_MAX)
6259                         rc = -EIO;
6260         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6261                         user_address && !iommu_present(&pci_bus_type)) {
6262                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6263         } else {
6264                 rc = -EFAULT;
6265         }
6266
6267         return rc;
6268 }
6269
6270 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6271                                 bool user_address, u64 val)
6272 {
6273         struct asic_fixed_properties *prop = &hdev->asic_prop;
6274         struct gaudi_device *gaudi = hdev->asic_specific;
6275         u64 hbm_bar_addr, host_phys_end;
6276         int rc = 0;
6277
6278         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6279
6280         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6281
6282                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6283                                 (hdev->clock_gating_mask &
6284                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6285
6286                         dev_err_ratelimited(hdev->dev,
6287                                 "Can't write register - clock gating is enabled!\n");
6288                         rc = -EFAULT;
6289                 } else {
6290                         WREG32(addr - CFG_BASE, lower_32_bits(val));
6291                         WREG32(addr + sizeof(u32) - CFG_BASE,
6292                                 upper_32_bits(val));
6293                 }
6294
6295         } else if ((addr >= SRAM_BASE_ADDR) &&
6296                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6297                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6298                                         (addr - SRAM_BASE_ADDR));
6299         } else if (addr <=
6300                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6301                 u64 bar_base_addr = DRAM_PHYS_BASE +
6302                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6303
6304                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6305                 if (hbm_bar_addr != U64_MAX) {
6306                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6307                                                 (addr - bar_base_addr));
6308
6309                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6310                                                 hbm_bar_addr);
6311                 }
6312                 if (hbm_bar_addr == U64_MAX)
6313                         rc = -EIO;
6314         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6315                         user_address && !iommu_present(&pci_bus_type)) {
6316                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6317         } else {
6318                 rc = -EFAULT;
6319         }
6320
6321         return rc;
6322 }
6323
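/*
 * Program one DMA core directly (bypassing its QMAN) to copy size_to_dma
 * bytes from a device address to a host DMA address, then poll the core
 * status until it is no longer busy and check the error-cause register.
 * Used by the debugfs DMA read path below.
 */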
6324 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6325                                         u32 size_to_dma, dma_addr_t dma_addr)
6326 {
6327         u32 err_cause, val;
6328         u64 dma_offset;
6329         int rc;
6330
6331         dma_offset = dma_id * DMA_CORE_OFFSET;
6332
6333         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6334         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6335         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6336         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6337         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6338         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6339                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6340
6341         rc = hl_poll_timeout(
6342                 hdev,
6343                 mmDMA0_CORE_STS0 + dma_offset,
6344                 val,
6345                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6346                 0,
6347                 1000000);
6348
6349         if (rc) {
6350                 dev_err(hdev->dev,
6351                         "DMA %d timed-out during reading of 0x%llx\n",
6352                         dma_id, addr);
6353                 return -EIO;
6354         }
6355
6356         /* Verify DMA is OK */
6357         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6358         if (err_cause) {
6359                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6360                 dev_dbg(hdev->dev,
6361                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6362                         err_cause);
6363                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6364
6365                 return -EIO;
6366         }
6367
6368         return 0;
6369 }
6370
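/*
 * Debugfs helper that reads a large device memory range by bouncing it
 * through a 2MB coherent host buffer, one DMA core transfer at a time. It
 * requires an idle PCI DMA engine, stops that engine's QMAN CPs while the
 * core is driven directly, and temporarily sets the core protection bit so
 * the transfer can reach the unmapped host buffer (see the TODO notes
 * below).
 */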
6371 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6372                                 void *blob_addr)
6373 {
6374         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6375         struct gaudi_device *gaudi = hdev->asic_specific;
6376         u64 dma_offset, qm_offset;
6377         dma_addr_t dma_addr;
6378         void *kernel_addr;
6379         bool is_eng_idle;
6380         int rc = 0, dma_id;
6381
6382         kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6383                                                 hdev, SZ_2M,
6384                                                 &dma_addr,
6385                                                 GFP_KERNEL | __GFP_ZERO);
6386
6387         if (!kernel_addr)
6388                 return -ENOMEM;
6389
6390         mutex_lock(&gaudi->clk_gate_mutex);
6391
6392         hdev->asic_funcs->disable_clock_gating(hdev);
6393
6394         hdev->asic_funcs->hw_queues_lock(hdev);
6395
6396         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6397         dma_offset = dma_id * DMA_CORE_OFFSET;
6398         qm_offset = dma_id * DMA_QMAN_OFFSET;
6399         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6400         is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6401
6402         if (!is_eng_idle) {
6403                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6404                 dma_offset = dma_id * DMA_CORE_OFFSET;
6405                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6406                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6407                 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6408
6409                 if (!is_eng_idle) {
6410                         dev_err_ratelimited(hdev->dev,
6411                                 "Can't read via DMA because it is BUSY\n");
6412                         rc = -EAGAIN;
6413                         goto out;
6414                 }
6415         }
6416
6417         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6418         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6419                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6420
6421         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6422          * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6423          * ASID
6424          */
6425         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6426
6427         /* Verify DMA is OK */
6428         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6429         if (err_cause) {
6430                 dev_dbg(hdev->dev,
6431                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6432                         err_cause);
6433                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6434         }
6435
6436         pos = 0;
6437         size_left = size;
6438         size_to_dma = SZ_2M;
6439
6440         while (size_left > 0) {
6441
6442                 if (size_left < SZ_2M)
6443                         size_to_dma = size_left;
6444
6445                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6446                                                 dma_addr);
6447                 if (rc)
6448                         break;
6449
6450                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6451
6452                 if (size_left <= SZ_2M)
6453                         break;
6454
6455                 pos += SZ_2M;
6456                 addr += SZ_2M;
6457                 size_left -= SZ_2M;
6458         }
6459
6460         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6461          * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6462          * ASID
6463          */
6464         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6465                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6466
6467         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6468
6469 out:
6470         hdev->asic_funcs->hw_queues_unlock(hdev);
6471
6472         hdev->asic_funcs->set_clock_gating(hdev);
6473
6474         mutex_unlock(&gaudi->clk_gate_mutex);
6475
6476         hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6477                                                 dma_addr);
6478
6479         return rc;
6480 }
6481
6482 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6483 {
6484         struct gaudi_device *gaudi = hdev->asic_specific;
6485
6486         if (hdev->hard_reset_pending)
6487                 return U64_MAX;
6488
6489         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6490                         (addr - gaudi->hbm_bar_cur_addr));
6491 }
6492
6493 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6494 {
6495         struct gaudi_device *gaudi = hdev->asic_specific;
6496
6497         if (hdev->hard_reset_pending)
6498                 return;
6499
6500         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6501                         (addr - gaudi->hbm_bar_cur_addr));
6502 }
6503
6504 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6505 {
6506         /* mask to zero the MMBP and ASID bits */
6507         WREG32_AND(reg, ~0x7FF);
6508         WREG32_OR(reg, asid);
6509 }
6510
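/*
 * Stamp the given ASID into the engines' non-secure properties registers
 * via gaudi_mmu_prepare_reg(), which also clears the MMU-bypass bit, so
 * their transactions are translated in that address space. The ASID must
 * fit the register field, and clock gating is disabled while the registers
 * are updated.
 */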
6511 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6512 {
6513         struct gaudi_device *gaudi = hdev->asic_specific;
6514
6515         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6516                 return;
6517
6518         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6519                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6520                 return;
6521         }
6522
6523         mutex_lock(&gaudi->clk_gate_mutex);
6524
6525         hdev->asic_funcs->disable_clock_gating(hdev);
6526
6527         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6528         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6529         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6530         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6531         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6532
6533         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6534         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6535         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6536         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6537         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6538
6539         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6540         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6541         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6542         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6543         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6544
6545         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6546         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6547         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6548         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6549         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6550
6551         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6552         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6553         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6554         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6555         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6556
6557         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6558         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6559         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6560         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6561         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6562
6563         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6564         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6565         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6566         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6567         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6568
6569         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6570         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6571         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6572         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6573         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6574
6575         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6576         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6577         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6578         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6579         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6580         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6581         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6582         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6583
6584         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6585         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6586         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6587         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6588         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6589         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6590         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6591
6592         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6593         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6594         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6595         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6596         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6597         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6598         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6599
6600         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6601         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6602         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6603         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6604         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6605         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6606         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6607
6608         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6609         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6610         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6611         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6612         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6613         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6614         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6615
6616         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6617         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6618         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6619         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6620         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6621         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6622         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6623
6624         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6625         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6626         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6627         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6628         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6629         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6630         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6631
6632         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6633         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6634         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6635         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6636         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6637         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6638         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6639
6640         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6641         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6642         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6643         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6644         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6645         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6646         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6647
6648         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6649         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6650         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6651         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6652         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6653         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6654         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6655         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6656         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6657         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6658
6659         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6660         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6661         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6662         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6663         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6664         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6665         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6666         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6667         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6668         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6669         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6670         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6671
6672         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6673                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6674                                 asid);
6675                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6676                                 asid);
6677                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6678                                 asid);
6679                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6680                                 asid);
6681                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6682                                 asid);
6683         }
6684
6685         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6686                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6687                                 asid);
6688                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6689                                 asid);
6690                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6691                                 asid);
6692                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6693                                 asid);
6694                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6695                                 asid);
6696         }
6697
6698         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6699                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6700                                 asid);
6701                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6702                                 asid);
6703                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6704                                 asid);
6705                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6706                                 asid);
6707                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6708                                 asid);
6709         }
6710
6711         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6712                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6713                                 asid);
6714                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6715                                 asid);
6716                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6717                                 asid);
6718                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6719                                 asid);
6720                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6721                                 asid);
6722         }
6723
6724         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6725                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6726                                 asid);
6727                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6728                                 asid);
6729                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6730                                 asid);
6731                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6732                                 asid);
6733                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6734                                 asid);
6735         }
6736
6737         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6738                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6739                                 asid);
6740                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6741                                 asid);
6742                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6743                                 asid);
6744                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6745                                 asid);
6746                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6747                                 asid);
6748         }
6749
6750         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6751                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6752                                 asid);
6753                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6754                                 asid);
6755                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6756                                 asid);
6757                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6758                                 asid);
6759                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6760                                 asid);
6761         }
6762
6763         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6764                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6765                                 asid);
6766                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6767                                 asid);
6768                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6769                                 asid);
6770                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6771                                 asid);
6772                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6773                                 asid);
6774         }
6775
6776         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6777                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6778                                 asid);
6779                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6780                                 asid);
6781                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6782                                 asid);
6783                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6784                                 asid);
6785                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6786                                 asid);
6787         }
6788
6789         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6790                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6791                                 asid);
6792                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6793                                 asid);
6794                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6795                                 asid);
6796                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6797                                 asid);
6798                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6799                                 asid);
6800         }
6801
6802         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6803         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6804
6805         hdev->asic_funcs->set_clock_gating(hdev);
6806
6807         mutex_unlock(&gaudi->clk_gate_mutex);
6808 }
6809
6810 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6811                 struct hl_cs_job *job)
6812 {
6813         struct packet_msg_prot *fence_pkt;
6814         u32 *fence_ptr;
6815         dma_addr_t fence_dma_addr;
6816         struct hl_cb *cb;
6817         u32 tmp, timeout, dma_offset;
6818         int rc;
6819
6820         if (hdev->pldm)
6821                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6822         else
6823                 timeout = HL_DEVICE_TIMEOUT_USEC;
6824
6825         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6826                 dev_err_ratelimited(hdev->dev,
6827                         "Can't send driver job on QMAN0 because the device is not idle\n");
6828                 return -EBUSY;
6829         }
6830
6831         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6832                                                         &fence_dma_addr);
6833         if (!fence_ptr) {
6834                 dev_err(hdev->dev,
6835                         "Failed to allocate fence memory for QMAN0\n");
6836                 return -ENOMEM;
6837         }
6838
6839         cb = job->patched_cb;
6840
6841         fence_pkt = cb->kernel_address +
6842                         job->job_cb_size - sizeof(struct packet_msg_prot);
6843
6844         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6845         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6846         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6847
6848         fence_pkt->ctl = cpu_to_le32(tmp);
6849         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6850         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
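
        /*
         * The MSG_PROT packet above is the last packet of the patched CB;
         * when QMAN0 executes it, it writes GAUDI_QMAN0_FENCE_VAL to
         * fence_dma_addr, which the polling loop below waits on to detect
         * completion of the job.
         */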
6851
6852         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6853
6854         WREG32(mmDMA0_CORE_PROT + dma_offset,
6855                         BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6856
6857         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6858                                         job->job_cb_size, cb->bus_address);
6859         if (rc) {
6860                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6861                 goto free_fence_ptr;
6862         }
6863
6864         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6865                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6866                                 timeout, true);
6867
6868         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6869
6870         if (rc == -ETIMEDOUT) {
6871                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6872                 goto free_fence_ptr;
6873         }
6874
6875 free_fence_ptr:
6876         WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6877
6878         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6879                                         fence_dma_addr);
6880         return rc;
6881 }
6882
6883 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6884 {
6885         if (event_type >= GAUDI_EVENT_SIZE)
6886                 goto event_not_supported;
6887
6888         if (!gaudi_irq_map_table[event_type].valid)
6889                 goto event_not_supported;
6890
6891         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6892
6893         return;
6894
6895 event_not_supported:
6896         snprintf(desc, size, "N/A");
6897 }
6898
6899 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6900                                                         u32 x_y, bool is_write)
6901 {
6902         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6903
6904         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6905                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6906
6907         switch (x_y) {
6908         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6909         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6910                 dma_id[0] = 0;
6911                 dma_id[1] = 2;
6912                 break;
6913         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6914         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6915                 dma_id[0] = 1;
6916                 dma_id[1] = 3;
6917                 break;
6918         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6919         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6920                 dma_id[0] = 4;
6921                 dma_id[1] = 6;
6922                 break;
6923         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6924         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6925                 dma_id[0] = 5;
6926                 dma_id[1] = 7;
6927                 break;
6928         default:
6929                 goto unknown_initiator;
6930         }
6931
6932         for (i = 0 ; i < 2 ; i++) {
6933                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6934                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6935         }
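
        /*
         * Two DMA engines share each DMA_IF initiator ID, so both candidates'
         * error-cause registers were read above; the HBW read/write error bit
         * selected by 'mask' tells which engine actually triggered the RAZWI.
         * If neither or both have the bit set, the ambiguous pair is reported.
         */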
6936
6937         switch (x_y) {
6938         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6939         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6940                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6941                         return "DMA0";
6942                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6943                         return "DMA2";
6944                 else
6945                         return "DMA0 or DMA2";
6946         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6947         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6948                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6949                         return "DMA1";
6950                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6951                         return "DMA3";
6952                 else
6953                         return "DMA1 or DMA3";
6954         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6955         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6956                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6957                         return "DMA4";
6958                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6959                         return "DMA6";
6960                 else
6961                         return "DMA4 or DMA6";
6962         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6963         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6964                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6965                         return "DMA5";
6966                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6967                         return "DMA7";
6968                 else
6969                         return "DMA5 or DMA7";
6970         }
6971
6972 unknown_initiator:
6973         return "unknown initiator";
6974 }
6975
6976 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
6977                                                         bool is_write)
6978 {
6979         u32 val, x_y, axi_id;
6980
6981         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6982                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
6983         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6984                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6985         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6986                         RAZWI_INITIATOR_AXI_ID_SHIFT);
6987
6988         switch (x_y) {
6989         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6990                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6991                         return "TPC0";
6992                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6993                         return "NIC0";
6994                 break;
6995         case RAZWI_INITIATOR_ID_X_Y_TPC1:
6996                 return "TPC1";
6997         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6998         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6999                 return "MME0";
7000         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
7001         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
7002                 return "MME1";
7003         case RAZWI_INITIATOR_ID_X_Y_TPC2:
7004                 return "TPC2";
7005         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
7006                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7007                         return "TPC3";
7008                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
7009                         return "PCI";
7010                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
7011                         return "CPU";
7012                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
7013                         return "PSOC";
7014                 break;
7015         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7016         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7017         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7018         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7019         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7020         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7021         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7022         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7023                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
7024         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7025                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7026                         return "TPC4";
7027                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7028                         return "NIC1";
7029                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7030                         return "NIC2";
7031                 break;
7032         case RAZWI_INITIATOR_ID_X_Y_TPC5:
7033                 return "TPC5";
7034         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7035         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7036                 return "MME2";
7037         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7038         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7039                 return "MME3";
7040         case RAZWI_INITIATOR_ID_X_Y_TPC6:
7041                 return "TPC6";
7042         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7043                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7044                         return "TPC7";
7045                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7046                         return "NIC4";
7047                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7048                         return "NIC5";
7049                 break;
7050         default:
7051                 break;
7052         }
7053
7054         dev_err(hdev->dev,
7055                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7056                 val,
7057                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7058                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7059                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7060                         RAZWI_INITIATOR_AXI_ID_MASK);
7061
7062         return "unknown initiator";
7063 }
7064
7065 static void gaudi_print_razwi_info(struct hl_device *hdev)
7066 {
7067         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7068                 dev_err_ratelimited(hdev->dev,
7069                         "RAZWI event caused by illegal write of %s\n",
7070                         gaudi_get_razwi_initiator_name(hdev, true));
7071                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7072         }
7073
7074         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7075                 dev_err_ratelimited(hdev->dev,
7076                         "RAZWI event caused by illegal read of %s\n",
7077                         gaudi_get_razwi_initiator_name(hdev, false));
7078                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7079         }
7080 }
7081
7082 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7083 {
7084         struct gaudi_device *gaudi = hdev->asic_specific;
7085         u64 addr;
7086         u32 val;
7087
7088         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7089                 return;
7090
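        /*
         * Each error-capture register holds a valid bit plus VA bits 49:32;
         * the low 32 VA bits are read from the companion *_CAPTURE_VA
         * register, and the capture register is cleared after reporting.
         */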
7091         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7092         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7093                 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7094                 addr <<= 32;
7095                 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7096
7097                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7098                                         addr);
7099
7100                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7101         }
7102
7103         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7104         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7105                 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7106                 addr <<= 32;
7107                 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7108
7109                 dev_err_ratelimited(hdev->dev,
7110                                 "MMU access error on va 0x%llx\n", addr);
7111
7112                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7113         }
7114 }
7115
7116 /*
7117  *  +-------------------+------------------------------------------------------+
7118  *  | Configuration Reg |                     Description                      |
7119  *  |      Address      |                                                      |
7120  *  +-------------------+------------------------------------------------------+
7121  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7122  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7123  *  |                   |0xF34 memory wrappers 63:32                           |
7124  *  |                   |0xF38 memory wrappers 95:64                           |
7125  *  |                   |0xF3C memory wrappers 127:96                          |
7126  *  +-------------------+------------------------------------------------------+
7127  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7128  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7129  *  |                   |0xF44 memory wrappers 63:32                           |
7130  *  |                   |0xF48 memory wrappers 95:64                           |
7131  *  |                   |0xF4C memory wrappers 127:96                          |
7132  *  +-------------------+------------------------------------------------------+
7133  */
7134 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7135                 struct ecc_info_extract_params *params, u64 *ecc_address,
7136                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7137 {
7138         struct gaudi_device *gaudi = hdev->asic_specific;
7139         u32 i, num_mem_regs, reg, err_bit;
7140         u64 err_addr, err_word = 0;
7141         int rc = 0;
7142
7143         num_mem_regs = params->num_memories / 32 +
7144                         ((params->num_memories % 32) ? 1 : 0);
7145
7146         if (params->block_address >= CFG_BASE)
7147                 params->block_address -= CFG_BASE;
7148
7149         if (params->derr)
7150                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7151         else
7152                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7153
7154         if (params->disable_clock_gating) {
7155                 mutex_lock(&gaudi->clk_gate_mutex);
7156                 hdev->asic_funcs->disable_clock_gating(hdev);
7157         }
7158
7159         /* Set invalid wrapper index */
7160         *memory_wrapper_idx = 0xFF;
7161
7162         /* Iterate through memory wrappers, a single bit must be set */
7163         for (i = 0 ; i < num_mem_regs ; i++) {
7164                 /* status words are consecutive 32-bit registers */
7165                 err_word = RREG32(err_addr + i * 4);
7166                 if (err_word) {
7167                         err_bit = __ffs(err_word);
7168                         *memory_wrapper_idx = err_bit + (32 * i);
7169                         break;
7170                 }
7171         }
7172
7173         if (*memory_wrapper_idx == 0xFF) {
7174                 dev_err(hdev->dev, "ECC error information cannot be found\n");
7175                 rc = -EINVAL;
7176                 goto enable_clk_gate;
7177         }
7178
7179         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7180                         *memory_wrapper_idx);
7181
7182         *ecc_address =
7183                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7184         *ecc_syndrom =
7185                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7186
7187         /* Clear error indication */
7188         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7189         if (params->derr)
7190                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7191         else
7192                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7193
7194         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7195
7196 enable_clk_gate:
7197         if (params->disable_clock_gating) {
7198                 hdev->asic_funcs->set_clock_gating(hdev);
7199
7200                 mutex_unlock(&gaudi->clk_gate_mutex);
7201         }
7202
7203         return rc;
7204 }
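
/*
 * Worked example for the extraction above: a TPC block reports 90 memories,
 * so three status words are scanned. If only bit 5 of the second word
 * (i = 1) is set, memory_wrapper_idx becomes 5 + 32 = 37; that index is then
 * written to the MEM_SEL register so the wrapper's address and syndrome can
 * be read back.
 */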
7205
7206 /*
7207  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7208  *
7209  * @idx: the current pi/ci value
7210  * @q_len: the queue length (power of 2)
7211  *
7212  * @return the cyclically decremented index
7213  */
7214 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7215 {
7216         u32 mask = q_len - 1;
7217
7218         /*
7219          * modular decrement is equivalent to adding (queue_len - 1);
7220          * we then take the LSBs to keep the value in the
7221          * range [0, queue_len - 1]
7222          */
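        /* e.g. with q_len = 8 (mask = 7): dec(0) = 7, dec(5) = 4 */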
7223         return (idx + q_len - 1) & mask;
7224 }
7225
7226 /**
7227  * gaudi_print_sw_config_stream_data - print SW config stream data
7228  *
7229  * @hdev: pointer to the habanalabs device structure
7230  * @stream: the QMAN's stream
7231  * @qman_base: base address of QMAN registers block
7232  */
7233 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7234                                                 u64 qman_base)
7235 {
7236         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7237         u32 cq_ptr_lo_off, size;
7238
7239         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7240
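        /*
         * Per-stream CQ registers are laid out at a fixed stride, so the
         * stream 1 / stream 0 distance serves as the stride for any stream.
         * The TPC0 register offsets are used only as a template; qman_base
         * selects the QMAN block that is actually read.
         */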
7241         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7242                                                 stream * cq_ptr_lo_off;
7243         cq_ptr_hi = cq_ptr_lo +
7244                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7245         cq_tsize = cq_ptr_lo +
7246                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7247
7248         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7249         size = RREG32(cq_tsize);
7250         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
7251                                                         stream, cq_ptr, size);
7252 }
7253
7254 /**
7255  * gaudi_print_last_pqes_on_err - print last PQEs on error
7256  *
7257  * @hdev: pointer to the habanalabs device structure
7258  * @qid_base: first QID of the QMAN (out of 4 streams)
7259  * @stream: the QMAN's stream
7260  * @qman_base: base address of QMAN registers block
7261  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7262  */
7263 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7264                                                 u32 stream, u64 qman_base,
7265                                                 bool pr_sw_conf)
7266 {
7267         u32 ci, qm_ci_stream_off, queue_len;
7268         struct hl_hw_queue *q;
7269         u64 pq_ci;
7270         int i;
7271
7272         q = &hdev->kernel_queues[qid_base + stream];
7273
7274         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7275         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7276                                                 stream * qm_ci_stream_off;
7277
7278         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7279                                         q->int_queue_len : HL_QUEUE_LENGTH;
7280
7281         hdev->asic_funcs->hw_queues_lock(hdev);
7282
7283         if (pr_sw_conf)
7284                 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7285
7286         ci = RREG32(pq_ci);
7287
7288         /* we should start printing from ci - 1 */
7289         ci = gaudi_queue_idx_dec(ci, queue_len);
7290
7291         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7292                 struct hl_bd *bd;
7293                 u64 addr;
7294                 u32 len;
7295
7296                 bd = q->kernel_address;
7297                 bd += ci;
7298
7299                 len = le32_to_cpu(bd->len);
7300                 /* len 0 means an uninitialized entry - break */
7301                 if (!len)
7302                         break;
7303
7304                 addr = le64_to_cpu(bd->ptr);
7305
7306                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7307                                                         stream, ci, addr, len);
7308
7309                 /* get previous ci, wrap if needed */
7310                 ci = gaudi_queue_idx_dec(ci, queue_len);
7311         }
7312
7313         hdev->asic_funcs->hw_queues_unlock(hdev);
7314 }
7315
7316 /**
7317  * print_qman_data_on_err - extract QMAN data on error
7318  *
7319  * @hdev: pointer to the habanalabs device structure
7320  * @qid_base: first QID of the QMAN (out of 4 streams)
7321  * @stream: the QMAN's stream
7322  * @qman_base: base address of QMAN registers block
7323  *
7324  * This function attempts to extract as much data as possible on a QMAN error.
7325  * For an upper CP, print the SW config stream data and the last 8 PQEs.
7326  * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7327  */
7328 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7329                                                 u32 stream, u64 qman_base)
7330 {
7331         u32 i;
7332
7333         if (stream != QMAN_STREAMS) {
7334                 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7335                                                                         true);
7336                 return;
7337         }
7338
7339         gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7340
7341         for (i = 0; i < QMAN_STREAMS; i++)
7342                 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7343                                                                         false);
7344 }
7345
7346 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7347                                           const char *qm_name,
7348                                           u64 qman_base,
7349                                           u32 qid_base)
7350 {
7351         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7352         u64 glbl_sts_addr, arb_err_addr;
7353         char reg_desc[32];
7354
7355         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7356         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7357
7358         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7359         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7360                 glbl_sts_clr_val = 0;
7361                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7362
7363                 if (!glbl_sts_val)
7364                         continue;
7365
7366                 if (i == QMAN_STREAMS)
7367                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7368                 else
7369                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7370
7371                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7372                         if (glbl_sts_val & BIT(j)) {
7373                                 dev_err_ratelimited(hdev->dev,
7374                                                 "%s %s. err cause: %s\n",
7375                                                 qm_name, reg_desc,
7376                                                 gaudi_qman_error_cause[j]);
7377                                 glbl_sts_clr_val |= BIT(j);
7378                         }
7379                 }
7380
7381                 /* Write 1 to clear errors */
7382                 if (!hdev->stop_on_err)
7383                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7384                 else
7385                         print_qman_data_on_err(hdev, qid_base, i, qman_base);
7386         }
7387
7388         arb_err_val = RREG32(arb_err_addr);
7389
7390         if (!arb_err_val)
7391                 return;
7392
7393         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7394                 if (arb_err_val & BIT(j)) {
7395                         dev_err_ratelimited(hdev->dev,
7396                                         "%s ARB_ERR. err cause: %s\n",
7397                                         qm_name,
7398                                         gaudi_qman_arb_error_cause[j]);
7399                 }
7400         }
7401 }
7402
7403 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7404                 struct hl_eq_sm_sei_data *sei_data)
7405 {
7406         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7407
7408         /* Flip the bits as the enum is ordered in the opposite way */
7409         index = (index ^ 0x3) & 0x3;
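        /* e.g. GAUDI_EVENT_DMA_IF_SEI_0 gives index 0, which is flipped to 3 */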
7410
7411         switch (sei_data->sei_cause) {
7412         case SM_SEI_SO_OVERFLOW:
7413                 dev_err_ratelimited(hdev->dev,
7414                         "%s SEI Error: SOB Group %u overflow/underflow",
7415                         gaudi_sync_manager_names[index],
7416                         le32_to_cpu(sei_data->sei_log));
7417                 break;
7418         case SM_SEI_LBW_4B_UNALIGNED:
7419                 dev_err_ratelimited(hdev->dev,
7420                         "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7421                         gaudi_sync_manager_names[index],
7422                         le32_to_cpu(sei_data->sei_log));
7423                 break;
7424         case SM_SEI_AXI_RESPONSE_ERR:
7425                 dev_err_ratelimited(hdev->dev,
7426                         "%s SEI Error: AXI ID %u response error",
7427                         gaudi_sync_manager_names[index],
7428                         le32_to_cpu(sei_data->sei_log));
7429                 break;
7430         default:
7431                 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7432                                 le32_to_cpu(sei_data->sei_log));
7433                 break;
7434         }
7435 }
7436
7437 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7438                 struct hl_eq_ecc_data *ecc_data)
7439 {
7440         struct ecc_info_extract_params params;
7441         u64 ecc_address = 0, ecc_syndrom = 0;
7442         u8 index, memory_wrapper_idx = 0;
7443         bool extract_info_from_fw;
7444         int rc;
7445
7446         switch (event_type) {
7447         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7448         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7449                 extract_info_from_fw = true;
7450                 break;
7451         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7452                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7453                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7454                 params.num_memories = 90;
7455                 params.derr = false;
7456                 params.disable_clock_gating = true;
7457                 extract_info_from_fw = false;
7458                 break;
7459         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7460                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7461                 params.block_address =
7462                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7463                 params.num_memories = 90;
7464                 params.derr = true;
7465                 params.disable_clock_gating = true;
7466                 extract_info_from_fw = false;
7467                 break;
7468         case GAUDI_EVENT_MME0_ACC_SERR:
7469         case GAUDI_EVENT_MME1_ACC_SERR:
7470         case GAUDI_EVENT_MME2_ACC_SERR:
7471         case GAUDI_EVENT_MME3_ACC_SERR:
7472                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7473                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7474                 params.num_memories = 128;
7475                 params.derr = false;
7476                 params.disable_clock_gating = true;
7477                 extract_info_from_fw = false;
7478                 break;
7479         case GAUDI_EVENT_MME0_ACC_DERR:
7480         case GAUDI_EVENT_MME1_ACC_DERR:
7481         case GAUDI_EVENT_MME2_ACC_DERR:
7482         case GAUDI_EVENT_MME3_ACC_DERR:
7483                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7484                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7485                 params.num_memories = 128;
7486                 params.derr = true;
7487                 params.disable_clock_gating = true;
7488                 extract_info_from_fw = false;
7489                 break;
7490         case GAUDI_EVENT_MME0_SBAB_SERR:
7491         case GAUDI_EVENT_MME1_SBAB_SERR:
7492         case GAUDI_EVENT_MME2_SBAB_SERR:
7493         case GAUDI_EVENT_MME3_SBAB_SERR:
7494                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7495                 params.block_address =
7496                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7497                 params.num_memories = 33;
7498                 params.derr = false;
7499                 params.disable_clock_gating = true;
7500                 extract_info_from_fw = false;
7501                 break;
7502         case GAUDI_EVENT_MME0_SBAB_DERR:
7503         case GAUDI_EVENT_MME1_SBAB_DERR:
7504         case GAUDI_EVENT_MME2_SBAB_DERR:
7505         case GAUDI_EVENT_MME3_SBAB_DERR:
7506                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7507                 params.block_address =
7508                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7509                 params.num_memories = 33;
7510                 params.derr = true;
7511                 params.disable_clock_gating = true;
7512                 extract_info_from_fw = false;
7513                 break;
7514         default:
7515                 return;
7516         }
7517
7518         if (extract_info_from_fw) {
7519                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7520                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7521                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7522         } else {
7523                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7524                                 &ecc_syndrom, &memory_wrapper_idx);
7525                 if (rc)
7526                         return;
7527         }
7528
7529         dev_err(hdev->dev,
7530                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7531                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7532 }
7533
7534 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7535 {
7536         u64 qman_base;
7537         char desc[32];
7538         u32 qid_base;
7539         u8 index;
7540
7541         switch (event_type) {
7542         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7543                 index = event_type - GAUDI_EVENT_TPC0_QM;
7544                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7545                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7546                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7547                 break;
7548         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7549                 index = event_type - GAUDI_EVENT_MME0_QM;
7550                 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7551                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7552                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7553                 break;
7554         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7555                 index = event_type - GAUDI_EVENT_DMA0_QM;
7556                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7557                 /* GAUDI_QUEUE_ID_CPU_PQ sits between the DMA1 and DMA2 queue IDs, so skip it */
7558                 if (index > 1)
7559                         qid_base++;
7560                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7561                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7562                 break;
7563         case GAUDI_EVENT_NIC0_QM0:
7564                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7565                 qman_base = mmNIC0_QM0_BASE;
7566                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7567                 break;
7568         case GAUDI_EVENT_NIC0_QM1:
7569                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7570                 qman_base = mmNIC0_QM1_BASE;
7571                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7572                 break;
7573         case GAUDI_EVENT_NIC1_QM0:
7574                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7575                 qman_base = mmNIC1_QM0_BASE;
7576                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7577                 break;
7578         case GAUDI_EVENT_NIC1_QM1:
7579                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7580                 qman_base = mmNIC1_QM1_BASE;
7581                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7582                 break;
7583         case GAUDI_EVENT_NIC2_QM0:
7584                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7585                 qman_base = mmNIC2_QM0_BASE;
7586                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7587                 break;
7588         case GAUDI_EVENT_NIC2_QM1:
7589                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7590                 qman_base = mmNIC2_QM1_BASE;
7591                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7592                 break;
7593         case GAUDI_EVENT_NIC3_QM0:
7594                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7595                 qman_base = mmNIC3_QM0_BASE;
7596                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7597                 break;
7598         case GAUDI_EVENT_NIC3_QM1:
7599                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7600                 qman_base = mmNIC3_QM1_BASE;
7601                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7602                 break;
7603         case GAUDI_EVENT_NIC4_QM0:
7604                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7605                 qman_base = mmNIC4_QM0_BASE;
7606                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7607                 break;
7608         case GAUDI_EVENT_NIC4_QM1:
7609                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7610                 qman_base = mmNIC4_QM1_BASE;
7611                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7612                 break;
7613         default:
7614                 return;
7615         }
7616
7617         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7618 }
7619
7620 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7621                                         bool razwi)
7622 {
7623         char desc[64] = "";
7624
7625         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7626         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7627                 event_type, desc);
7628
7629         if (razwi) {
7630                 gaudi_print_razwi_info(hdev);
7631                 gaudi_print_mmu_error_info(hdev);
7632         }
7633 }
7634
7635 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7636                                         struct cpucp_pkt_sync_err *sync_err)
7637 {
7638         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7639
7640         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7641                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7642 }
7643
7644 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7645                                         struct hl_eq_fw_alive *fw_alive)
7646 {
7647         dev_err(hdev->dev,
7648                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7649                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7650                 "Minor" : "Critical", fw_alive->process_id,
7651                 fw_alive->thread_id, fw_alive->uptime_seconds);
7652 }
7653
7654 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7655 {
7656         struct gaudi_device *gaudi = hdev->asic_specific;
7657
7658         /* Unmask all IRQs since some could have been received
7659          * during the soft reset
7660          */
7661         return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7662 }
7663
7664 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7665                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7666 {
7667         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7668         int rc = 0;
7669
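        /*
         * Prefer the ECC data reported by firmware when HBM ECC reporting is
         * enabled; fall back to reading the HBM memory-controller registers
         * directly only when firmware security is disabled.
         */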
7670         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7671                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7672                 if (!hbm_ecc_data) {
7673                         dev_err(hdev->dev, "No FW ECC data");
7674                         return 0;
7675                 }
7676
7677                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7678                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7679                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7680                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7681                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7682                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7683                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7684                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7685                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7686                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7687                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7688                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7689                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7690                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7691
7692                 dev_err(hdev->dev,
7693                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7694                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7695                 dev_err(hdev->dev,
7696                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7697                         device, ch, hbm_ecc_data->first_addr, type,
7698                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7699                         hbm_ecc_data->dec_cnt);
7700                 return 0;
7701         }
7702
7703         if (hdev->asic_prop.fw_security_enabled) {
7704                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7705                 return 0;
7706         }
7707
7708         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7709         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
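                     /* Registers 0x06C and 0x07C hold the interrupt bits of
                      * pseudo channels (ch * 2) and (ch * 2 + 1) respectively.
                      * Fold the high status byte onto the low byte so a bit
                      * set in either byte gets reported.
                      */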
7710                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7711                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7712                 if (val) {
7713                         rc = -EIO;
7714                         dev_err(hdev->dev,
7715                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7716                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7717                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7718                                 (val >> 4) & 0x1);
7719
7720                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7721                         dev_err(hdev->dev,
7722                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7723                                 device, ch * 2,
7724                                 RREG32(base + ch * 0x1000 + 0x064),
7725                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7726                                 (val2 & 0xFF0000) >> 16,
7727                                 (val2 & 0xFF000000) >> 24);
7728                 }
7729
7730                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7731                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7732                 if (val) {
7733                         rc = -EIO;
7734                         dev_err(hdev->dev,
7735                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7736                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7737                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7738                                 (val >> 4) & 0x1);
7739
7740                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7741                         dev_err(hdev->dev,
7742                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7743                                 device, ch * 2 + 1,
7744                                 RREG32(base + ch * 0x1000 + 0x074),
7745                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7746                                 (val2 & 0xFF0000) >> 16,
7747                                 (val2 & 0xFF000000) >> 24);
7748                 }
7749
7750                 /* Clear interrupts */
7751                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7752                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7753                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7754                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7755                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7756                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7757         }
7758
7759         val  = RREG32(base + 0x8F30);
7760         val2 = RREG32(base + 0x8F34);
7761         if (val | val2) {
7762                 rc = -EIO;
7763                 dev_err(hdev->dev,
7764                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7765                         device, val, val2);
7766         }
7767         val  = RREG32(base + 0x8F40);
7768         val2 = RREG32(base + 0x8F44);
7769         if (val | val2) {
7770                 rc = -EIO;
7771                 dev_err(hdev->dev,
7772                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7773                         device, val, val2);
7774         }
7775
7776         return rc;
7777 }
7778
7779 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7780 {
7781         switch (hbm_event_type) {
7782         case GAUDI_EVENT_HBM0_SPI_0:
7783         case GAUDI_EVENT_HBM0_SPI_1:
7784                 return 0;
7785         case GAUDI_EVENT_HBM1_SPI_0:
7786         case GAUDI_EVENT_HBM1_SPI_1:
7787                 return 1;
7788         case GAUDI_EVENT_HBM2_SPI_0:
7789         case GAUDI_EVENT_HBM2_SPI_1:
7790                 return 2;
7791         case GAUDI_EVENT_HBM3_SPI_0:
7792         case GAUDI_EVENT_HBM3_SPI_1:
7793                 return 3;
7794         default:
7795                 break;
7796         }
7797
7798         /* Should never happen */
7799         return 0;
7800 }
7801
7802 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7803                                         char *interrupt_name)
7804 {
7805         struct gaudi_device *gaudi = hdev->asic_specific;
7806         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7807         bool soft_reset_required = false;
7808
7809         /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
7810          * gating, and thus cannot be done in CPU-CP and should be done instead
7811          * by the driver.
7812          */
7813
7814         mutex_lock(&gaudi->clk_gate_mutex);
7815
7816         hdev->asic_funcs->disable_clock_gating(hdev);
7817
7818         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7819                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7820
7821         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7822                 if (tpc_interrupts_cause & BIT(i)) {
7823                         dev_err_ratelimited(hdev->dev,
7824                                         "TPC%d_%s interrupt cause: %s\n",
7825                                         tpc_id, interrupt_name,
7826                                         gaudi_tpc_interrupts_cause[i]);
7827                         /* If this is QM error, we need to soft-reset */
7828                         if (i == 15)
7829                                 soft_reset_required = true;
7830                 }
7831
7832         /* Clear interrupts */
7833         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7834
7835         hdev->asic_funcs->set_clock_gating(hdev);
7836
7837         mutex_unlock(&gaudi->clk_gate_mutex);
7838
7839         return soft_reset_required;
7840 }
7841
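     /* TPC DEC event IDs are spaced two apart in the event map, so shifting
      * the offset from GAUDI_EVENT_TPC0_DEC right by one yields the TPC index.
      */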
7842 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7843 {
7844         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7845 }
7846
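     /* TPC KRN_ERR event IDs are spaced six apart in the event map, hence the
      * divide by six to recover the TPC index.
      */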
7847 static int tpc_krn_event_to_tpc_id(u16 tpc_krn_event_type)
7848 {
7849         return (tpc_krn_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7850 }
7851
7852 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7853                                         u16 event_type)
7854 {
7855         switch (event_type) {
7856         case GAUDI_EVENT_FIX_POWER_ENV_S:
7857                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7858                 dev_info_ratelimited(hdev->dev,
7859                         "Clock throttling due to power consumption\n");
7860                 break;
7861
7862         case GAUDI_EVENT_FIX_POWER_ENV_E:
7863                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7864                 dev_info_ratelimited(hdev->dev,
7865                         "Power envelope is safe, back to optimal clock\n");
7866                 break;
7867
7868         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7869                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7870                 dev_info_ratelimited(hdev->dev,
7871                         "Clock throttling due to overheating\n");
7872                 break;
7873
7874         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7875                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7876                 dev_info_ratelimited(hdev->dev,
7877                         "Thermal envelope is safe, back to optimal clock\n");
7878                 break;
7879
7880         default:
7881                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7882                         event_type);
7883                 break;
7884         }
7885 }
7886
7887 static void gaudi_handle_eqe(struct hl_device *hdev,
7888                                 struct hl_eq_entry *eq_entry)
7889 {
7890         struct gaudi_device *gaudi = hdev->asic_specific;
7891         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7892         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7893                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7894         bool reset_required;
7895         u8 cause;
7896         int rc;
7897
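             /* Reject out-of-range types before they are used to index the
              * statistics arrays and the IRQ map table.
              */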
7898         if (event_type >= GAUDI_EVENT_SIZE) {
7899                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
7900                                 event_type, GAUDI_EVENT_SIZE - 1);
7901                 return;
7902         }
7903
7904         gaudi->events_stat[event_type]++;
7905         gaudi->events_stat_aggregate[event_type]++;
7906
7907         switch (event_type) {
7908         case GAUDI_EVENT_PCIE_CORE_DERR:
7909         case GAUDI_EVENT_PCIE_IF_DERR:
7910         case GAUDI_EVENT_PCIE_PHY_DERR:
7911         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7912         case GAUDI_EVENT_MME0_ACC_DERR:
7913         case GAUDI_EVENT_MME0_SBAB_DERR:
7914         case GAUDI_EVENT_MME1_ACC_DERR:
7915         case GAUDI_EVENT_MME1_SBAB_DERR:
7916         case GAUDI_EVENT_MME2_ACC_DERR:
7917         case GAUDI_EVENT_MME2_SBAB_DERR:
7918         case GAUDI_EVENT_MME3_ACC_DERR:
7919         case GAUDI_EVENT_MME3_SBAB_DERR:
7920         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7921                 fallthrough;
7922         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7923         case GAUDI_EVENT_PSOC_MEM_DERR:
7924         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7925         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7926         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7927         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7928         case GAUDI_EVENT_MMU_DERR:
7929         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7930                 gaudi_print_irq_info(hdev, event_type, true);
7931                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7932                 goto reset_device;
7933
7934         case GAUDI_EVENT_GIC500:
7935         case GAUDI_EVENT_AXI_ECC:
7936         case GAUDI_EVENT_L2_RAM_ECC:
7937         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7938                 gaudi_print_irq_info(hdev, event_type, false);
7939                 goto reset_device;
7940
7941         case GAUDI_EVENT_HBM0_SPI_0:
7942         case GAUDI_EVENT_HBM1_SPI_0:
7943         case GAUDI_EVENT_HBM2_SPI_0:
7944         case GAUDI_EVENT_HBM3_SPI_0:
7945                 gaudi_print_irq_info(hdev, event_type, false);
7946                 gaudi_hbm_read_interrupts(hdev,
7947                                 gaudi_hbm_event_to_dev(event_type),
7948                                 &eq_entry->hbm_ecc_data);
7949                 goto reset_device;
7950
7951         case GAUDI_EVENT_HBM0_SPI_1:
7952         case GAUDI_EVENT_HBM1_SPI_1:
7953         case GAUDI_EVENT_HBM2_SPI_1:
7954         case GAUDI_EVENT_HBM3_SPI_1:
7955                 gaudi_print_irq_info(hdev, event_type, false);
7956                 gaudi_hbm_read_interrupts(hdev,
7957                                 gaudi_hbm_event_to_dev(event_type),
7958                                 &eq_entry->hbm_ecc_data);
7959                 hl_fw_unmask_irq(hdev, event_type);
7960                 break;
7961
7962         case GAUDI_EVENT_TPC0_DEC:
7963         case GAUDI_EVENT_TPC1_DEC:
7964         case GAUDI_EVENT_TPC2_DEC:
7965         case GAUDI_EVENT_TPC3_DEC:
7966         case GAUDI_EVENT_TPC4_DEC:
7967         case GAUDI_EVENT_TPC5_DEC:
7968         case GAUDI_EVENT_TPC6_DEC:
7969         case GAUDI_EVENT_TPC7_DEC:
7970                 gaudi_print_irq_info(hdev, event_type, true);
7971                 reset_required = gaudi_tpc_read_interrupts(hdev,
7972                                         tpc_dec_event_to_tpc_id(event_type),
7973                                         "AXI_SLV_DEC_Error");
7974                 if (reset_required) {
7975                         dev_err(hdev->dev, "hard reset required due to %s\n",
7976                                 gaudi_irq_map_table[event_type].name);
7977
7978                         goto reset_device;
7979                 } else {
7980                         hl_fw_unmask_irq(hdev, event_type);
7981                 }
7982                 break;
7983
7984         case GAUDI_EVENT_TPC0_KRN_ERR:
7985         case GAUDI_EVENT_TPC1_KRN_ERR:
7986         case GAUDI_EVENT_TPC2_KRN_ERR:
7987         case GAUDI_EVENT_TPC3_KRN_ERR:
7988         case GAUDI_EVENT_TPC4_KRN_ERR:
7989         case GAUDI_EVENT_TPC5_KRN_ERR:
7990         case GAUDI_EVENT_TPC6_KRN_ERR:
7991         case GAUDI_EVENT_TPC7_KRN_ERR:
7992                 gaudi_print_irq_info(hdev, event_type, true);
7993                 reset_required = gaudi_tpc_read_interrupts(hdev,
7994                                         tpc_krn_event_to_tpc_id(event_type),
7995                                         "KRN_ERR");
7996                 if (reset_required) {
7997                         dev_err(hdev->dev, "hard reset required due to %s\n",
7998                                 gaudi_irq_map_table[event_type].name);
7999
8000                         goto reset_device;
8001                 } else {
8002                         hl_fw_unmask_irq(hdev, event_type);
8003                 }
8004                 break;
8005
8006         case GAUDI_EVENT_PCIE_CORE_SERR:
8007         case GAUDI_EVENT_PCIE_IF_SERR:
8008         case GAUDI_EVENT_PCIE_PHY_SERR:
8009         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8010         case GAUDI_EVENT_MME0_ACC_SERR:
8011         case GAUDI_EVENT_MME0_SBAB_SERR:
8012         case GAUDI_EVENT_MME1_ACC_SERR:
8013         case GAUDI_EVENT_MME1_SBAB_SERR:
8014         case GAUDI_EVENT_MME2_ACC_SERR:
8015         case GAUDI_EVENT_MME2_SBAB_SERR:
8016         case GAUDI_EVENT_MME3_ACC_SERR:
8017         case GAUDI_EVENT_MME3_SBAB_SERR:
8018         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8019         case GAUDI_EVENT_CPU_IF_ECC_SERR:
8020         case GAUDI_EVENT_PSOC_MEM_SERR:
8021         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8022         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8023         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8024         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8025                 fallthrough;
8026         case GAUDI_EVENT_MMU_SERR:
8027                 gaudi_print_irq_info(hdev, event_type, true);
8028                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8029                 hl_fw_unmask_irq(hdev, event_type);
8030                 break;
8031
8032         case GAUDI_EVENT_PCIE_DEC:
8033         case GAUDI_EVENT_MME0_WBC_RSP:
8034         case GAUDI_EVENT_MME0_SBAB0_RSP:
8035         case GAUDI_EVENT_MME1_WBC_RSP:
8036         case GAUDI_EVENT_MME1_SBAB0_RSP:
8037         case GAUDI_EVENT_MME2_WBC_RSP:
8038         case GAUDI_EVENT_MME2_SBAB0_RSP:
8039         case GAUDI_EVENT_MME3_WBC_RSP:
8040         case GAUDI_EVENT_MME3_SBAB0_RSP:
8041         case GAUDI_EVENT_CPU_AXI_SPLITTER:
8042         case GAUDI_EVENT_PSOC_AXI_DEC:
8043         case GAUDI_EVENT_PSOC_PRSTN_FALL:
8044         case GAUDI_EVENT_MMU_PAGE_FAULT:
8045         case GAUDI_EVENT_MMU_WR_PERM:
8046         case GAUDI_EVENT_RAZWI_OR_ADC:
8047         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8048         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8049         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8050                 fallthrough;
8051         case GAUDI_EVENT_NIC0_QM0:
8052         case GAUDI_EVENT_NIC0_QM1:
8053         case GAUDI_EVENT_NIC1_QM0:
8054         case GAUDI_EVENT_NIC1_QM1:
8055         case GAUDI_EVENT_NIC2_QM0:
8056         case GAUDI_EVENT_NIC2_QM1:
8057         case GAUDI_EVENT_NIC3_QM0:
8058         case GAUDI_EVENT_NIC3_QM1:
8059         case GAUDI_EVENT_NIC4_QM0:
8060         case GAUDI_EVENT_NIC4_QM1:
8061         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8062                 gaudi_print_irq_info(hdev, event_type, true);
8063                 gaudi_handle_qman_err(hdev, event_type);
8064                 hl_fw_unmask_irq(hdev, event_type);
8065                 break;
8066
8067         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8068                 gaudi_print_irq_info(hdev, event_type, true);
8069                 goto reset_device;
8070
8071         case GAUDI_EVENT_TPC0_BMON_SPMU:
8072         case GAUDI_EVENT_TPC1_BMON_SPMU:
8073         case GAUDI_EVENT_TPC2_BMON_SPMU:
8074         case GAUDI_EVENT_TPC3_BMON_SPMU:
8075         case GAUDI_EVENT_TPC4_BMON_SPMU:
8076         case GAUDI_EVENT_TPC5_BMON_SPMU:
8077         case GAUDI_EVENT_TPC6_BMON_SPMU:
8078         case GAUDI_EVENT_TPC7_BMON_SPMU:
8079         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8080                 gaudi_print_irq_info(hdev, event_type, false);
8081                 hl_fw_unmask_irq(hdev, event_type);
8082                 break;
8083
8084         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8085                 gaudi_print_irq_info(hdev, event_type, false);
8086                 gaudi_print_sm_sei_info(hdev, event_type,
8087                                         &eq_entry->sm_sei_data);
8088                 rc = hl_state_dump(hdev);
8089                 if (rc)
8090                         dev_err(hdev->dev,
8091                                 "Error during system state dump %d\n", rc);
8092                 hl_fw_unmask_irq(hdev, event_type);
8093                 break;
8094
8095         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8096                 gaudi_print_clk_change_info(hdev, event_type);
8097                 hl_fw_unmask_irq(hdev, event_type);
8098                 break;
8099
8100         case GAUDI_EVENT_PSOC_GPIO_U16_0:
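                     /* The cause code is carried in the low byte of the first
                      * EQ data word.
                      */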
8101                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8102                 dev_err(hdev->dev,
8103                         "Received high temp H/W interrupt %d (cause %d)\n",
8104                         event_type, cause);
8105                 break;
8106
8107         case GAUDI_EVENT_DEV_RESET_REQ:
8108                 gaudi_print_irq_info(hdev, event_type, false);
8109                 goto reset_device;
8110
8111         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8112                 gaudi_print_irq_info(hdev, event_type, false);
8113                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8114                 goto reset_device;
8115
8116         case GAUDI_EVENT_FW_ALIVE_S:
8117                 gaudi_print_irq_info(hdev, event_type, false);
8118                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8119                 goto reset_device;
8120
8121         default:
8122                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8123                                 event_type);
8124                 break;
8125         }
8126
8127         return;
8128
8129 reset_device:
8130         if (hdev->hard_reset_on_fw_events)
8131                 hl_device_reset(hdev, HL_RESET_HARD);
8132         else
8133                 hl_fw_unmask_irq(hdev, event_type);
8134 }
8135
8136 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8137                                         u32 *size)
8138 {
8139         struct gaudi_device *gaudi = hdev->asic_specific;
8140
8141         if (aggregate) {
8142                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8143                 return gaudi->events_stat_aggregate;
8144         }
8145
8146         *size = (u32) sizeof(gaudi->events_stat);
8147         return gaudi->events_stat;
8148 }
8149
8150 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8151                                         u32 flags)
8152 {
8153         struct gaudi_device *gaudi = hdev->asic_specific;
8154         u32 status, timeout_usec;
8155         int rc;
8156
8157         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8158                 hdev->hard_reset_pending)
8159                 return 0;
8160
8161         if (hdev->pldm)
8162                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8163         else
8164                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8165
8166         /* L0 & L1 invalidation */
8167         WREG32(mmSTLB_INV_PS, 3);
8168         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8169         WREG32(mmSTLB_INV_PS, 2);
8170
8171         rc = hl_poll_timeout(
8172                 hdev,
8173                 mmSTLB_INV_PS,
8174                 status,
8175                 !status,
8176                 1000,
8177                 timeout_usec);
8178
8179         WREG32(mmSTLB_INV_SET, 0);
8180
8181         if (rc) {
8182                 dev_err_ratelimited(hdev->dev,
8183                                         "MMU cache invalidation timeout\n");
8184                 hl_device_reset(hdev, HL_RESET_HARD);
8185         }
8186
8187         return rc;
8188 }
8189
8190 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8191                                                 bool is_hard, u32 flags,
8192                                                 u32 asid, u64 va, u64 size)
8193 {
8194         /* Treat as invalidate all because there is no range invalidation
8195          * in Gaudi
8196          */
8197         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8198 }
8199
8200 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8201                                         u32 asid, u64 phys_addr)
8202 {
8203         u32 status, timeout_usec;
8204         int rc;
8205
8206         if (hdev->pldm)
8207                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8208         else
8209                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8210
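             /* Program the hop0 page-table physical address of this ASID.
              * The address is split across the PA43_12 and PA49_44 registers;
              * the update is then kicked off via MMU_BUSY and polled for
              * completion below.
              */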
8211         WREG32(MMU_ASID, asid);
8212         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8213         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8214         WREG32(MMU_BUSY, 0x80000000);
8215
8216         rc = hl_poll_timeout(
8217                 hdev,
8218                 MMU_BUSY,
8219                 status,
8220                 !(status & 0x80000000),
8221                 1000,
8222                 timeout_usec);
8223
8224         if (rc) {
8225                 dev_err(hdev->dev,
8226                         "Timeout during MMU hop0 config of asid %d\n", asid);
8227                 return rc;
8228         }
8229
8230         return 0;
8231 }
8232
8233 static int gaudi_send_heartbeat(struct hl_device *hdev)
8234 {
8235         struct gaudi_device *gaudi = hdev->asic_specific;
8236
8237         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8238                 return 0;
8239
8240         return hl_fw_send_heartbeat(hdev);
8241 }
8242
8243 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8244 {
8245         struct gaudi_device *gaudi = hdev->asic_specific;
8246         struct asic_fixed_properties *prop = &hdev->asic_prop;
8247         int rc;
8248
8249         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8250                 return 0;
8251
8252         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8253                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8254                                         mmCPU_BOOT_ERR1);
8255         if (rc)
8256                 return rc;
8257
8258         if (!strlen(prop->cpucp_info.card_name))
8259                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8260                                 CARD_NAME_MAX_LEN);
8261
8262         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8263
8264         set_default_power_values(hdev);
8265
8266         hdev->max_power = prop->max_power_default;
8267
8268         return 0;
8269 }
8270
8271 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8272                                         u8 mask_len, struct seq_file *s)
8273 {
8274         struct gaudi_device *gaudi = hdev->asic_specific;
8275         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8276         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8277         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8278         unsigned long *mask = (unsigned long *)mask_arr;
8279         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8280         bool is_idle = true, is_eng_idle, is_slave;
8281         u64 offset;
8282         int i, dma_id, port;
8283
8284         mutex_lock(&gaudi->clk_gate_mutex);
8285
8286         hdev->asic_funcs->disable_clock_gating(hdev);
8287
8288         if (s)
8289                 seq_puts(s,
8290                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8291                         "---  -------  ------------  ----------  -------------\n");
8292
8293         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8294                 dma_id = gaudi_dma_assignment[i];
8295                 offset = dma_id * DMA_QMAN_OFFSET;
8296
8297                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8298                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8299                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8300                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8301                                 IS_DMA_IDLE(dma_core_sts0);
8302                 is_idle &= is_eng_idle;
8303
8304                 if (mask && !is_eng_idle)
8305                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8306                 if (s)
8307                         seq_printf(s, fmt, dma_id,
8308                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8309                                 qm_cgm_sts, dma_core_sts0);
8310         }
8311
8312         if (s)
8313                 seq_puts(s,
8314                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8315                         "---  -------  ------------  ----------  ----------\n");
8316
8317         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8318                 offset = i * TPC_QMAN_OFFSET;
8319                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8320                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8321                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8322                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8323                                 IS_TPC_IDLE(tpc_cfg_sts);
8324                 is_idle &= is_eng_idle;
8325
8326                 if (mask && !is_eng_idle)
8327                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8328                 if (s)
8329                         seq_printf(s, fmt, i,
8330                                 is_eng_idle ? "Y" : "N",
8331                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8332         }
8333
8334         if (s)
8335                 seq_puts(s,
8336                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8337                         "---  -------  ------------  ----------  -----------\n");
8338
8339         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8340                 offset = i * MME_QMAN_OFFSET;
8341                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8342                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8343
8344                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8345                 is_slave = i % 2;
8346                 if (!is_slave) {
8347                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8348                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8349                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8350                 }
8351
8352                 is_idle &= is_eng_idle;
8353
8354                 if (mask && !is_eng_idle)
8355                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8356                 if (s) {
8357                         if (!is_slave)
8358                                 seq_printf(s, fmt, i,
8359                                         is_eng_idle ? "Y" : "N",
8360                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8361                         else
8362                                 seq_printf(s, mme_slave_fmt, i,
8363                                         is_eng_idle ? "Y" : "N", "-",
8364                                         "-", mme_arch_sts);
8365                 }
8366         }
8367
8368         if (s)
8369                 seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8370                                 "---  -------  ------------  ----------\n");
8371
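             /* NIC engines are paired into macros: QM0 serves the even port
              * and QM1 the odd port of each macro. Only ports that were
              * actually initialized are checked.
              */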
8372         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8373                 offset = i * NIC_MACRO_QMAN_OFFSET;
8374                 port = 2 * i;
8375                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8376                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8377                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8378                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8379                         is_idle &= is_eng_idle;
8380
8381                         if (mask && !is_eng_idle)
8382                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8383                         if (s)
8384                                 seq_printf(s, nic_fmt, port,
8385                                                 is_eng_idle ? "Y" : "N",
8386                                                 qm_glbl_sts0, qm_cgm_sts);
8387                 }
8388
8389                 port = 2 * i + 1;
8390                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8391                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8392                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8393                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8394                         is_idle &= is_eng_idle;
8395
8396                         if (mask && !is_eng_idle)
8397                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8398                         if (s)
8399                                 seq_printf(s, nic_fmt, port,
8400                                                 is_eng_idle ? "Y" : "N",
8401                                                 qm_glbl_sts0, qm_cgm_sts);
8402                 }
8403         }
8404
8405         if (s)
8406                 seq_puts(s, "\n");
8407
8408         hdev->asic_funcs->set_clock_gating(hdev);
8409
8410         mutex_unlock(&gaudi->clk_gate_mutex);
8411
8412         return is_idle;
8413 }
8414
8415 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8416         __acquires(&gaudi->hw_queues_lock)
8417 {
8418         struct gaudi_device *gaudi = hdev->asic_specific;
8419
8420         spin_lock(&gaudi->hw_queues_lock);
8421 }
8422
8423 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8424         __releases(&gaudi->hw_queues_lock)
8425 {
8426         struct gaudi_device *gaudi = hdev->asic_specific;
8427
8428         spin_unlock(&gaudi->hw_queues_lock);
8429 }
8430
8431 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8432 {
8433         return hdev->pdev->device;
8434 }
8435
8436 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8437                                 size_t max_size)
8438 {
8439         struct gaudi_device *gaudi = hdev->asic_specific;
8440
8441         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8442                 return 0;
8443
8444         return hl_fw_get_eeprom_data(hdev, data, max_size);
8445 }
8446
8447 /*
8448  * this function should be used only during initialization and/or after reset,
8449  * when there are no active users.
8450  */
8451 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8452                                 u32 tpc_id)
8453 {
8454         struct gaudi_device *gaudi = hdev->asic_specific;
8455         u64 kernel_timeout;
8456         u32 status, offset;
8457         int rc;
8458
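             /* TPC config blocks are laid out at a fixed stride, so the gap
              * between the TPC1 and TPC0 status registers gives the per-engine
              * offset.
              */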
8459         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8460
8461         if (hdev->pldm)
8462                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8463         else
8464                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8465
8466         mutex_lock(&gaudi->clk_gate_mutex);
8467
8468         hdev->asic_funcs->disable_clock_gating(hdev);
8469
8470         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8471                         lower_32_bits(tpc_kernel));
8472         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8473                         upper_32_bits(tpc_kernel));
8474
8475         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8476                         lower_32_bits(tpc_kernel));
8477         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8478                         upper_32_bits(tpc_kernel));
8479         /* set a valid LUT pointer, content is of no significance */
8480         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8481                         lower_32_bits(tpc_kernel));
8482         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8483                         upper_32_bits(tpc_kernel));
8484
8485         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8486                         lower_32_bits(CFG_BASE +
8487                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8488
8489         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8490                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8491                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8492         /* wait a bit for the engine to start executing */
8493         usleep_range(1000, 1500);
8494
8495         /* wait until engine has finished executing */
8496         rc = hl_poll_timeout(
8497                 hdev,
8498                 mmTPC0_CFG_STATUS + offset,
8499                 status,
8500                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8501                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8502                 1000,
8503                 kernel_timeout);
8504
8505         if (rc) {
8506                 dev_err(hdev->dev,
8507                         "Timeout while waiting for TPC%d icache prefetch\n",
8508                         tpc_id);
8509                 hdev->asic_funcs->set_clock_gating(hdev);
8510                 mutex_unlock(&gaudi->clk_gate_mutex);
8511                 return -EIO;
8512         }
8513
8514         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8515                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8516
8517         /* wait a bit for the engine to start executing */
8518         usleep_range(1000, 1500);
8519
8520         /* wait until engine has finished executing */
8521         rc = hl_poll_timeout(
8522                 hdev,
8523                 mmTPC0_CFG_STATUS + offset,
8524                 status,
8525                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8526                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8527                 1000,
8528                 kernel_timeout);
8529
8530         if (rc) {
8531                 dev_err(hdev->dev,
8532                         "Timeout while waiting for TPC%d vector pipe\n",
8533                         tpc_id);
8534                 hdev->asic_funcs->set_clock_gating(hdev);
8535                 mutex_unlock(&gaudi->clk_gate_mutex);
8536                 return -EIO;
8537         }
8538
8539         rc = hl_poll_timeout(
8540                 hdev,
8541                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8542                 status,
8543                 (status == 0),
8544                 1000,
8545                 kernel_timeout);
8546
8547         hdev->asic_funcs->set_clock_gating(hdev);
8548         mutex_unlock(&gaudi->clk_gate_mutex);
8549
8550         if (rc) {
8551                 dev_err(hdev->dev,
8552                         "Timeout while waiting for TPC%d kernel to execute\n",
8553                         tpc_id);
8554                 return -EIO;
8555         }
8556
8557         return 0;
8558 }
8559
8560 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8561                 struct hl_ctx *ctx)
8562 {
8563         struct gaudi_device *gaudi = hdev->asic_specific;
8564         int min_alloc_order, rc, collective_cb_size;
8565
8566         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8567                 return 0;
8568
8569         hdev->internal_cb_pool_virt_addr =
8570                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8571                                         HOST_SPACE_INTERNAL_CB_SZ,
8572                                         &hdev->internal_cb_pool_dma_addr,
8573                                         GFP_KERNEL | __GFP_ZERO);
8574
8575         if (!hdev->internal_cb_pool_virt_addr)
8576                 return -ENOMEM;
8577
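             /* Pool granularity is the size of one collective wait CB:
              * five MSG_SHORT packets plus a FENCE packet.
              */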
8578         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8579                         sizeof(struct packet_fence);
8580         min_alloc_order = ilog2(collective_cb_size);
8581
8582         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8583         if (!hdev->internal_cb_pool) {
8584                 dev_err(hdev->dev,
8585                         "Failed to create internal CB pool\n");
8586                 rc = -ENOMEM;
8587                 goto free_internal_cb_pool;
8588         }
8589
8590         rc = gen_pool_add(hdev->internal_cb_pool,
8591                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8592                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8593         if (rc) {
8594                 dev_err(hdev->dev,
8595                         "Failed to add memory to internal CB pool\n");
8596                 rc = -EFAULT;
8597                 goto destroy_internal_cb_pool;
8598         }
8599
8600         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8601                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8602                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8603
8604         if (!hdev->internal_cb_va_base) {
8605                 rc = -ENOMEM;
8606                 goto destroy_internal_cb_pool;
8607         }
8608
8609         mutex_lock(&ctx->mmu_lock);
8610         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8611                         hdev->internal_cb_pool_dma_addr,
8612                         HOST_SPACE_INTERNAL_CB_SZ);
8613
8614         hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
8615         mutex_unlock(&ctx->mmu_lock);
8616
8617         if (rc)
8618                 goto unreserve_internal_cb_pool;
8619
8620         return 0;
8621
8622 unreserve_internal_cb_pool:
8623         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8624                         HOST_SPACE_INTERNAL_CB_SZ);
8625 destroy_internal_cb_pool:
8626         gen_pool_destroy(hdev->internal_cb_pool);
8627 free_internal_cb_pool:
8628         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8629                         HOST_SPACE_INTERNAL_CB_SZ,
8630                         hdev->internal_cb_pool_virt_addr,
8631                         hdev->internal_cb_pool_dma_addr);
8632
8633         return rc;
8634 }
8635
8636 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8637                 struct hl_ctx *ctx)
8638 {
8639         struct gaudi_device *gaudi = hdev->asic_specific;
8640
8641         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8642                 return;
8643
8644         mutex_lock(&ctx->mmu_lock);
8645         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8646                         HOST_SPACE_INTERNAL_CB_SZ);
8647         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8648                         HOST_SPACE_INTERNAL_CB_SZ);
8649         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
8650         mutex_unlock(&ctx->mmu_lock);
8651
8652         gen_pool_destroy(hdev->internal_cb_pool);
8653
8654         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8655                         HOST_SPACE_INTERNAL_CB_SZ,
8656                         hdev->internal_cb_pool_virt_addr,
8657                         hdev->internal_cb_pool_dma_addr);
8658 }
8659
8660 static int gaudi_ctx_init(struct hl_ctx *ctx)
8661 {
8662         int rc;
8663
8664         if (ctx->asid == HL_KERNEL_ASID_ID)
8665                 return 0;
8666
8667         rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8668         if (rc)
8669                 return rc;
8670
8671         rc = gaudi_restore_user_registers(ctx->hdev);
8672         if (rc)
8673                 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8674
8675         return rc;
8676 }
8677
8678 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8679 {
8680         if (ctx->asid == HL_KERNEL_ASID_ID)
8681                 return;
8682
8683         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8684 }
8685
8686 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8687 {
8688         return gaudi_cq_assignment[cq_idx];
8689 }
8690
8691 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8692 {
8693         return sizeof(struct packet_msg_short) +
8694                         sizeof(struct packet_msg_prot) * 2;
8695 }
8696
8697 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8698 {
8699         return sizeof(struct packet_msg_short) * 4 +
8700                         sizeof(struct packet_fence) +
8701                         sizeof(struct packet_msg_prot) * 2;
8702 }
8703
8704 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8705 {
8706         return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8707 }
8708
8709 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8710                                 u32 size, bool eb)
8711 {
8712         struct hl_cb *cb = (struct hl_cb *) data;
8713         struct packet_msg_short *pkt;
8714         u32 value, ctl, pkt_size = sizeof(*pkt);
8715
8716         pkt = cb->kernel_address + size;
8717         memset(pkt, 0, pkt_size);
8718
8719         /* Inc by 1, Mode ADD */
8720         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8721         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8722
8723         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8724         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8725         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8726         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8727         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8728         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8729         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8730
8731         pkt->value = cpu_to_le32(value);
8732         pkt->ctl = cpu_to_le32(ctl);
8733
8734         return size + pkt_size;
8735 }
8736
8737 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8738                                         u16 addr)
8739 {
8740         u32 ctl, pkt_size = sizeof(*pkt);
8741
8742         memset(pkt, 0, pkt_size);
8743
8744         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8745         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8746         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8747         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8748         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8749         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8750
8751         pkt->value = cpu_to_le32(value);
8752         pkt->ctl = cpu_to_le32(ctl);
8753
8754         return pkt_size;
8755 }
8756
8757 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8758                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8759                 u16 sob_val, u16 mon_id)
8760 {
8761         u64 monitor_base;
8762         u32 ctl, value, pkt_size = sizeof(*pkt);
8763         u16 msg_addr_offset;
8764         u8 mask;
8765
8766         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8767                 dev_err(hdev->dev,
8768                         "sob_base %u (mask %#x) is not valid\n",
8769                         sob_base, sob_mask);
8770                 return 0;
8771         }
8772
8773         /*
8774          * monitor_base should be the content of the base0 address registers,
8775          * so it will be added to the msg short offsets
8776          */
8777         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8778
8779         msg_addr_offset =
8780                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8781                                 monitor_base;
8782
8783         memset(pkt, 0, pkt_size);
8784
8785         /* Monitor config packet: bind the monitor to a sync object */
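             /* SOBs are monitored in groups of eight: sob_base / 8 selects the
              * group while 'mask' (built by hl_gen_sob_mask() above) selects
              * the objects within it.
              */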
8786         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8787         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8788         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8789                         0); /* GREATER OR EQUAL */
8790         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8791
8792         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8793         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8794         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8795         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8796         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8797         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8798         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8799
8800         pkt->value = cpu_to_le32(value);
8801         pkt->ctl = cpu_to_le32(ctl);
8802
8803         return pkt_size;
8804 }
8805
8806 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8807 {
8808         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8809
8810         memset(pkt, 0, pkt_size);
8811
8812         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8813         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8814         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8815
8816         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8817         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8818         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8819         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8820
8821         pkt->cfg = cpu_to_le32(cfg);
8822         pkt->ctl = cpu_to_le32(ctl);
8823
8824         return pkt_size;
8825 }
8826
8827 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8828 {
8829         u32 offset, nic_index;
8830
8831         switch (queue_id) {
8832         case GAUDI_QUEUE_ID_DMA_0_0:
8833                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8834                 break;
8835         case GAUDI_QUEUE_ID_DMA_0_1:
8836                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8837                 break;
8838         case GAUDI_QUEUE_ID_DMA_0_2:
8839                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8840                 break;
8841         case GAUDI_QUEUE_ID_DMA_0_3:
8842                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8843                 break;
8844         case GAUDI_QUEUE_ID_DMA_1_0:
8845                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8846                 break;
8847         case GAUDI_QUEUE_ID_DMA_1_1:
8848                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8849                 break;
8850         case GAUDI_QUEUE_ID_DMA_1_2:
8851                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8852                 break;
8853         case GAUDI_QUEUE_ID_DMA_1_3:
8854                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8855                 break;
8856         case GAUDI_QUEUE_ID_DMA_5_0:
8857                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8858                 break;
8859         case GAUDI_QUEUE_ID_DMA_5_1:
8860                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8861                 break;
8862         case GAUDI_QUEUE_ID_DMA_5_2:
8863                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8864                 break;
8865         case GAUDI_QUEUE_ID_DMA_5_3:
8866                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8867                 break;
8868         case GAUDI_QUEUE_ID_TPC_7_0:
8869                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8870                 break;
8871         case GAUDI_QUEUE_ID_TPC_7_1:
8872                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8873                 break;
8874         case GAUDI_QUEUE_ID_TPC_7_2:
8875                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8876                 break;
8877         case GAUDI_QUEUE_ID_TPC_7_3:
8878                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8879                 break;
8880         case GAUDI_QUEUE_ID_NIC_0_0:
8881         case GAUDI_QUEUE_ID_NIC_1_0:
8882         case GAUDI_QUEUE_ID_NIC_2_0:
8883         case GAUDI_QUEUE_ID_NIC_3_0:
8884         case GAUDI_QUEUE_ID_NIC_4_0:
8885         case GAUDI_QUEUE_ID_NIC_5_0:
8886         case GAUDI_QUEUE_ID_NIC_6_0:
8887         case GAUDI_QUEUE_ID_NIC_7_0:
8888         case GAUDI_QUEUE_ID_NIC_8_0:
8889         case GAUDI_QUEUE_ID_NIC_9_0:
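                     /* Each NIC engine exposes four queues, so dividing the
                      * queue offset by four gives the engine index; the fence
                      * register is then found via the macro (engine pair)
                      * offset plus the engine's offset within the macro.
                      */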
8890                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8891                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8892                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8893                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8894                 break;
8895         case GAUDI_QUEUE_ID_NIC_0_1:
8896         case GAUDI_QUEUE_ID_NIC_1_1:
8897         case GAUDI_QUEUE_ID_NIC_2_1:
8898         case GAUDI_QUEUE_ID_NIC_3_1:
8899         case GAUDI_QUEUE_ID_NIC_4_1:
8900         case GAUDI_QUEUE_ID_NIC_5_1:
8901         case GAUDI_QUEUE_ID_NIC_6_1:
8902         case GAUDI_QUEUE_ID_NIC_7_1:
8903         case GAUDI_QUEUE_ID_NIC_8_1:
8904         case GAUDI_QUEUE_ID_NIC_9_1:
8905                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8906                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8907                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8908                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8909                 break;
8910         case GAUDI_QUEUE_ID_NIC_0_2:
8911         case GAUDI_QUEUE_ID_NIC_1_2:
8912         case GAUDI_QUEUE_ID_NIC_2_2:
8913         case GAUDI_QUEUE_ID_NIC_3_2:
8914         case GAUDI_QUEUE_ID_NIC_4_2:
8915         case GAUDI_QUEUE_ID_NIC_5_2:
8916         case GAUDI_QUEUE_ID_NIC_6_2:
8917         case GAUDI_QUEUE_ID_NIC_7_2:
8918         case GAUDI_QUEUE_ID_NIC_8_2:
8919         case GAUDI_QUEUE_ID_NIC_9_2:
8920                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8921                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8922                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8923                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8924                 break;
8925         case GAUDI_QUEUE_ID_NIC_0_3:
8926         case GAUDI_QUEUE_ID_NIC_1_3:
8927         case GAUDI_QUEUE_ID_NIC_2_3:
8928         case GAUDI_QUEUE_ID_NIC_3_3:
8929         case GAUDI_QUEUE_ID_NIC_4_3:
8930         case GAUDI_QUEUE_ID_NIC_5_3:
8931         case GAUDI_QUEUE_ID_NIC_6_3:
8932         case GAUDI_QUEUE_ID_NIC_7_3:
8933         case GAUDI_QUEUE_ID_NIC_8_3:
8934         case GAUDI_QUEUE_ID_NIC_9_3:
8935                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8936                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8937                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8938                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8939                 break;
8940         default:
8941                 return -EINVAL;
8942         }
8943
8944         *addr = CFG_BASE + offset;
8945
8946         return 0;
8947 }
8948
8949 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8950 {
8951         u64 monitor_base;
8952         u32 size = 0;
8953         u16 msg_addr_offset;
8954
8955         /*
8956          * monitor_base should be the content of the base0 address registers,
8957          * so it will be added to the msg short offsets
8958          */
8959         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8960
8961         /* First monitor config packet: low address of the sync */
8962         msg_addr_offset =
8963                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8964                                 monitor_base;
8965
8966         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8967                                         msg_addr_offset);
8968
8969         /* Second monitor config packet: high address of the sync */
8970         msg_addr_offset =
8971                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8972                                 monitor_base;
8973
8974         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8975                                         msg_addr_offset);
8976
8977         /*
8978          * Third monitor config packet: the payload, i.e. what to write when the
8979          * sync triggers
8980          */
8981         msg_addr_offset =
8982                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8983                                 monitor_base;
8984
8985         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8986
8987         return size;
8988 }
8989
8990 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8991                                 struct hl_gen_wait_properties *prop)
8992 {
8993         struct hl_cb *cb = (struct hl_cb *) prop->data;
8994         void *buf = cb->kernel_address;
8995         u64 fence_addr = 0;
8996         u32 size = prop->size;
8997
8998         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8999                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
9000                                 prop->q_idx);
9001                 return 0;
9002         }
9003
9004         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
9005         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
9006                         prop->sob_mask, prop->sob_val, prop->mon_id);
9007         size += gaudi_add_fence_pkt(buf + size);
9008
9009         return size;
9010 }
9011
9012 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
9013 {
9014         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
9015
9016         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
9017                 hw_sob->sob_id);
9018
9019         WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9020                         hw_sob->sob_id * 4, 0);
9021
9022         kref_init(&hw_sob->kref);
9023 }
9024
9025 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9026 {
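             /* A magic value in one of the non-reset scratch registers marks
              * a POWER9 host; in that case a full 64-bit DMA mask is used,
              * otherwise fall back to the default 48 bits.
              */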
9027         if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
9028                                                         HL_POWER9_HOST_MAGIC) {
9029                 hdev->power9_64bit_dma_enable = 1;
9030                 hdev->dma_mask = 64;
9031         } else {
9032                 hdev->power9_64bit_dma_enable = 0;
9033                 hdev->dma_mask = 48;
9034         }
9035 }
9036
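/* Return the PSOC timestamp: CNTCVU holds the upper 32 bits, CNTCVL the lower */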
9037 static u64 gaudi_get_device_time(struct hl_device *hdev)
9038 {
9039         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9040
9041         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9042 }
9043
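/* HW block id lookup and HW block mmap are not supported on Gaudi */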
9044 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9045                                 u32 *block_size, u32 *block_id)
9046 {
9047         return -EPERM;
9048 }
9049
9050 static int gaudi_block_mmap(struct hl_device *hdev,
9051                                 struct vm_area_struct *vma,
9052                                 u32 block_id, u32 block_size)
9053 {
9054         return -EPERM;
9055 }
9056
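/*
 * Notify the firmware that the driver is ready to receive events by writing
 * the cpu_id of the INTS_REGISTER event to the GIC SETSPI register, or to the
 * firmware-provided host interrupts register when GIC interrupts are not
 * enabled.
 */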
9057 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9058 {
9059         struct cpu_dyn_regs *dyn_regs =
9060                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9061         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9062                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
9063                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
9064
9065         WREG32(irq_handler_offset,
9066                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
9067 }
9068
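/* Translate the driver's PLL index to the firmware PLL index; -EINVAL if unknown */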
9069 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9070 {
9071         switch (pll_idx) {
9072         case HL_GAUDI_CPU_PLL: return CPU_PLL;
9073         case HL_GAUDI_PCI_PLL: return PCI_PLL;
9074         case HL_GAUDI_NIC_PLL: return NIC_PLL;
9075         case HL_GAUDI_DMA_PLL: return DMA_PLL;
9076         case HL_GAUDI_MESH_PLL: return MESH_PLL;
9077         case HL_GAUDI_MME_PLL: return MME_PLL;
9078         case HL_GAUDI_TPC_PLL: return TPC_PLL;
9079         case HL_GAUDI_IF_PLL: return IF_PLL;
9080         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9081         case HL_GAUDI_HBM_PLL: return HBM_PLL;
9082         default: return -EINVAL;
9083         }
9084 }
9085
9086 static int gaudi_add_sync_to_engine_map_entry(
9087         struct hl_sync_to_engine_map *map, u32 reg_value,
9088         enum hl_sync_engine_type engine_type, u32 engine_id)
9089 {
9090         struct hl_sync_to_engine_map_entry *entry;
9091
9092         /* The register value represents a partial address of the sync
9093          * object and is used as a unique identifier. For this the cfg
9094          * base bits must be cleared from the value.
9095          */
9096         if (reg_value == 0 || reg_value == 0xffffffff)
9097                 return 0;
9098         reg_value -= (u32)CFG_BASE;
9099
9100         /* create a new hash entry */
9101         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9102         if (!entry)
9103                 return -ENOMEM;
9104         entry->engine_type = engine_type;
9105         entry->engine_id = engine_id;
9106         entry->sync_id = reg_value;
9107         hash_add(map->tb, &entry->node, reg_value);
9108
9109         return 0;
9110 }
9111
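/*
 * Build the sync-object-to-engine map used by the state dump: read the
 * configured sync object register of every TPC, MME and DMA engine (with
 * clock gating disabled where needed) and record which engine owns each
 * sync id.
 */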
9112 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9113                                 struct hl_sync_to_engine_map *map)
9114 {
9115         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9116         struct gaudi_device *gaudi = hdev->asic_specific;
9117         int i, j, rc;
9118         u32 reg_value;
9119
9120         /* Iterate over TPC engines */
9121         for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9122                 /* TPC registers must be accessed with clock gating disabled */
9123                 mutex_lock(&gaudi->clk_gate_mutex);
9124                 hdev->asic_funcs->disable_clock_gating(hdev);
9125
9126                 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9127                                         sds->props[SP_NEXT_TPC] * i);
9128
9129                 /* We can reenable clock_gating */
9130                 hdev->asic_funcs->set_clock_gating(hdev);
9131                 mutex_unlock(&gaudi->clk_gate_mutex);
9132
9133                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9134                                                         ENGINE_TPC, i);
9135                 if (rc)
9136                         goto free_sync_to_engine_map;
9137         }
9138
9139         /* Iterate over MME engines */
9140         for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9141                 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9142                         /* MME registers must be accessed with clock gating
9143                          * disabled
9144                          */
9145                         mutex_lock(&gaudi->clk_gate_mutex);
9146                         hdev->asic_funcs->disable_clock_gating(hdev);
9147
9148                         reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9149                                                 sds->props[SP_NEXT_MME] * i +
9150                                                 j * sizeof(u32));
9151
9152                         /* We can reenable clock_gating */
9153                         hdev->asic_funcs->set_clock_gating(hdev);
9154                         mutex_unlock(&gaudi->clk_gate_mutex);
9155
9156                         rc = gaudi_add_sync_to_engine_map_entry(
9157                                 map, reg_value, ENGINE_MME,
9158                                 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9159                         if (rc)
9160                                 goto free_sync_to_engine_map;
9161                 }
9162         }
9163
9164         /* Iterate over DMA engines */
9165         for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9166                 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9167                                         sds->props[SP_DMA_QUEUES_OFFSET] * i);
9168                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9169                                                         ENGINE_DMA, i);
9170                 if (rc)
9171                         goto free_sync_to_engine_map;
9172         }
9173
9174         return 0;
9175
9176 free_sync_to_engine_map:
9177         hl_state_dump_free_sync_to_engine_map(map);
9178
9179         return rc;
9180 }
9181
9182 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9183 {
9184         return FIELD_GET(
9185                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9186                 mon->status);
9187 }
9188
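/*
 * Format a comma-separated list of the sync object IDs watched by a monitor,
 * derived from the group id and mask in its ARM register: every cleared mask
 * bit i corresponds to SOB (group_id * MONITOR_MAX_SOBS + i).
 */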
9189 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
9190 {
9191         const size_t max_write = 10;
9192         u32 gid, mask, sob;
9193         int i, offset;
9194
9195         /* Each monitored sync object ID is calculated as follows:
9196          * (group_id * 8 + index of a cleared bit in the mask)
9197          */
9198         gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9199                         mon->arm_data);
9200         mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9201                         mon->arm_data);
9202
9203         for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
9204                 max_write; mask >>= 1, i++) {
9205                 if (!(mask & 1)) {
9206                         sob = gid * MONITOR_MAX_SOBS + i;
9207
9208                         if (offset > 0)
9209                                 offset += snprintf(sobs + offset, max_write,
9210                                                         ", ");
9211
9212                         offset += snprintf(sobs + offset, max_write, "%u", sob);
9213                 }
9214         }
9215 }
9216
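/*
 * Append a one-line description of a single monitor to the state dump buffer:
 * its id and name, the armed group id, mask and target value, the payload
 * data and address, the pending status and the list of monitored SOBs.
 */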
9217 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9218                                 struct hl_device *hdev,
9219                                 struct hl_mon_state_dump *mon)
9220 {
9221         const char *name;
9222         char scratch_buf1[BIN_REG_STRING_SIZE],
9223                 scratch_buf2[BIN_REG_STRING_SIZE];
9224         char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9225
9226         name = hl_state_dump_get_monitor_name(hdev, mon);
9227         if (!name)
9228                 name = "";
9229
9230         gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9231
9232         return hl_snprintf_resize(
9233                 buf, size, offset,
9234                 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9235                 mon->id, name,
9236                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9237                                 mon->arm_data),
9238                 hl_format_as_binary(
9239                         scratch_buf1, sizeof(scratch_buf1),
9240                         FIELD_GET(
9241                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9242                                 mon->arm_data)),
9243                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9244                                 mon->arm_data),
9245                 mon->wr_data,
9246                 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9247                 hl_format_as_binary(
9248                         scratch_buf2, sizeof(scratch_buf2),
9249                         FIELD_GET(
9250                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9251                                 mon->status)),
9252                 monitored_sobs);
9253 }
9254
9255
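/*
 * Dump the fence state of one engine's QMAN: read the per-stream CP status
 * and fence counter registers and, for every stream with a fence in progress,
 * print the fence id, counter and RDATA addresses, fence value and CP status.
 */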
9256 static int gaudi_print_fences_single_engine(
9257         struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9258         enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9259         size_t *size, size_t *offset)
9260 {
9261         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9262         int rc = -ENOMEM, i;
9263         u32 *statuses, *fences;
9264
9265         statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9266                         sizeof(*statuses), GFP_KERNEL);
9267         if (!statuses)
9268                 goto out;
9269
9270         fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9271                                 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9272                          sizeof(*fences), GFP_KERNEL);
9273         if (!fences)
9274                 goto free_status;
9275
9276         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9277                 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9278
9279         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9280                                 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9281                 fences[i] = RREG32(base_offset + i * sizeof(u32));
9282
9283         /* The actual print */
9284         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9285                 u32 fence_id;
9286                 u64 fence_cnt, fence_rdata;
9287                 const char *engine_name;
9288
9289                 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9290                         statuses[i]))
9291                         continue;
9292
9293                 fence_id =
9294                         FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9295                 fence_cnt = base_offset + CFG_BASE +
9296                         sizeof(u32) *
9297                         (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9298                 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9299                                 sds->props[SP_FENCE0_RDATA_OFFSET];
9300                 engine_name = hl_sync_engine_to_string(engine_type);
9301
9302                 rc = hl_snprintf_resize(
9303                         buf, size, offset,
9304                         "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9305                         engine_name, engine_id,
9306                         i, fence_id,
9307                         fence_cnt, engine_name, engine_id, fence_id, i,
9308                         fence_rdata, engine_name, engine_id, fence_id, i,
9309                         fences[fence_id],
9310                         statuses[i]);
9311                 if (rc)
9312                         goto free_fences;
9313         }
9314
9315         rc = 0;
9316
9317 free_fences:
9318         kfree(fences);
9319 free_status:
9320         kfree(statuses);
9321 out:
9322         return rc;
9323 }
9324
9325
9326 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9327         .monitor_valid = gaudi_monitor_valid,
9328         .print_single_monitor = gaudi_print_single_monitor,
9329         .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9330         .print_fences_single_engine = gaudi_print_fences_single_engine,
9331 };
9332
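/*
 * Register Gaudi state dump support: hash tables translating SOB and monitor
 * ids to names, the properties table, the sync manager names and the dump
 * callbacks.
 */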
9333 static void gaudi_state_dump_init(struct hl_device *hdev)
9334 {
9335         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9336         int i;
9337
9338         for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9339                 hash_add(sds->so_id_to_str_tb,
9340                         &gaudi_so_id_to_str[i].node,
9341                         gaudi_so_id_to_str[i].id);
9342
9343         for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9344                 hash_add(sds->monitor_id_to_str_tb,
9345                         &gaudi_monitor_id_to_str[i].node,
9346                         gaudi_monitor_id_to_str[i].id);
9347
9348         sds->props = gaudi_state_dump_specs_props;
9349
9350         sds->sync_namager_names = gaudi_sync_manager_names;
9351
9352         sds->funcs = gaudi_state_dump_funcs;
9353 }
9354
9355 static const struct hl_asic_funcs gaudi_funcs = {
9356         .early_init = gaudi_early_init,
9357         .early_fini = gaudi_early_fini,
9358         .late_init = gaudi_late_init,
9359         .late_fini = gaudi_late_fini,
9360         .sw_init = gaudi_sw_init,
9361         .sw_fini = gaudi_sw_fini,
9362         .hw_init = gaudi_hw_init,
9363         .hw_fini = gaudi_hw_fini,
9364         .halt_engines = gaudi_halt_engines,
9365         .suspend = gaudi_suspend,
9366         .resume = gaudi_resume,
9367         .mmap = gaudi_mmap,
9368         .ring_doorbell = gaudi_ring_doorbell,
9369         .pqe_write = gaudi_pqe_write,
9370         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9371         .asic_dma_free_coherent = gaudi_dma_free_coherent,
9372         .scrub_device_mem = gaudi_scrub_device_mem,
9373         .get_int_queue_base = gaudi_get_int_queue_base,
9374         .test_queues = gaudi_test_queues,
9375         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9376         .asic_dma_pool_free = gaudi_dma_pool_free,
9377         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9378         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9379         .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9380         .cs_parser = gaudi_cs_parser,
9381         .asic_dma_map_sg = gaudi_dma_map_sg,
9382         .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9383         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9384         .update_eq_ci = gaudi_update_eq_ci,
9385         .context_switch = gaudi_context_switch,
9386         .restore_phase_topology = gaudi_restore_phase_topology,
9387         .debugfs_read32 = gaudi_debugfs_read32,
9388         .debugfs_write32 = gaudi_debugfs_write32,
9389         .debugfs_read64 = gaudi_debugfs_read64,
9390         .debugfs_write64 = gaudi_debugfs_write64,
9391         .debugfs_read_dma = gaudi_debugfs_read_dma,
9392         .add_device_attr = gaudi_add_device_attr,
9393         .handle_eqe = gaudi_handle_eqe,
9394         .set_pll_profile = gaudi_set_pll_profile,
9395         .get_events_stat = gaudi_get_events_stat,
9396         .read_pte = gaudi_read_pte,
9397         .write_pte = gaudi_write_pte,
9398         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9399         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9400         .send_heartbeat = gaudi_send_heartbeat,
9401         .set_clock_gating = gaudi_set_clock_gating,
9402         .disable_clock_gating = gaudi_disable_clock_gating,
9403         .debug_coresight = gaudi_debug_coresight,
9404         .is_device_idle = gaudi_is_device_idle,
9405         .soft_reset_late_init = gaudi_soft_reset_late_init,
9406         .hw_queues_lock = gaudi_hw_queues_lock,
9407         .hw_queues_unlock = gaudi_hw_queues_unlock,
9408         .get_pci_id = gaudi_get_pci_id,
9409         .get_eeprom_data = gaudi_get_eeprom_data,
9410         .send_cpu_message = gaudi_send_cpu_message,
9411         .pci_bars_map = gaudi_pci_bars_map,
9412         .init_iatu = gaudi_init_iatu,
9413         .rreg = hl_rreg,
9414         .wreg = hl_wreg,
9415         .halt_coresight = gaudi_halt_coresight,
9416         .ctx_init = gaudi_ctx_init,
9417         .ctx_fini = gaudi_ctx_fini,
9418         .get_clk_rate = gaudi_get_clk_rate,
9419         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9420         .load_firmware_to_device = gaudi_load_firmware_to_device,
9421         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9422         .get_signal_cb_size = gaudi_get_signal_cb_size,
9423         .get_wait_cb_size = gaudi_get_wait_cb_size,
9424         .gen_signal_cb = gaudi_gen_signal_cb,
9425         .gen_wait_cb = gaudi_gen_wait_cb,
9426         .reset_sob = gaudi_reset_sob,
9427         .reset_sob_group = gaudi_reset_sob_group,
9428         .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9429         .get_device_time = gaudi_get_device_time,
9430         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9431         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9432         .scramble_addr = hl_mmu_scramble_addr,
9433         .descramble_addr = hl_mmu_descramble_addr,
9434         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9435         .get_hw_block_id = gaudi_get_hw_block_id,
9436         .hw_block_mmap = gaudi_block_mmap,
9437         .enable_events_from_fw = gaudi_enable_events_from_fw,
9438         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9439         .init_firmware_loader = gaudi_init_firmware_loader,
9440         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9441         .state_dump_init = gaudi_state_dump_init,
9442         .get_sob_addr = gaudi_get_sob_addr,
9443         .set_pci_memory_regions = gaudi_set_pci_memory_regions
9444 };
9445
9446 /**
9447  * gaudi_set_asic_funcs - set GAUDI function pointers
9448  *
9449  * @hdev: pointer to hl_device structure
9450  *
9451  */
9452 void gaudi_set_asic_funcs(struct hl_device *hdev)
9453 {
9454         hdev->asic_funcs = &gaudi_funcs;
9455 }