habanalabs: set max power on device init per ASIC
[linux-2.6-microblaze.git] drivers/misc/habanalabs/gaudi/gaudi.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: the driver needs to parse the CB, but WREG must be
43  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
44  *                      never secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
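
/*
 * A minimal sketch of the secured-DMA flow described above. This is
 * illustrative pseudo-code only; the helper names below are assumptions,
 * not real driver APIs:
 *
 *	if (device_is_idle(hdev)) {
 *		dma_ch0_set_secured(hdev, true);
 *		rc = do_dma_transfer(hdev, job);
 *		dma_ch0_set_secured(hdev, false);	// back to not secured
 *	}
 */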
61
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000         /* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN            20
86
87 #define GAUDI_CB_POOL_CB_CNT            512
88 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
97
98 #define GAUDI_ARB_WDT_TIMEOUT           0x1000000
99
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
101                 BIT(GAUDI_ENGINE_ID_MME_0) |\
102                 BIT(GAUDI_ENGINE_ID_MME_2) |\
103                 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
104
105 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
106
107 #define GAUDI_PLL_MAX 10
108
109 #define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")
110
111 #define MONITOR_SOB_STRING_SIZE         256
112
113 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
114         GAUDI_QUEUE_ID_DMA_0_0,
115         GAUDI_QUEUE_ID_DMA_0_1,
116         GAUDI_QUEUE_ID_DMA_0_2,
117         GAUDI_QUEUE_ID_DMA_0_3,
118         GAUDI_QUEUE_ID_DMA_1_0,
119         GAUDI_QUEUE_ID_DMA_1_1,
120         GAUDI_QUEUE_ID_DMA_1_2,
121         GAUDI_QUEUE_ID_DMA_1_3
122 };
123
124 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
125                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
126                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
127                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
128                 "gaudi cpu eq"
129 };
130
131 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
132         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
133         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
134         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
135         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
136         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
137         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
138         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
139         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
140 };
141
142 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
143         [0] = GAUDI_QUEUE_ID_DMA_0_0,
144         [1] = GAUDI_QUEUE_ID_DMA_0_1,
145         [2] = GAUDI_QUEUE_ID_DMA_0_2,
146         [3] = GAUDI_QUEUE_ID_DMA_0_3,
147         [4] = GAUDI_QUEUE_ID_DMA_1_0,
148         [5] = GAUDI_QUEUE_ID_DMA_1_1,
149         [6] = GAUDI_QUEUE_ID_DMA_1_2,
150         [7] = GAUDI_QUEUE_ID_DMA_1_3,
151 };
152
153 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
154         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
155         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
156         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
157         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
158         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
159         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
160         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
161         [PACKET_FENCE]          = sizeof(struct packet_fence),
162         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
163         [PACKET_NOP]            = sizeof(struct packet_nop),
164         [PACKET_STOP]           = sizeof(struct packet_stop),
165         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
166         [PACKET_WAIT]           = sizeof(struct packet_wait),
167         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
168 };
169
170 static inline bool validate_packet_id(enum packet_id id)
171 {
172         switch (id) {
173         case PACKET_WREG_32:
174         case PACKET_WREG_BULK:
175         case PACKET_MSG_LONG:
176         case PACKET_MSG_SHORT:
177         case PACKET_CP_DMA:
178         case PACKET_REPEAT:
179         case PACKET_MSG_PROT:
180         case PACKET_FENCE:
181         case PACKET_LIN_DMA:
182         case PACKET_NOP:
183         case PACKET_STOP:
184         case PACKET_ARB_POINT:
185         case PACKET_WAIT:
186         case PACKET_LOAD_AND_EXE:
187                 return true;
188         default:
189                 return false;
190         }
191 }
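
/*
 * Illustrative sketch (not part of the driver) of how a command-buffer parser
 * could combine validate_packet_id() with gaudi_packet_sizes[]: decode the
 * opcode from the packet ctl word, validate it, then advance by the packet
 * size. "pkt" and "cb_base" below are assumed to point into the CB.
 *
 *	u32 offset = 0;
 *
 *	while (offset < cb_size) {
 *		u32 ctl = le32_to_cpu(pkt->ctl);
 *		enum packet_id id = FIELD_GET(GAUDI_PKT_CTL_OPCODE_MASK, ctl);
 *
 *		if (!validate_packet_id(id) || !gaudi_packet_sizes[id])
 *			break;			// unknown opcode - stop parsing
 *
 *		offset += gaudi_packet_sizes[id];
 *		pkt = cb_base + offset;
 *	}
 */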
192
193 static const char * const
194 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
195         "tpc_address_exceed_slm",
196         "tpc_div_by_0",
197         "tpc_spu_mac_overflow",
198         "tpc_spu_addsub_overflow",
199         "tpc_spu_abs_overflow",
200         "tpc_spu_fp_dst_nan_inf",
201         "tpc_spu_fp_dst_denorm",
202         "tpc_vpu_mac_overflow",
203         "tpc_vpu_addsub_overflow",
204         "tpc_vpu_abs_overflow",
205         "tpc_vpu_fp_dst_nan_inf",
206         "tpc_vpu_fp_dst_denorm",
207         "tpc_assertions",
208         "tpc_illegal_instruction",
209         "tpc_pc_wrap_around",
210         "tpc_qm_sw_err",
211         "tpc_hbw_rresp_err",
212         "tpc_hbw_bresp_err",
213         "tpc_lbw_rresp_err",
214         "tpc_lbw_bresp_err"
215 };
216
217 static const char * const
218 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
219         "PQ AXI HBW error",
220         "CQ AXI HBW error",
221         "CP AXI HBW error",
222         "CP error due to undefined OPCODE",
223         "CP encountered STOP OPCODE",
224         "CP AXI LBW error",
225         "CP WRREG32 or WRBULK returned error",
226         "N/A",
227         "FENCE 0 inc over max value and clipped",
228         "FENCE 1 inc over max value and clipped",
229         "FENCE 2 inc over max value and clipped",
230         "FENCE 3 inc over max value and clipped",
231         "FENCE 0 dec under min value and clipped",
232         "FENCE 1 dec under min value and clipped",
233         "FENCE 2 dec under min value and clipped",
234         "FENCE 3 dec under min value and clipped"
235 };
236
237 static const char * const
238 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
239         "Choice push while full error",
240         "Choice Q watchdog error",
241         "MSG AXI LBW returned with error"
242 };
243
244 enum gaudi_sm_sei_cause {
245         GAUDI_SM_SEI_SO_OVERFLOW,
246         GAUDI_SM_SEI_LBW_4B_UNALIGNED,
247         GAUDI_SM_SEI_AXI_RESPONSE_ERR
248 };
249
250 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
251         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
252         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
253         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
254         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
255         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
256         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
257         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
258         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
259         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
349         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
350         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
351         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
352         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
353         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
354         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
355         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
356         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
357         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
358         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
359         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
360         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
361         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
362         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
363         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
364 };
365
366 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
367         { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
368         { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
369         { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
370         { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
371         { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
372         { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
373         { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
374         { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
375         { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
376         { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
377         { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
378         { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
379         { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
380         { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
381         { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
382         { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
383         { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
384         { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
385         { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
386         { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
387         { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
388         { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
389         { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
390         { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
391         { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
392         { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
393         { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
394 };
395
396 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
397         { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
398         { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
399         { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
400         { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
401         { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
402         { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
403         { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
404         { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
405         { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
406         { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
407         { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
408 };
409
410 static s64 gaudi_state_dump_specs_props[] = {
411         [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
412         [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
413         [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
414         [SP_MON_OBJ_WR_ADDR_LOW] =
415                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
416         [SP_MON_OBJ_WR_ADDR_HIGH] =
417                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
418         [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
419         [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
420         [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
421         [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
422         [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
423         [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
424         [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
425         [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
426         [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
427         [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
428         [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
429         [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
430         [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
431         [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
432         [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
433         [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
434         [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
435         [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
436         [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
437         [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
438         [SP_FENCE0_CNT_OFFSET] =
439                 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
440         [SP_FENCE0_RDATA_OFFSET] =
441                 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
442         [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
443         [SP_NUM_CORES] = 1,
444 };
445
446 /* The order here is opposite to the order of the indexing in the h/w.
447  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
448  */
449 static const char * const gaudi_sync_manager_names[] = {
450         "SYNC_MGR_E_N",
451         "SYNC_MGR_W_N",
452         "SYNC_MGR_E_S",
453         "SYNC_MGR_W_S",
454         NULL
455 };
456
457 struct ecc_info_extract_params {
458         u64 block_address;
459         u32 num_memories;
460         bool derr;
461 };
462
463 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
464                                                                 u64 phys_addr);
465 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
466                                         struct hl_cs_job *job);
467 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
468                                         u32 size, u64 val);
469 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
470                                         u32 num_regs, u32 val);
471 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
472                                 u32 tpc_id);
473 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
474 static int gaudi_cpucp_info_get(struct hl_device *hdev);
475 static void gaudi_disable_clock_gating(struct hl_device *hdev);
476 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
477 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
478                                 u32 size, bool eb);
479 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
480                                 struct hl_gen_wait_properties *prop);
481 static inline enum hl_collective_mode
482 get_collective_mode(struct hl_device *hdev, u32 queue_id)
483 {
484         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
485                 return HL_COLLECTIVE_MASTER;
486
487         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
488                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
489                 return HL_COLLECTIVE_SLAVE;
490
491         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
492                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
493                 return HL_COLLECTIVE_SLAVE;
494
495         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
496                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
497                 return HL_COLLECTIVE_SLAVE;
498
499         return HL_COLLECTIVE_NOT_SUPPORTED;
500 }
501
502 static inline void set_default_power_values(struct hl_device *hdev)
503 {
504         struct asic_fixed_properties *prop = &hdev->asic_prop;
505
506         if (hdev->card_type == cpucp_card_type_pmc) {
507                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
508
509                 if (prop->fw_security_enabled)
510                         prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
511                 else
512                         prop->dc_power_default = DC_POWER_DEFAULT_PMC;
513         } else {
514                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
515                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
516         }
517 }
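
/*
 * These defaults feed the "set max power on device init" flow from the commit
 * subject: gaudi_set_fixed_properties() below sets
 * prop->set_max_power_on_device_init, and the common init code is then
 * expected to push max_power_default to the firmware. A rough sketch of that
 * call site (the helper name is an assumption):
 *
 *	if (hdev->asic_prop.set_max_power_on_device_init)
 *		hl_fw_set_max_power(hdev);	// sends max_power_default via CPUCP
 */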
518
519 static int gaudi_set_fixed_properties(struct hl_device *hdev)
520 {
521         struct asic_fixed_properties *prop = &hdev->asic_prop;
522         u32 num_sync_stream_queues = 0;
523         int i;
524
525         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
526         prop->hw_queues_props = kcalloc(prop->max_queues,
527                         sizeof(struct hw_queue_properties),
528                         GFP_KERNEL);
529
530         if (!prop->hw_queues_props)
531                 return -ENOMEM;
532
533         for (i = 0 ; i < prop->max_queues ; i++) {
534                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
535                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
536                         prop->hw_queues_props[i].driver_only = 0;
537                         prop->hw_queues_props[i].supports_sync_stream = 1;
538                         prop->hw_queues_props[i].cb_alloc_flags =
539                                 CB_ALLOC_KERNEL;
540                         num_sync_stream_queues++;
541                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
542                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
543                         prop->hw_queues_props[i].driver_only = 1;
544                         prop->hw_queues_props[i].supports_sync_stream = 0;
545                         prop->hw_queues_props[i].cb_alloc_flags =
546                                 CB_ALLOC_KERNEL;
547                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
548                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
549                         prop->hw_queues_props[i].driver_only = 0;
550                         prop->hw_queues_props[i].supports_sync_stream = 0;
551                         prop->hw_queues_props[i].cb_alloc_flags =
552                                 CB_ALLOC_USER;
553
554                 }
555                 prop->hw_queues_props[i].collective_mode =
556                                                 get_collective_mode(hdev, i);
557         }
558
559         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
560         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
561         prop->collective_first_sob = 0;
562         prop->collective_first_mon = 0;
563
564         /* 2 SOBs per internal queue stream are reserved for collective */
565         prop->sync_stream_first_sob =
566                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
567                         * QMAN_STREAMS * HL_RSVD_SOBS;
568
569         /* 1 monitor per internal queue stream is reserved for collective
570          * 2 monitors per external queue stream are reserved for collective
571          */
572         prop->sync_stream_first_mon =
573                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
574                         (NUMBER_OF_EXT_HW_QUEUES * 2);
575
576         prop->dram_base_address = DRAM_PHYS_BASE;
577         prop->dram_size = GAUDI_HBM_SIZE_32GB;
578         prop->dram_end_address = prop->dram_base_address +
579                                         prop->dram_size;
580         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
581
582         prop->sram_base_address = SRAM_BASE_ADDR;
583         prop->sram_size = SRAM_SIZE;
584         prop->sram_end_address = prop->sram_base_address +
585                                         prop->sram_size;
586         prop->sram_user_base_address = prop->sram_base_address +
587                                         SRAM_USER_BASE_OFFSET;
588
589         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
590         if (hdev->pldm)
591                 prop->mmu_pgt_size = 0x800000; /* 8MB */
592         else
593                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
594         prop->mmu_pte_size = HL_PTE_SIZE;
595         prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
596         prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
597         prop->dram_page_size = PAGE_SIZE_2MB;
598         prop->dram_supports_virtual_memory = false;
599
600         prop->pmmu.hop0_shift = MMU_V1_1_HOP0_SHIFT;
601         prop->pmmu.hop1_shift = MMU_V1_1_HOP1_SHIFT;
602         prop->pmmu.hop2_shift = MMU_V1_1_HOP2_SHIFT;
603         prop->pmmu.hop3_shift = MMU_V1_1_HOP3_SHIFT;
604         prop->pmmu.hop4_shift = MMU_V1_1_HOP4_SHIFT;
605         prop->pmmu.hop0_mask = MMU_V1_1_HOP0_MASK;
606         prop->pmmu.hop1_mask = MMU_V1_1_HOP1_MASK;
607         prop->pmmu.hop2_mask = MMU_V1_1_HOP2_MASK;
608         prop->pmmu.hop3_mask = MMU_V1_1_HOP3_MASK;
609         prop->pmmu.hop4_mask = MMU_V1_1_HOP4_MASK;
610         prop->pmmu.start_addr = VA_HOST_SPACE_START;
611         prop->pmmu.end_addr =
612                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
613         prop->pmmu.page_size = PAGE_SIZE_4KB;
614         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
615         prop->pmmu.last_mask = LAST_MASK;
616         /* TODO: will be duplicated until per-MMU props are implemented */
617         prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
618         prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
619
620         /* PMMU and HPMMU are the same except for the page size */
621         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
622         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
623
624         /* shifts and masks are the same in PMMU and DMMU */
625         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
626         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
627         prop->dmmu.end_addr = VA_HOST_SPACE_END;
628         prop->dmmu.page_size = PAGE_SIZE_2MB;
629
630         prop->cfg_size = CFG_SIZE;
631         prop->max_asid = MAX_ASID;
632         prop->num_of_events = GAUDI_EVENT_SIZE;
633         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
634
635         set_default_power_values(hdev);
636
637         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
638         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
639
640         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
641         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
642
643         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
644                                         CARD_NAME_MAX_LEN);
645
646         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
647
648         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
649                         prop->sync_stream_first_sob +
650                         (num_sync_stream_queues * HL_RSVD_SOBS);
651         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
652                         prop->sync_stream_first_mon +
653                         (num_sync_stream_queues * HL_RSVD_MONS);
654
655         prop->first_available_user_msix_interrupt = USHRT_MAX;
656
657         for (i = 0 ; i < HL_MAX_DCORES ; i++)
658                 prop->first_available_cq[i] = USHRT_MAX;
659
660         prop->fw_cpu_boot_dev_sts0_valid = false;
661         prop->fw_cpu_boot_dev_sts1_valid = false;
662         prop->hard_reset_done_by_fw = false;
663         prop->gic_interrupts_enable = true;
664
665         prop->server_type = HL_SERVER_TYPE_UNKNOWN;
666
667         prop->clk_pll_index = HL_GAUDI_MME_PLL;
668         prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
669
670         prop->use_get_power_for_reset_history = true;
671
672         prop->configurable_stop_on_err = true;
673
674         prop->set_max_power_on_device_init = true;
675
676         return 0;
677 }
678
679 static int gaudi_pci_bars_map(struct hl_device *hdev)
680 {
681         static const char * const name[] = {"SRAM", "CFG", "HBM"};
682         bool is_wc[3] = {false, false, true};
683         int rc;
684
685         rc = hl_pci_bars_map(hdev, name, is_wc);
686         if (rc)
687                 return rc;
688
689         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
690                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
691
692         return 0;
693 }
694
695 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
696 {
697         struct gaudi_device *gaudi = hdev->asic_specific;
698         struct hl_inbound_pci_region pci_region;
699         u64 old_addr = addr;
700         int rc;
701
702         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
703                 return old_addr;
704
705         if (hdev->asic_prop.iatu_done_by_fw)
706                 return U64_MAX;
707
708         /* Inbound Region 2 - Bar 4 - Point to HBM */
709         pci_region.mode = PCI_BAR_MATCH_MODE;
710         pci_region.bar = HBM_BAR_ID;
711         pci_region.addr = addr;
712         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
713         if (rc)
714                 return U64_MAX;
715
716         if (gaudi) {
717                 old_addr = gaudi->hbm_bar_cur_addr;
718                 gaudi->hbm_bar_cur_addr = addr;
719         }
720
721         return old_addr;
722 }
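
/*
 * Typical usage sketch (illustrative only): to access a DRAM address through
 * the HBM BAR, move the BAR window to the BAR-size-aligned base containing
 * the address, access the offset inside the window, then restore the window.
 *
 *	u64 bar_base = addr & ~(hdev->asic_prop.dram_pci_bar_size - 1);
 *	u64 old_base = gaudi_set_hbm_bar_base(hdev, bar_base);
 *
 *	val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base));
 *
 *	gaudi_set_hbm_bar_base(hdev, old_base);	// restore previous window
 */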
723
724 static int gaudi_init_iatu(struct hl_device *hdev)
725 {
726         struct hl_inbound_pci_region inbound_region;
727         struct hl_outbound_pci_region outbound_region;
728         int rc;
729
730         if (hdev->asic_prop.iatu_done_by_fw)
731                 return 0;
732
733         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
734         inbound_region.mode = PCI_BAR_MATCH_MODE;
735         inbound_region.bar = SRAM_BAR_ID;
736         inbound_region.addr = SRAM_BASE_ADDR;
737         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
738         if (rc)
739                 goto done;
740
741         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
742         inbound_region.mode = PCI_BAR_MATCH_MODE;
743         inbound_region.bar = CFG_BAR_ID;
744         inbound_region.addr = SPI_FLASH_BASE_ADDR;
745         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
746         if (rc)
747                 goto done;
748
749         /* Inbound Region 2 - Bar 4 - Point to HBM */
750         inbound_region.mode = PCI_BAR_MATCH_MODE;
751         inbound_region.bar = HBM_BAR_ID;
752         inbound_region.addr = DRAM_PHYS_BASE;
753         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
754         if (rc)
755                 goto done;
756
757         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
758
759         /* Outbound Region 0 - Point to Host */
760         outbound_region.addr = HOST_PHYS_BASE;
761         outbound_region.size = HOST_PHYS_SIZE;
762         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
763
764 done:
765         return rc;
766 }
767
768 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
769 {
770         return RREG32(mmHW_STATE);
771 }
772
773 static int gaudi_early_init(struct hl_device *hdev)
774 {
775         struct asic_fixed_properties *prop = &hdev->asic_prop;
776         struct pci_dev *pdev = hdev->pdev;
777         u32 fw_boot_status;
778         int rc;
779
780         rc = gaudi_set_fixed_properties(hdev);
781         if (rc) {
782                 dev_err(hdev->dev, "Failed setting fixed properties\n");
783                 return rc;
784         }
785
786         /* Check BAR sizes */
787         if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
788                 dev_err(hdev->dev,
789                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
790                         SRAM_BAR_ID,
791                         (unsigned long long) pci_resource_len(pdev,
792                                                         SRAM_BAR_ID),
793                         SRAM_BAR_SIZE);
794                 rc = -ENODEV;
795                 goto free_queue_props;
796         }
797
798         if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
799                 dev_err(hdev->dev,
800                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
801                         CFG_BAR_ID,
802                         (unsigned long long) pci_resource_len(pdev,
803                                                                 CFG_BAR_ID),
804                         CFG_BAR_SIZE);
805                 rc = -ENODEV;
806                 goto free_queue_props;
807         }
808
809         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
810         hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
811
812         /* If FW security is enabled at this point it means no access to ELBI */
813         if (hdev->asic_prop.fw_security_enabled) {
814                 hdev->asic_prop.iatu_done_by_fw = true;
815
816                 /*
817                  * The GIC security bit can ONLY be set by CPUCP, so at this
818                  * stage the decision can only be taken based on PCI ID security.
819                  */
820                 hdev->asic_prop.gic_interrupts_enable = false;
821                 goto pci_init;
822         }
823
824         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
825                                 &fw_boot_status);
826         if (rc)
827                 goto free_queue_props;
828
829         /* Check whether FW is configuring iATU */
830         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
831                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
832                 hdev->asic_prop.iatu_done_by_fw = true;
833
834 pci_init:
835         rc = hl_pci_init(hdev);
836         if (rc)
837                 goto free_queue_props;
838
839         /* Before continuing with the initialization, we need to read the preboot
840          * version to determine whether we are running with security-enabled firmware
841          */
842         rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
843                                         mmCPU_BOOT_DEV_STS0,
844                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
845                                         mmCPU_BOOT_ERR1,
846                                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
847         if (rc) {
848                 if (hdev->reset_on_preboot_fail)
849                         hdev->asic_funcs->hw_fini(hdev, true, false);
850                 goto pci_fini;
851         }
852
853         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
854                 dev_info(hdev->dev,
855                         "H/W state is dirty, must reset before initializing\n");
856                 hdev->asic_funcs->hw_fini(hdev, true, false);
857         }
858
859         return 0;
860
861 pci_fini:
862         hl_pci_fini(hdev);
863 free_queue_props:
864         kfree(hdev->asic_prop.hw_queues_props);
865         return rc;
866 }
867
868 static int gaudi_early_fini(struct hl_device *hdev)
869 {
870         kfree(hdev->asic_prop.hw_queues_props);
871         hl_pci_fini(hdev);
872
873         return 0;
874 }
875
876 /**
877  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
878  *
879  * @hdev: pointer to hl_device structure
880  * Return: 0 on success, negative error code on failure.
881  */
882 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
883 {
884         struct asic_fixed_properties *prop = &hdev->asic_prop;
885         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
886         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
887         int rc;
888
889         if (hdev->asic_prop.fw_security_enabled) {
890                 struct gaudi_device *gaudi = hdev->asic_specific;
891
892                 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
893                         return 0;
894
895                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
896
897                 if (rc)
898                         return rc;
899
900                 freq = pll_freq_arr[2];
901         } else {
902                 /* Backward compatibility */
903                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
904                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
905                 nr = RREG32(mmPSOC_CPU_PLL_NR);
906                 nf = RREG32(mmPSOC_CPU_PLL_NF);
907                 od = RREG32(mmPSOC_CPU_PLL_OD);
908
909                 if (div_sel == DIV_SEL_REF_CLK ||
910                                 div_sel == DIV_SEL_DIVIDED_REF) {
911                         if (div_sel == DIV_SEL_REF_CLK)
912                                 freq = PLL_REF_CLK;
913                         else
914                                 freq = PLL_REF_CLK / (div_fctr + 1);
915                 } else if (div_sel == DIV_SEL_PLL_CLK ||
916                         div_sel == DIV_SEL_DIVIDED_PLL) {
917                         pll_clk = PLL_REF_CLK * (nf + 1) /
918                                         ((nr + 1) * (od + 1));
919                         if (div_sel == DIV_SEL_PLL_CLK)
920                                 freq = pll_clk;
921                         else
922                                 freq = pll_clk / (div_fctr + 1);
923                 } else {
924                         dev_warn(hdev->dev,
925                                 "Received invalid div select value: %d\n",
926                                 div_sel);
927                         freq = 0;
928                 }
929         }
930
931         prop->psoc_timestamp_frequency = freq;
932         prop->psoc_pci_pll_nr = nr;
933         prop->psoc_pci_pll_nf = nf;
934         prop->psoc_pci_pll_od = od;
935         prop->psoc_pci_pll_div_factor = div_fctr;
936
937         return 0;
938 }
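
/*
 * Worked example of the PLL math above (the numbers are illustrative only):
 * with PLL_REF_CLK = 50 MHz, nf = 79, nr = 0, od = 1 and div_sel =
 * DIV_SEL_DIVIDED_PLL with div_fctr = 1:
 *
 *	pll_clk = 50 * (79 + 1) / ((0 + 1) * (1 + 1)) = 2000 MHz
 *	freq    = pll_clk / (div_fctr + 1)           = 1000 MHz
 */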
939
940 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
941                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
942 {
943         struct asic_fixed_properties *prop = &hdev->asic_prop;
944         struct packet_lin_dma *init_tpc_mem_pkt;
945         struct hl_cs_job *job;
946         struct hl_cb *cb;
947         u64 dst_addr;
948         u32 cb_size, ctl;
949         u8 tpc_id;
950         int rc;
951
952         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
953         if (!cb)
954                 return -EFAULT;
955
956         init_tpc_mem_pkt = cb->kernel_address;
957         cb_size = sizeof(*init_tpc_mem_pkt);
958         memset(init_tpc_mem_pkt, 0, cb_size);
959
960         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
961
962         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
963         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
964         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
965         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
966
967         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
968
969         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
970         dst_addr = (prop->sram_user_base_address &
971                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
972                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
973         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
974
975         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
976         if (!job) {
977                 dev_err(hdev->dev, "Failed to allocate a new job\n");
978                 rc = -ENOMEM;
979                 goto release_cb;
980         }
981
982         job->id = 0;
983         job->user_cb = cb;
984         atomic_inc(&job->user_cb->cs_cnt);
985         job->user_cb_size = cb_size;
986         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
987         job->patched_cb = job->user_cb;
988         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
989
990         hl_debugfs_add_job(hdev, job);
991
992         rc = gaudi_send_job_on_qman0(hdev, job);
993
994         if (rc)
995                 goto free_job;
996
997         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
998                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
999                 if (rc)
1000                         break;
1001         }
1002
1003 free_job:
1004         hl_userptr_delete_list(hdev, &job->userptr_list);
1005         hl_debugfs_remove_job(hdev, job);
1006         kfree(job);
1007         atomic_dec(&cb->cs_cnt);
1008
1009 release_cb:
1010         hl_cb_put(cb);
1011         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1012
1013         return rc;
1014 }
1015
1016 /*
1017  * gaudi_init_tpc_mem() - Initialize TPC memories.
1018  * @hdev: Pointer to hl_device structure.
1019  *
1020  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1021  *
1022  * Return: 0 for success, negative value for error.
1023  */
1024 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1025 {
1026         const struct firmware *fw;
1027         size_t fw_size;
1028         void *cpu_addr;
1029         dma_addr_t dma_handle;
1030         int rc, count = 5;
1031
1032 again:
1033         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1034         if (rc == -EINTR && count-- > 0) {
1035                 msleep(50);
1036                 goto again;
1037         }
1038
1039         if (rc) {
1040                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1041                                 GAUDI_TPC_FW_FILE);
1042                 goto out;
1043         }
1044
1045         fw_size = fw->size;
1046         cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1047                         &dma_handle, GFP_KERNEL | __GFP_ZERO);
1048         if (!cpu_addr) {
1049                 dev_err(hdev->dev,
1050                         "Failed to allocate %zu of dma memory for TPC kernel\n",
1051                         fw_size);
1052                 rc = -ENOMEM;
1053                 goto out;
1054         }
1055
1056         memcpy(cpu_addr, fw->data, fw_size);
1057
1058         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1059
1060         hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1061                         dma_handle);
1062
1063 out:
1064         release_firmware(fw);
1065         return rc;
1066 }
1067
1068 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1069 {
1070         struct gaudi_device *gaudi = hdev->asic_specific;
1071         struct gaudi_collective_properties *prop = &gaudi->collective_props;
1072         struct hl_hw_queue *q;
1073         u32 i, sob_id, sob_group_id, queue_id;
1074
1075         /* Iterate through SOB groups and assign a SOB for each slave queue */
1076         sob_group_id =
1077                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1078         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1079
1080         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1081         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1082                 q = &hdev->kernel_queues[queue_id + (4 * i)];
1083                 q->sync_stream_prop.collective_sob_id = sob_id + i;
1084         }
1085
1086         /* Both DMA5 and TPC7 use the same resources since only a single
1087          * engine needs to participate in the reduction process
1088          */
1089         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1090         q = &hdev->kernel_queues[queue_id];
1091         q->sync_stream_prop.collective_sob_id =
1092                         sob_id + NIC_NUMBER_OF_ENGINES;
1093
1094         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1095         q = &hdev->kernel_queues[queue_id];
1096         q->sync_stream_prop.collective_sob_id =
1097                         sob_id + NIC_NUMBER_OF_ENGINES;
1098 }
1099
1100 static void gaudi_sob_group_hw_reset(struct kref *ref)
1101 {
1102         struct gaudi_hw_sob_group *hw_sob_group =
1103                 container_of(ref, struct gaudi_hw_sob_group, kref);
1104         struct hl_device *hdev = hw_sob_group->hdev;
1105         int i;
1106
1107         for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1108                 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1109                         (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1110
1111         kref_init(&hw_sob_group->kref);
1112 }
1113
1114 static void gaudi_sob_group_reset_error(struct kref *ref)
1115 {
1116         struct gaudi_hw_sob_group *hw_sob_group =
1117                 container_of(ref, struct gaudi_hw_sob_group, kref);
1118         struct hl_device *hdev = hw_sob_group->hdev;
1119
1120         dev_crit(hdev->dev,
1121                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1122                 hw_sob_group->base_sob_id);
1123 }
1124
1125 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1126 {
1127         struct gaudi_collective_properties *prop;
1128         int i;
1129
1130         prop = &gaudi->collective_props;
1131
1132         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1133
1134         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1135                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1136                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1137                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1138         /* Set collective engine bit */
1139         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1140                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1141 }
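
/*
 * Layout example for the master SOB mask built above, assuming
 * HL_MAX_SOBS_PER_MONITOR = 8 and NIC_NUMBER_OF_ENGINES = 10 (illustrative
 * values). With all NIC engines enabled, the loop and the trailing
 * collective-engine bit produce:
 *
 *	mstr_sob_mask[0] = BIT(0) | ... | BIT(7);	// NIC engines 0-7
 *	mstr_sob_mask[1] = BIT(0) | BIT(1) | BIT(2);	// NIC 8, NIC 9, collective engine
 */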
1142
1143 static int gaudi_collective_init(struct hl_device *hdev)
1144 {
1145         u32 i, sob_id, reserved_sobs_per_group;
1146         struct gaudi_collective_properties *prop;
1147         struct gaudi_device *gaudi;
1148
1149         gaudi = hdev->asic_specific;
1150         prop = &gaudi->collective_props;
1151         sob_id = hdev->asic_prop.collective_first_sob;
1152
1153         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1154         reserved_sobs_per_group =
1155                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1156
1157         /* Init SOB groups */
1158         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1159                 prop->hw_sob_group[i].hdev = hdev;
1160                 prop->hw_sob_group[i].base_sob_id = sob_id;
1161                 sob_id += reserved_sobs_per_group;
1162                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1163         }
1164
1165         for (i = 0 ; i < QMAN_STREAMS; i++) {
1166                 prop->next_sob_group_val[i] = 1;
1167                 prop->curr_sob_group_idx[i] = 0;
1168                 gaudi_collective_map_sobs(hdev, i);
1169         }
1170
1171         gaudi_collective_mstr_sob_mask_set(gaudi);
1172
1173         return 0;
1174 }
1175
1176 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1177 {
1178         struct gaudi_device *gaudi = hdev->asic_specific;
1179         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1180
1181         kref_put(&cprop->hw_sob_group[sob_group].kref,
1182                                         gaudi_sob_group_hw_reset);
1183 }
1184
1185 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1186                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1187 {
1188         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1189         struct gaudi_collective_properties *cprop;
1190         struct hl_gen_wait_properties wait_prop;
1191         struct hl_sync_stream_properties *prop;
1192         struct gaudi_device *gaudi;
1193
1194         gaudi = hdev->asic_specific;
1195         cprop = &gaudi->collective_props;
1196         queue_id = job->hw_queue_id;
1197         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1198
1199         master_sob_base =
1200                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1201         master_monitor = prop->collective_mstr_mon_id[0];
1202
1203         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1204
1205         dev_dbg(hdev->dev,
1206                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1207                 master_sob_base, cprop->mstr_sob_mask[0],
1208                 cprop->next_sob_group_val[stream],
1209                 master_monitor, queue_id);
1210
1211         wait_prop.data = (void *) job->patched_cb;
1212         wait_prop.sob_base = master_sob_base;
1213         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1214         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1215         wait_prop.mon_id = master_monitor;
1216         wait_prop.q_idx = queue_id;
1217         wait_prop.size = cb_size;
1218         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1219
1220         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1221         master_monitor = prop->collective_mstr_mon_id[1];
1222
1223         dev_dbg(hdev->dev,
1224                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1225                 master_sob_base, cprop->mstr_sob_mask[1],
1226                 cprop->next_sob_group_val[stream],
1227                 master_monitor, queue_id);
1228
1229         wait_prop.sob_base = master_sob_base;
1230         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1231         wait_prop.mon_id = master_monitor;
1232         wait_prop.size = cb_size;
1233         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1234 }
1235
1236 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1237                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1238 {
1239         struct hl_gen_wait_properties wait_prop;
1240         struct hl_sync_stream_properties *prop;
1241         u32 queue_id, cb_size = 0;
1242
1243         queue_id = job->hw_queue_id;
1244         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1245
1246         if (job->cs->encaps_signals) {
1247                 /* use the encaps signal handle stored earlier in the flow
1248                  * and set the SOB information from the encaps
1249                  * signals handle
1250                  */
1251                 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1252                                                 cs_cmpl);
1253
1254                 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1255                                 job->cs->sequence,
1256                                 cs_cmpl->hw_sob->sob_id,
1257                                 cs_cmpl->sob_val);
1258         }
1259
1260         /* Add to wait CBs using slave monitor */
1261         wait_prop.data = (void *) job->user_cb;
1262         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1263         wait_prop.sob_mask = 0x1;
1264         wait_prop.sob_val = cs_cmpl->sob_val;
1265         wait_prop.mon_id = prop->collective_slave_mon_id;
1266         wait_prop.q_idx = queue_id;
1267         wait_prop.size = cb_size;
1268
1269         dev_dbg(hdev->dev,
1270                 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1271                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1272                 prop->collective_slave_mon_id, queue_id);
1273
1274         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1275
1276         dev_dbg(hdev->dev,
1277                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1278                 prop->collective_sob_id, queue_id);
1279
1280         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1281                         prop->collective_sob_id, cb_size, false);
1282 }
1283
1284 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1285 {
1286         struct hl_cs_compl *signal_cs_cmpl =
1287                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1288         struct hl_cs_compl *cs_cmpl =
1289                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1290         struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1291         struct gaudi_collective_properties *cprop;
1292         u32 stream, queue_id, sob_group_offset;
1293         struct gaudi_device *gaudi;
1294         struct hl_device *hdev;
1295         struct hl_cs_job *job;
1296         struct hl_ctx *ctx;
1297
1298         ctx = cs->ctx;
1299         hdev = ctx->hdev;
1300         gaudi = hdev->asic_specific;
1301         cprop = &gaudi->collective_props;
1302
1303         if (cs->encaps_signals) {
1304                 cs_cmpl->hw_sob = handle->hw_sob;
1305                 /* at this checkpoint we only need the hw_sob pointer
1306                  * for the completion check before starting to go over the
1307                  * jobs of the master/slaves. The sob_value will be taken
1308                  * later on in gaudi_collective_slave_init_job, depending on
1309                  * each job's wait offset value.
1310                  */
1311                 cs_cmpl->sob_val = 0;
1312         } else {
1313                 /* copy the SOB id and value of the signal CS */
1314                 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1315                 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1316         }
1317
1318         /* Check again if the signal cs has already completed.
1319          * If it has, don't send any wait cs since the hw_sob
1320          * could already be in reset. If the signal has not completed,
1321          * take a refcount on the hw_sob to prevent resetting the sob
1322          * while the wait cs is not yet submitted.
1323          * Note that this check is protected by two locks:
1324          * the hw queue lock and the completion object lock.
1325          * The same completion object lock also protects
1326          * the hw_sob reset handler function.
1327          * The hw_queue lock prevents the hw_sob refcount value,
1328          * which is changed by the signal/wait flows, from going out of sync.
1329          */
1330         spin_lock(&signal_cs_cmpl->lock);
1331
1332         if (completion_done(&cs->signal_fence->completion)) {
1333                 spin_unlock(&signal_cs_cmpl->lock);
1334                 return -EINVAL;
1335         }
1336         /* Increment kref since all slave queues are now waiting on it */
1337         kref_get(&cs_cmpl->hw_sob->kref);
1338
1339         spin_unlock(&signal_cs_cmpl->lock);
1340
1341         /* Calculate the stream from collective master queue (1st job) */
1342         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1343         stream = job->hw_queue_id % 4;
1344         sob_group_offset =
1345                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1346
1347         list_for_each_entry(job, &cs->job_list, cs_node) {
1348                 queue_id = job->hw_queue_id;
1349
1350                 if (hdev->kernel_queues[queue_id].collective_mode ==
1351                                 HL_COLLECTIVE_MASTER)
1352                         gaudi_collective_master_init_job(hdev, job, stream,
1353                                                 sob_group_offset);
1354                 else
1355                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1356         }
1357
1358         cs_cmpl->sob_group = sob_group_offset;
1359
1360         /* Handle sob group kref and wraparound */
1361         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1362         cprop->next_sob_group_val[stream]++;
1363
1364         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1365                 /*
1366                  * Decrement as we reached the max value.
1367                  * The release function won't be called here as we've
1368                  * just incremented the refcount.
1369                  */
1370                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1371                                 gaudi_sob_group_reset_error);
1372                 cprop->next_sob_group_val[stream] = 1;
1373                 /* only two SOBs are currently in use */
1374                 cprop->curr_sob_group_idx[stream] =
1375                         (cprop->curr_sob_group_idx[stream] + 1) &
1376                                                         (HL_RSVD_SOBS - 1);
1377
1378                 gaudi_collective_map_sobs(hdev, stream);
1379
1380                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1381                                 cprop->curr_sob_group_idx[stream], stream);
1382         }
1383
1384         mb();
1385         hl_fence_put(cs->signal_fence);
1386         cs->signal_fence = NULL;
1387
1388         return 0;
1389 }
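
/*
 * Worked example for the SOB group bookkeeping above (a sketch, assuming
 * HL_RSVD_SOBS == 2 as the "only two SOBs" comment implies):
 *
 *   stream = master hw_queue_id % 4                 e.g. stream 1
 *   sob_group_offset = stream * HL_RSVD_SOBS +
 *                      curr_sob_group_idx[stream]   e.g. 1 * 2 + 0 = 2
 *
 * When next_sob_group_val[stream] reaches HL_MAX_SOB_VAL, the value is reset
 * to 1 and the group index wraps around with a power-of-two mask:
 *
 *   curr_sob_group_idx[stream] = (idx + 1) & (HL_RSVD_SOBS - 1)   -> 0, 1, 0, ...
 */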
1390
1391 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1392                 struct hl_ctx *ctx, struct hl_cs *cs,
1393                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1394                 u32 encaps_signal_offset)
1395 {
1396         struct hw_queue_properties *hw_queue_prop;
1397         struct hl_cs_counters_atomic *cntr;
1398         struct hl_cs_job *job;
1399         struct hl_cb *cb;
1400         u32 cb_size;
1401         bool patched_cb;
1402
1403         cntr = &hdev->aggregated_cs_counters;
1404
1405         if (mode == HL_COLLECTIVE_MASTER) {
1406                 /* CB size of collective master queue contains
1407                  * 4 msg short packets for monitor 1 configuration
1408                  * 1 fence packet
1409                  * 4 msg short packets for monitor 2 configuration
1410                  * 1 fence packet
1411                  * 2 msg prot packets for completion and MSI-X
1412                  */
1413                 cb_size = sizeof(struct packet_msg_short) * 8 +
1414                                 sizeof(struct packet_fence) * 2 +
1415                                 sizeof(struct packet_msg_prot) * 2;
1416                 patched_cb = true;
1417         } else {
1418                 /* CB size of collective slave queues contains
1419                  * 4 msg short packets for monitor configuration
1420                  * 1 fence packet
1421                  * 1 additional msg short packet for sob signal
1422                  */
1423                 cb_size = sizeof(struct packet_msg_short) * 5 +
1424                                 sizeof(struct packet_fence);
1425                 patched_cb = false;
1426         }
1427
1428         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1429         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1430         if (!job) {
1431                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1432                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1433                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1434                 return -ENOMEM;
1435         }
1436
1437         /* Allocate an internal mapped CB for non-patched CBs */
1438         cb = hl_cb_kernel_create(hdev, cb_size,
1439                         hdev->mmu_enable && !patched_cb);
1440         if (!cb) {
1441                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1442                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1443                 kfree(job);
1444                 return -EFAULT;
1445         }
1446
1447         job->id = 0;
1448         job->cs = cs;
1449         job->user_cb = cb;
1450         atomic_inc(&job->user_cb->cs_cnt);
1451         job->user_cb_size = cb_size;
1452         job->hw_queue_id = queue_id;
1453
1454         /* Since the collective wait cs is guaranteed to have only one
1455          * chunk, we can use this chunk to set the encapsulated signal
1456          * offset in the jobs.
1457          */
1458         if (cs->encaps_signals)
1459                 job->encaps_sig_wait_offset = encaps_signal_offset;
1460
1461         /*
1462          * No need for parsing, the user CB is the patched CB.
1463          * We call hl_cb_destroy() for two reasons: we don't need
1464          * the CB in the CB idr anymore, and to decrement its refcount as
1465          * it was incremented inside hl_cb_kernel_create().
1466          */
1467         if (patched_cb)
1468                 job->patched_cb = job->user_cb;
1469         else
1470                 job->patched_cb = NULL;
1471
1472         job->job_cb_size = job->user_cb_size;
1473         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1474
1475         /* Increment refcount since we get a completion for external queues */
1476         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1477                 cs_get(cs);
1478
1479         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1480
1481         list_add_tail(&job->cs_node, &cs->job_list);
1482
1483         hl_debugfs_add_job(hdev, job);
1484
1485         return 0;
1486 }
1487
1488 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1489                 struct hl_ctx *ctx, struct hl_cs *cs,
1490                 u32 wait_queue_id, u32 collective_engine_id,
1491                 u32 encaps_signal_offset)
1492 {
1493         struct gaudi_device *gaudi = hdev->asic_specific;
1494         struct hw_queue_properties *hw_queue_prop;
1495         u32 queue_id, collective_queue, num_jobs;
1496         u32 stream, nic_queue, nic_idx = 0;
1497         bool skip;
1498         int i, rc = 0;
1499
1500         /* Verify wait queue id is configured as master */
1501         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1502         if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1503                 dev_err(hdev->dev,
1504                         "Queue %d is not configured as collective master\n",
1505                         wait_queue_id);
1506                 return -EINVAL;
1507         }
1508
1509         /* Verify engine id is supported */
1510         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1511                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1512                 dev_err(hdev->dev,
1513                         "Collective wait does not support engine %u\n",
1514                         collective_engine_id);
1515                 return -EINVAL;
1516         }
1517
1518         stream = wait_queue_id % 4;
1519
1520         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1521                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1522         else
1523                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1524
1525         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1526         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1527
1528         /* The first job goes to the collective master queue; it will wait
1529          * for the collective slave queues to finish execution.
1530          * The synchronization is done using two monitors:
1531          * the first monitor for NICs 0-7, the second monitor for NICs 8-9
1532          * and the reduction engine (DMA5/TPC7).
1533          *
1534          * The rest of the jobs go to the collective slave queues, which will
1535          * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1536          */
1537         for (i = 0 ; i < num_jobs ; i++) {
1538                 if (i == 0) {
1539                         queue_id = wait_queue_id;
1540                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1541                                 HL_COLLECTIVE_MASTER, queue_id,
1542                                 wait_queue_id, encaps_signal_offset);
1543                 } else {
1544                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1545                                 if (gaudi->hw_cap_initialized &
1546                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1547                                         skip = false;
1548                                 else
1549                                         skip = true;
1550
1551                                 queue_id = nic_queue;
1552                                 nic_queue += 4;
1553                                 nic_idx++;
1554
1555                                 if (skip)
1556                                         continue;
1557                         } else {
1558                                 queue_id = collective_queue;
1559                         }
1560
1561                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1562                                 HL_COLLECTIVE_SLAVE, queue_id,
1563                                 wait_queue_id, encaps_signal_offset);
1564                 }
1565
1566                 if (rc)
1567                         return rc;
1568         }
1569
1570         return rc;
1571 }
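
/*
 * Illustrative layout of the jobs created above (a sketch; the exact counts
 * come from NUMBER_OF_SOBS_IN_GRP and NIC_NUMBER_OF_ENGINES):
 *
 *   job 0            HL_COLLECTIVE_MASTER on wait_queue_id
 *   jobs 1..N        HL_COLLECTIVE_SLAVE on the NIC queues of the same
 *                    stream (nic_queue advances by 4 per NIC so it stays on
 *                    stream = wait_queue_id % 4); NICs whose HW capability
 *                    bit is not set are skipped
 *   remaining job(s) HL_COLLECTIVE_SLAVE on the reduction engine queue
 *                    (DMA_5 or TPC_7) of the same stream
 */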
1572
1573 static int gaudi_late_init(struct hl_device *hdev)
1574 {
1575         struct gaudi_device *gaudi = hdev->asic_specific;
1576         int rc;
1577
1578         rc = gaudi->cpucp_info_get(hdev);
1579         if (rc) {
1580                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1581                 return rc;
1582         }
1583
1584         if ((hdev->card_type == cpucp_card_type_pci) &&
1585                         (hdev->nic_ports_mask & 0x3)) {
1586                 dev_info(hdev->dev,
1587                         "PCI card detected, only 8 ports are enabled\n");
1588                 hdev->nic_ports_mask &= ~0x3;
1589
1590                 /* Stop and disable unused NIC QMANs */
1591                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1592                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1593                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1594
1595                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1596                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1597                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1598
1599                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1600                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1601
1602                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1603         }
1604
1605         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1606         if (rc) {
1607                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1608                 return rc;
1609         }
1610
1611         /* Scrub both SRAM and DRAM */
1612         rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
1613         if (rc)
1614                 goto disable_pci_access;
1615
1616         rc = gaudi_fetch_psoc_frequency(hdev);
1617         if (rc) {
1618                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1619                 goto disable_pci_access;
1620         }
1621
1622         rc = gaudi_mmu_clear_pgt_range(hdev);
1623         if (rc) {
1624                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1625                 goto disable_pci_access;
1626         }
1627
1628         rc = gaudi_init_tpc_mem(hdev);
1629         if (rc) {
1630                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1631                 goto disable_pci_access;
1632         }
1633
1634         rc = gaudi_collective_init(hdev);
1635         if (rc) {
1636                 dev_err(hdev->dev, "Failed to init collective\n");
1637                 goto disable_pci_access;
1638         }
1639
1640         /* We only support a single ASID for the user, so for the sake of optimization, just
1641          * initialize the ASID once during device initialization with the fixed value of 1.
1642          */
1643         gaudi_mmu_prepare(hdev, 1);
1644
1645         hl_fw_set_pll_profile(hdev);
1646
1647         return 0;
1648
1649 disable_pci_access:
1650         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1651
1652         return rc;
1653 }
1654
1655 static void gaudi_late_fini(struct hl_device *hdev)
1656 {
1657         const struct hwmon_channel_info **channel_info_arr;
1658         int i = 0;
1659
1660         if (!hdev->hl_chip_info->info)
1661                 return;
1662
1663         channel_info_arr = hdev->hl_chip_info->info;
1664
1665         while (channel_info_arr[i]) {
1666                 kfree(channel_info_arr[i]->config);
1667                 kfree(channel_info_arr[i]);
1668                 i++;
1669         }
1670
1671         kfree(channel_info_arr);
1672
1673         hdev->hl_chip_info->info = NULL;
1674 }
1675
1676 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1677 {
1678         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1679         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1680         int i, j, rc = 0;
1681
1682         /*
1683          * The device CPU works with 40-bit addresses, while bit 39 must be set
1684          * to '1' when accessing the host.
1685          * Bits 49:39 of the full host address are saved for a later
1686          * configuration of the HW to perform the extension to 50 bits.
1687          * Because there is a single HW register that holds the extension bits,
1688          * these bits must be identical across the entire allocated range.
1689          */
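        /*
         * For example (illustration only): if an allocation starts just below
         * a bit-39 boundary and HL_CPU_ACCESSIBLE_MEM_SIZE crosses it, then
         * GAUDI_CPU_PCI_MSB_ADDR(start) != GAUDI_CPU_PCI_MSB_ADDR(end) and the
         * buffer is unusable. The loop below therefore keeps each rejected
         * attempt allocated (so the allocator cannot return the same range
         * again), retries up to GAUDI_ALLOC_CPU_MEM_RETRY_CNT times, and frees
         * the rejected buffers in the cleanup loop at the end.
         */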
1690
1691         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1692                 virt_addr_arr[i] =
1693                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1694                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1695                                                 &dma_addr_arr[i],
1696                                                 GFP_KERNEL | __GFP_ZERO);
1697                 if (!virt_addr_arr[i]) {
1698                         rc = -ENOMEM;
1699                         goto free_dma_mem_arr;
1700                 }
1701
1702                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1703                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1704                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1705                         break;
1706         }
1707
1708         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1709                 dev_err(hdev->dev,
1710                         "MSBs of CPU accessible DMA memory are not identical across the entire range\n");
1711                 rc = -EFAULT;
1712                 goto free_dma_mem_arr;
1713         }
1714
1715         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1716         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1717         hdev->cpu_pci_msb_addr =
1718                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1719
1720         if (!hdev->asic_prop.fw_security_enabled)
1721                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1722
1723 free_dma_mem_arr:
1724         for (j = 0 ; j < i ; j++)
1725                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1726                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1727                                                 virt_addr_arr[j],
1728                                                 dma_addr_arr[j]);
1729
1730         return rc;
1731 }
1732
1733 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1734 {
1735         struct gaudi_device *gaudi = hdev->asic_specific;
1736         struct gaudi_internal_qman_info *q;
1737         u32 i;
1738
1739         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1740                 q = &gaudi->internal_qmans[i];
1741                 if (!q->pq_kernel_addr)
1742                         continue;
1743                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1744                                                         q->pq_kernel_addr,
1745                                                         q->pq_dma_addr);
1746         }
1747 }
1748
1749 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1750 {
1751         struct gaudi_device *gaudi = hdev->asic_specific;
1752         struct gaudi_internal_qman_info *q;
1753         int rc, i;
1754
1755         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1756                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1757                         continue;
1758
1759                 q = &gaudi->internal_qmans[i];
1760
1761                 switch (i) {
1762                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1763                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1764                         break;
1765                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1766                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1767                         break;
1768                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1769                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1770                         break;
1771                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1772                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1773                         break;
1774                 default:
1775                         dev_err(hdev->dev, "Bad internal queue index %d", i);
1776                         rc = -EINVAL;
1777                         goto free_internal_qmans_pq_mem;
1778                 }
1779
1780                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1781                                                 hdev, q->pq_size,
1782                                                 &q->pq_dma_addr,
1783                                                 GFP_KERNEL | __GFP_ZERO);
1784                 if (!q->pq_kernel_addr) {
1785                         rc = -ENOMEM;
1786                         goto free_internal_qmans_pq_mem;
1787                 }
1788         }
1789
1790         return 0;
1791
1792 free_internal_qmans_pq_mem:
1793         gaudi_free_internal_qmans_pq_mem(hdev);
1794         return rc;
1795 }
1796
1797 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1798 {
1799         struct asic_fixed_properties *prop = &hdev->asic_prop;
1800         struct pci_mem_region *region;
1801
1802         /* CFG */
1803         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1804         region->region_base = CFG_BASE;
1805         region->region_size = CFG_SIZE;
1806         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1807         region->bar_size = CFG_BAR_SIZE;
1808         region->bar_id = CFG_BAR_ID;
1809         region->used = 1;
1810
1811         /* SRAM */
1812         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1813         region->region_base = SRAM_BASE_ADDR;
1814         region->region_size = SRAM_SIZE;
1815         region->offset_in_bar = 0;
1816         region->bar_size = SRAM_BAR_SIZE;
1817         region->bar_id = SRAM_BAR_ID;
1818         region->used = 1;
1819
1820         /* DRAM */
1821         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1822         region->region_base = DRAM_PHYS_BASE;
1823         region->region_size = hdev->asic_prop.dram_size;
1824         region->offset_in_bar = 0;
1825         region->bar_size = prop->dram_pci_bar_size;
1826         region->bar_id = HBM_BAR_ID;
1827         region->used = 1;
1828
1829         /* SP SRAM */
1830         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1831         region->region_base = PSOC_SCRATCHPAD_ADDR;
1832         region->region_size = PSOC_SCRATCHPAD_SIZE;
1833         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1834         region->bar_size = CFG_BAR_SIZE;
1835         region->bar_id = CFG_BAR_ID;
1836         region->used = 1;
1837 }
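
/*
 * Plausible usage sketch (illustration only, not part of this function):
 * a device address 'addr' that falls inside one of the regions configured
 * above can be reached through the region's PCI BAR at
 *
 *   bar_offset = (addr - region->region_base) + region->offset_in_bar;
 *
 * e.g. for the CFG region, offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR
 * accounts for the CFG space not starting at the beginning of its BAR.
 */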
1838
1839 static int gaudi_sw_init(struct hl_device *hdev)
1840 {
1841         struct gaudi_device *gaudi;
1842         u32 i, event_id = 0;
1843         int rc;
1844
1845         /* Allocate device structure */
1846         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1847         if (!gaudi)
1848                 return -ENOMEM;
1849
1850         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1851                 if (gaudi_irq_map_table[i].valid) {
1852                         if (event_id == GAUDI_EVENT_SIZE) {
1853                                 dev_err(hdev->dev,
1854                                         "Event array exceeds the limit of %u events\n",
1855                                         GAUDI_EVENT_SIZE);
1856                                 rc = -EINVAL;
1857                                 goto free_gaudi_device;
1858                         }
1859
1860                         gaudi->events[event_id++] =
1861                                         gaudi_irq_map_table[i].fc_id;
1862                 }
1863         }
1864
1865         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1866
1867         hdev->asic_specific = gaudi;
1868
1869         /* Create DMA pool for small allocations */
1870         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1871                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1872         if (!hdev->dma_pool) {
1873                 dev_err(hdev->dev, "failed to create DMA pool\n");
1874                 rc = -ENOMEM;
1875                 goto free_gaudi_device;
1876         }
1877
1878         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1879         if (rc)
1880                 goto free_dma_pool;
1881
1882         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1883         if (!hdev->cpu_accessible_dma_pool) {
1884                 dev_err(hdev->dev,
1885                         "Failed to create CPU accessible DMA pool\n");
1886                 rc = -ENOMEM;
1887                 goto free_cpu_dma_mem;
1888         }
1889
1890         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1891                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1892                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1893         if (rc) {
1894                 dev_err(hdev->dev,
1895                         "Failed to add memory to CPU accessible DMA pool\n");
1896                 rc = -EFAULT;
1897                 goto free_cpu_accessible_dma_pool;
1898         }
1899
1900         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1901         if (rc)
1902                 goto free_cpu_accessible_dma_pool;
1903
1904         spin_lock_init(&gaudi->hw_queues_lock);
1905
1906         hdev->supports_sync_stream = true;
1907         hdev->supports_coresight = true;
1908         hdev->supports_staged_submission = true;
1909         hdev->supports_wait_for_multi_cs = true;
1910
1911         hdev->asic_funcs->set_pci_memory_regions(hdev);
1912         hdev->stream_master_qid_arr =
1913                                 hdev->asic_funcs->get_stream_master_qid_arr();
1914         hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1915
1916         return 0;
1917
1918 free_cpu_accessible_dma_pool:
1919         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1920 free_cpu_dma_mem:
1921         if (!hdev->asic_prop.fw_security_enabled)
1922                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1923                                         hdev->cpu_pci_msb_addr);
1924         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1925                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1926                         hdev->cpu_accessible_dma_mem,
1927                         hdev->cpu_accessible_dma_address);
1928 free_dma_pool:
1929         dma_pool_destroy(hdev->dma_pool);
1930 free_gaudi_device:
1931         kfree(gaudi);
1932         return rc;
1933 }
1934
1935 static int gaudi_sw_fini(struct hl_device *hdev)
1936 {
1937         struct gaudi_device *gaudi = hdev->asic_specific;
1938
1939         gaudi_free_internal_qmans_pq_mem(hdev);
1940
1941         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1942
1943         if (!hdev->asic_prop.fw_security_enabled)
1944                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1945                                         hdev->cpu_pci_msb_addr);
1946
1947         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1948                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1949                         hdev->cpu_accessible_dma_mem,
1950                         hdev->cpu_accessible_dma_address);
1951
1952         dma_pool_destroy(hdev->dma_pool);
1953
1954         kfree(gaudi);
1955
1956         return 0;
1957 }
1958
1959 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1960 {
1961         struct hl_device *hdev = arg;
1962         int i;
1963
1964         if (hdev->disabled)
1965                 return IRQ_HANDLED;
1966
1967         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1968                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1969
1970         hl_irq_handler_eq(irq, &hdev->event_queue);
1971
1972         return IRQ_HANDLED;
1973 }
1974
1975 /*
1976  * For backward compatibility, new MSI interrupts should be set after the
1977  * existing CPU and NIC interrupts.
1978  */
1979 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1980                                 bool cpu_eq)
1981 {
1982         int msi_vec;
1983
1984         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1985                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1986                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1987
1988         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1989                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1990
1991         return pci_irq_vector(hdev->pdev, msi_vec);
1992 }
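
/*
 * Worked example of the vector mapping above (a sketch; the actual values of
 * GAUDI_EVENT_QUEUE_MSI_IDX and NIC_NUMBER_OF_ENGINES come from the headers):
 *
 *   nr < GAUDI_EVENT_QUEUE_MSI_IDX (completion queues)    -> MSI vector nr
 *   cpu_eq == true (nr must be GAUDI_EVENT_QUEUE_MSI_IDX) -> MSI vector nr
 *   newer interrupts (nr >= GAUDI_EVENT_QUEUE_MSI_IDX, !cpu_eq)
 *       -> MSI vector nr + NIC_NUMBER_OF_ENGINES + 1, i.e. placed after the
 *          CPU EQ and NIC vectors to keep the legacy numbering intact
 */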
1993
1994 static int gaudi_enable_msi_single(struct hl_device *hdev)
1995 {
1996         int rc, irq;
1997
1998         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1999
2000         irq = gaudi_pci_irq_vector(hdev, 0, false);
2001         rc = request_irq(irq, gaudi_irq_handler_single, 0,
2002                         "gaudi single msi", hdev);
2003         if (rc)
2004                 dev_err(hdev->dev,
2005                         "Failed to request single MSI IRQ\n");
2006
2007         return rc;
2008 }
2009
2010 static int gaudi_enable_msi_multi(struct hl_device *hdev)
2011 {
2012         int cq_cnt = hdev->asic_prop.completion_queues_count;
2013         int rc, i, irq_cnt_init, irq;
2014
2015         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2016                 irq = gaudi_pci_irq_vector(hdev, i, false);
2017                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
2018                                 &hdev->completion_queue[i]);
2019                 if (rc) {
2020                         dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2021                         goto free_irqs;
2022                 }
2023         }
2024
2025         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2026         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2027                                 &hdev->event_queue);
2028         if (rc) {
2029                 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2030                 goto free_irqs;
2031         }
2032
2033         return 0;
2034
2035 free_irqs:
2036         for (i = 0 ; i < irq_cnt_init ; i++)
2037                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
2038                                 &hdev->completion_queue[i]);
2039         return rc;
2040 }
2041
2042 static int gaudi_enable_msi(struct hl_device *hdev)
2043 {
2044         struct gaudi_device *gaudi = hdev->asic_specific;
2045         int rc;
2046
2047         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2048                 return 0;
2049
2050         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2051         if (rc < 0) {
2052                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2053                 return rc;
2054         }
2055
2056         if (rc < NUMBER_OF_INTERRUPTS) {
2057                 gaudi->multi_msi_mode = false;
2058                 rc = gaudi_enable_msi_single(hdev);
2059         } else {
2060                 gaudi->multi_msi_mode = true;
2061                 rc = gaudi_enable_msi_multi(hdev);
2062         }
2063
2064         if (rc)
2065                 goto free_pci_irq_vectors;
2066
2067         gaudi->hw_cap_initialized |= HW_CAP_MSI;
2068
2069         return 0;
2070
2071 free_pci_irq_vectors:
2072         pci_free_irq_vectors(hdev->pdev);
2073         return rc;
2074 }
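
/*
 * Note (sketch): pci_alloc_irq_vectors() returns the number of vectors it
 * actually allocated, so the comparison against NUMBER_OF_INTERRUPTS above is
 * what selects between the single-MSI handler (one shared vector that polls
 * all completion queues and the event queue) and multi-MSI mode with one
 * vector per queue.
 */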
2075
2076 static void gaudi_sync_irqs(struct hl_device *hdev)
2077 {
2078         struct gaudi_device *gaudi = hdev->asic_specific;
2079         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2080
2081         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2082                 return;
2083
2084         /* Wait for all pending IRQ handlers to finish */
2085         if (gaudi->multi_msi_mode) {
2086                 for (i = 0 ; i < cq_cnt ; i++)
2087                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2088
2089                 synchronize_irq(gaudi_pci_irq_vector(hdev,
2090                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
2091                                                 true));
2092         } else {
2093                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2094         }
2095 }
2096
2097 static void gaudi_disable_msi(struct hl_device *hdev)
2098 {
2099         struct gaudi_device *gaudi = hdev->asic_specific;
2100         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2101
2102         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2103                 return;
2104
2105         gaudi_sync_irqs(hdev);
2106
2107         if (gaudi->multi_msi_mode) {
2108                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2109                                                 true);
2110                 free_irq(irq, &hdev->event_queue);
2111
2112                 for (i = 0 ; i < cq_cnt ; i++) {
2113                         irq = gaudi_pci_irq_vector(hdev, i, false);
2114                         free_irq(irq, &hdev->completion_queue[i]);
2115                 }
2116         } else {
2117                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2118         }
2119
2120         pci_free_irq_vectors(hdev->pdev);
2121
2122         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2123 }
2124
2125 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2126 {
2127         struct gaudi_device *gaudi = hdev->asic_specific;
2128
2129         if (hdev->asic_prop.fw_security_enabled)
2130                 return;
2131
2132         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2133                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2134                 return;
2135
2136         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2137                 return;
2138
2139         if (!hdev->sram_scrambler_enable)
2140                 return;
2141
2142         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2143                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2144         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2145                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2146         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2147                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2148         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2149                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2150         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2151                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2152         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2153                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2154         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2155                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2156         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2157                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2158
2159         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2160                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2161         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2162                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2163         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2164                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2165         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2166                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2167         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2168                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2169         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2170                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2171         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2172                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2173         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2174                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2175
2176         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2177                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2178         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2179                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2180         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2181                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2182         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2183                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2184         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2185                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2186         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2187                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2188         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2189                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2190         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2191                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2192
2193         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2194 }
2195
2196 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2197 {
2198         struct gaudi_device *gaudi = hdev->asic_specific;
2199
2200         if (hdev->asic_prop.fw_security_enabled)
2201                 return;
2202
2203         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2204                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2205                 return;
2206
2207         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2208                 return;
2209
2210         if (!hdev->dram_scrambler_enable)
2211                 return;
2212
2213         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2214                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2215         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2216                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2217         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2218                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2219         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2220                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2221         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2222                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2223         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2224                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2225         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2226                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2227         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2228                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2229
2230         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2231                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2232         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2233                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2234         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2235                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2236         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2237                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2238         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2239                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2240         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2241                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2242         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2243                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2244         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2245                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2246
2247         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2248                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2249         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2250                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2251         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2252                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2253         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2254                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2255         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2256                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2257         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2258                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2259         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2260                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2261         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2262                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2263
2264         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2265 }
2266
2267 static void gaudi_init_e2e(struct hl_device *hdev)
2268 {
2269         if (hdev->asic_prop.fw_security_enabled)
2270                 return;
2271
2272         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2273                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2274                 return;
2275
2276         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2277         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2278         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2279         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2280
2281         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2282         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2283         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2284         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2285
2286         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2287         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2288         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2289         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2290
2291         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2292         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2293         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2294         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2295
2296         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2297         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2298         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2299         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2300
2301         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2302         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2303         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2304         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2305
2306         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2307         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2308         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2309         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2310
2311         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2312         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2313         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2314         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2315
2316         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2317         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2318         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2319         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2320
2321         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2322         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2323         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2324         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2325
2326         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2327         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2328         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2329         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2330
2331         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2332         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2333         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2334         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2335
2336         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2337         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2338         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2339         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2340
2341         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2342         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2343         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2344         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2345
2346         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2347         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2348         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2349         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2350
2351         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2352         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2353         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2354         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2355
2356         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2357         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2358         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2359         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2360
2361         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2362         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2363         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2364         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2365
2366         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2367         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2368         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2369         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2370
2371         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2372         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2373         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2374         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2375
2376         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2377         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2378         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2379         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2380
2381         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2382         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2383         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2384         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2385
2386         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2387         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2388         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2389         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2390
2391         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2392         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2393         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2394         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2395
2396         if (!hdev->dram_scrambler_enable) {
2397                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2398                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2399                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2400                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2401
2402                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2403                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2404                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2405                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2406
2407                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2408                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2409                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2410                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2411
2412                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2413                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2414                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2415                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2416
2417                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2418                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2419                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2420                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2421
2422                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2423                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2424                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2425                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2426
2427                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2428                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2429                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2430                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2431
2432                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2433                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2434                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2435                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2436
2437                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2438                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2439                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2440                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2441
2442                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2443                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2444                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2445                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2446
2447                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2448                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2449                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2450                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2451
2452                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2453                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2454                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2455                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2456
2457                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2458                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2459                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2460                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2461
2462                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2463                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2464                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2465                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2466
2467                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2468                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2469                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2470                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2471
2472                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2473                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2474                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2475                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2476
2477                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2478                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2479                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2480                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2481
2482                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2483                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2484                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2485                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2486
2487                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2488                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2489                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2490                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2491
2492                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2493                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2494                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2495                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2496
2497                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2498                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2499                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2500                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2501
2502                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2503                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2504                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2505                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2506
2507                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2508                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2509                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2510                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2511
2512                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2513                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2514                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2515                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2516         }
2517
2518         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2519                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2520         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2521                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2522
2523         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2524                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2525         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2526                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2527
2528         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2529                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2530         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2531                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2532
2533         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2534                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2535         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2536                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2537
2538         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2539                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2540         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2541                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2542
2543         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2544                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2545         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2546                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2547
2548         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2549                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2550         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2551                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2552
2553         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2554                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2555         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2556                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2557
2558         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2559                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2560         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2561                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2562
2563         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2564                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2565         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2566                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2567
2568         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2569                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2570         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2571                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2572
2573         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2574                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2575         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2576                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2577
2578         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2579                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2580         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2581                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2582
2583         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2584                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2585         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2586                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2587
2588         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2589                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2590         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2591                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2592
2593         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2594                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2595         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2596                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2597
2598         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2599                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2600         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2601                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2602
2603         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2604                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2605         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2606                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2607
2608         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2609                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2610         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2611                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2612
2613         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2614                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2615         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2616                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2617
2618         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2619                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2620         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2621                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2622
2623         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2624                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2625         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2626                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2627
2628         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2629                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2630         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2631                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2632
2633         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2634                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2635         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2636                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2637 }
2638
2639 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2640 {
2641         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2642
2643         if (hdev->asic_prop.fw_security_enabled)
2644                 return;
2645
2646         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2647                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2648                 return;
2649
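	/*
	 * Default write/read credit counts for HBM0/HBM1 on each DMA_IF,
	 * used only when the boot firmware has not already configured the
	 * HBM credits (see the CPU_BOOT_DEV_STS0_HBM_CRED_EN check above).
	 */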
2650         hbm0_wr = 0x33333333;
2651         hbm0_rd = 0x77777777;
2652         hbm1_wr = 0x55555555;
2653         hbm1_rd = 0xDDDDDDDD;
2654
2655         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2656         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2657         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2658         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2659
2660         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2661         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2662         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2663         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2664
2665         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2666         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2667         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2668         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2669
2670         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2671         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2672         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2673         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2674
2675         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2676                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2677                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2678         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2679                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2680                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2681         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2682                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2683                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2684         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2685                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2686                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2687
2688         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2689                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2690                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2691         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2692                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2693                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2694         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2695                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2696                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2697         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2698                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2699                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2700 }
2701
2702 static void gaudi_init_golden_registers(struct hl_device *hdev)
2703 {
2704         u32 tpc_offset;
2705         int tpc_id, i;
2706
2707         gaudi_init_e2e(hdev);
2708         gaudi_init_hbm_cred(hdev);
2709
2710         for (tpc_id = 0, tpc_offset = 0;
2711                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2712                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2713                 /* Mask all arithmetic interrupts from TPC */
2714                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2715                 /* Set 16 cache lines */
2716                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2717                                 ICACHE_FETCH_LINE_NUM, 2);
2718         }
2719
2720         /* Make sure the first 128 bytes in SRAM are 0 for Tensor DMA */
2721         for (i = 0 ; i < 128 ; i += 8)
2722                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2723
2724         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2725         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2726         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2727         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2728 }
2729
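/*
 * Configure a single stream of a PCI DMA QMAN: point the PQ base/size
 * registers at the queue buffer given by qman_pq_addr, point the CP
 * message base registers at the sync manager monitors and sync objects,
 * and, for stream 0 only, set up the QMAN-wide error (RAZWI) reporting
 * and the arbiter watchdog.
 */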
2730 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2731                                         int qman_id, dma_addr_t qman_pq_addr)
2732 {
2733         struct cpu_dyn_regs *dyn_regs =
2734                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2735         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2736         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2737         u32 q_off, dma_qm_offset;
2738         u32 dma_qm_err_cfg, irq_handler_offset;
2739
2740         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2741
2742         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2743                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2744         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2745                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2746         so_base_en_lo = lower_32_bits(CFG_BASE +
2747                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2748         so_base_en_hi = upper_32_bits(CFG_BASE +
2749                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2750         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2751                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2752         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2753                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2754         so_base_ws_lo = lower_32_bits(CFG_BASE +
2755                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2756         so_base_ws_hi = upper_32_bits(CFG_BASE +
2757                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2758
2759         q_off = dma_qm_offset + qman_id * 4;
2760
2761         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2762         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2763
2764         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2765         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2766         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2767
2768         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2769         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2770                                                         QMAN_LDMA_SRC_OFFSET);
2771         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2772                                                         QMAN_LDMA_DST_OFFSET);
2773
2774         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2775         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2776         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2777         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2778         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2779         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2780         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2781         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2782
2783         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2784
2785         /* The following configuration is needed only once per QMAN */
2786         if (qman_id == 0) {
2787                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2788                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2789                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2790
2791                 /* Configure RAZWI IRQ */
2792                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2793                 if (hdev->stop_on_err)
2794                         dma_qm_err_cfg |=
2795                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2796
2797                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2798
2799                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2800                         lower_32_bits(CFG_BASE + irq_handler_offset));
2801                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2802                         upper_32_bits(CFG_BASE + irq_handler_offset));
2803
2804                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2805                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2806                                                                         dma_id);
2807
2808                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2809                                 QM_ARB_ERR_MSG_EN_MASK);
2810
2811                 /* Increase ARB WDT to support streams architecture */
2812                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2813                                 GAUDI_ARB_WDT_TIMEOUT);
2814
2815                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2816                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2817
2818                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2819         }
2820 }
2821
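/*
 * Configure the DMA core (engine) itself, common to PCI and HBM DMA
 * channels: maximum outstanding reads, error reporting towards the
 * interrupt handler, MMU bypass for secured channels, and engine enable.
 */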
2822 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2823 {
2824         struct cpu_dyn_regs *dyn_regs =
2825                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2826         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2827         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2828         u32 irq_handler_offset;
2829
2830         /* Set to maximum possible according to physical size */
2831         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2832         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2833
2834         /* WA for H/W bug H3-2116 */
2835         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2836
2837         /* STOP_ON bit implies the operation gets no completion in case of RAZWI */
2838         if (hdev->stop_on_err)
2839                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2840
2841         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2842
2843         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2844                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2845                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2846
2847         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2848                 lower_32_bits(CFG_BASE + irq_handler_offset));
2849         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2850                 upper_32_bits(CFG_BASE + irq_handler_offset));
2851
2852         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2853                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2854         WREG32(mmDMA0_CORE_PROT + dma_offset,
2855                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2856         /* If the channel is secured, it should be in MMU bypass mode */
2857         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2858                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2859         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2860 }
2861
2862 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2863                                 u32 enable_mask)
2864 {
2865         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2866
2867         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2868 }
2869
2870 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2871 {
2872         struct gaudi_device *gaudi = hdev->asic_specific;
2873         struct hl_hw_queue *q;
2874         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2875
2876         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2877                 return;
2878
2879         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2880                 dma_id = gaudi_dma_assignment[i];
2881                 /*
2882                  * For queues after the CPU Q, add 1 to get the correct
2883                  * queue index. The CPU EQ and NIC IRQs must also be
2884                  * accounted for to get the correct MSI register.
2885                  */
2886                 if (dma_id > 1) {
2887                         cpu_skip = 1;
2888                         nic_skip = NIC_NUMBER_OF_ENGINES;
2889                 } else {
2890                         cpu_skip = 0;
2891                         nic_skip = 0;
2892                 }
2893
2894                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2895                         q_idx = 4 * dma_id + j + cpu_skip;
2896                         q = &hdev->kernel_queues[q_idx];
2897                         q->cq_id = cq_id++;
2898                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2899                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2900                                                 q->bus_address);
2901                 }
2902
2903                 gaudi_init_dma_core(hdev, dma_id);
2904
2905                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2906         }
2907
2908         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2909 }
2910
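/*
 * Configure a single stream of an HBM DMA QMAN. Streams 0-3 are the
 * upper CPs, whose PQ base registers point at the driver's internal
 * queue buffers; qman_id 4 is the lower CP, which also carries the
 * QMAN-wide error (RAZWI) reporting and arbiter watchdog configuration.
 */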
2911 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2912                                         int qman_id, u64 qman_base_addr)
2913 {
2914         struct cpu_dyn_regs *dyn_regs =
2915                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2916         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2917         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2918         u32 dma_qm_err_cfg, irq_handler_offset;
2919         u32 q_off, dma_qm_offset;
2920
2921         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2922
2923         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2924                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2925         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2926                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2927         so_base_en_lo = lower_32_bits(CFG_BASE +
2928                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2929         so_base_en_hi = upper_32_bits(CFG_BASE +
2930                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2931         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2932                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2933         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2934                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2935         so_base_ws_lo = lower_32_bits(CFG_BASE +
2936                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2937         so_base_ws_hi = upper_32_bits(CFG_BASE +
2938                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2939
2940         q_off = dma_qm_offset + qman_id * 4;
2941
2942         if (qman_id < 4) {
2943                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2944                                         lower_32_bits(qman_base_addr));
2945                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2946                                         upper_32_bits(qman_base_addr));
2947
2948                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2949                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2950                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2951
2952                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2953                                                         QMAN_CPDMA_SIZE_OFFSET);
2954                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2955                                                         QMAN_CPDMA_SRC_OFFSET);
2956                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2957                                                         QMAN_CPDMA_DST_OFFSET);
2958         } else {
2959                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2960                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2961                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2962
2963                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2964                                                         QMAN_LDMA_SIZE_OFFSET);
2965                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2966                                                         QMAN_LDMA_SRC_OFFSET);
2967                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2968                                                         QMAN_LDMA_DST_OFFSET);
2969
2970                 /* Configure RAZWI IRQ */
2971                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2972                 if (hdev->stop_on_err)
2973                         dma_qm_err_cfg |=
2974                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2975
2976                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2977
2978                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2979                         lower_32_bits(CFG_BASE + irq_handler_offset));
2980                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2981                         upper_32_bits(CFG_BASE + irq_handler_offset));
2982
2983                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2984                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2985                                                                         dma_id);
2986
2987                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2988                                 QM_ARB_ERR_MSG_EN_MASK);
2989
2990                 /* Increase ARB WDT to support streams architecture */
2991                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2992                                 GAUDI_ARB_WDT_TIMEOUT);
2993
2994                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2995                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2996                                 QMAN_INTERNAL_MAKE_TRUSTED);
2997         }
2998
2999         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3000         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3001         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3002         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3003
3004         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
3005         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
3006                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3007                                 mtr_base_ws_lo);
3008                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3009                                 mtr_base_ws_hi);
3010                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3011                                 so_base_ws_lo);
3012                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3013                                 so_base_ws_hi);
3014         }
3015 }
3016
3017 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
3018 {
3019         struct gaudi_device *gaudi = hdev->asic_specific;
3020         struct gaudi_internal_qman_info *q;
3021         u64 qman_base_addr;
3022         int i, j, dma_id, internal_q_index;
3023
3024         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
3025                 return;
3026
3027         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
3028                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
3029
3030                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
3031                          /*
3032                           * Add the CPU queue in order to get the correct queue
3033                           * number, as all internal queues are placed after it
3034                           */
3035                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
3036
3037                         q = &gaudi->internal_qmans[internal_q_index];
3038                         qman_base_addr = (u64) q->pq_dma_addr;
3039                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
3040                                                 qman_base_addr);
3041                 }
3042
3043                 /* Initializing lower CP for HBM DMA QMAN */
3044                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
3045
3046                 gaudi_init_dma_core(hdev, dma_id);
3047
3048                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
3049         }
3050
3051         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
3052 }
3053
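/*
 * Configure a single stream of an MME QMAN, using the same upper/lower
 * CP split as the internal DMA QMANs: streams 0-3 use the internal PQs,
 * while qman_id 4 configures the lower CP together with the error
 * reporting and the arbiter watchdog.
 */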
3054 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
3055                                         int qman_id, u64 qman_base_addr)
3056 {
3057         struct cpu_dyn_regs *dyn_regs =
3058                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3059         u32 mtr_base_lo, mtr_base_hi;
3060         u32 so_base_lo, so_base_hi;
3061         u32 irq_handler_offset;
3062         u32 q_off, mme_id;
3063         u32 mme_qm_err_cfg;
3064
3065         mtr_base_lo = lower_32_bits(CFG_BASE +
3066                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3067         mtr_base_hi = upper_32_bits(CFG_BASE +
3068                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3069         so_base_lo = lower_32_bits(CFG_BASE +
3070                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3071         so_base_hi = upper_32_bits(CFG_BASE +
3072                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3073
3074         q_off = mme_offset + qman_id * 4;
3075
3076         if (qman_id < 4) {
3077                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3078                                         lower_32_bits(qman_base_addr));
3079                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3080                                         upper_32_bits(qman_base_addr));
3081
3082                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3083                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3084                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3085
3086                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3087                                                         QMAN_CPDMA_SIZE_OFFSET);
3088                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3089                                                         QMAN_CPDMA_SRC_OFFSET);
3090                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3091                                                         QMAN_CPDMA_DST_OFFSET);
3092         } else {
3093                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3094                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3095                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3096
3097                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3098                                                         QMAN_LDMA_SIZE_OFFSET);
3099                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3100                                                         QMAN_LDMA_SRC_OFFSET);
3101                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3102                                                         QMAN_LDMA_DST_OFFSET);
3103
3104                 /* Configure RAZWI IRQ */
3105                 mme_id = mme_offset /
3106                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3107
3108                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3109                 if (hdev->stop_on_err)
3110                         mme_qm_err_cfg |=
3111                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3112
3113                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3114
3115                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3116                         lower_32_bits(CFG_BASE + irq_handler_offset));
3117                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3118                         upper_32_bits(CFG_BASE + irq_handler_offset));
3119
3120                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3121                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3122                                                                         mme_id);
3123
3124                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3125                                 QM_ARB_ERR_MSG_EN_MASK);
3126
3127                 /* Increase ARB WDT to support streams architecture */
3128                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3129                                 GAUDI_ARB_WDT_TIMEOUT);
3130
3131                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3132                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3133                                 QMAN_INTERNAL_MAKE_TRUSTED);
3134         }
3135
3136         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3137         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3138         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3139         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3140 }
3141
3142 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3143 {
3144         struct gaudi_device *gaudi = hdev->asic_specific;
3145         struct gaudi_internal_qman_info *q;
3146         u64 qman_base_addr;
3147         u32 mme_offset;
3148         int i, internal_q_index;
3149
3150         if (gaudi->hw_cap_initialized & HW_CAP_MME)
3151                 return;
3152
3153         /*
3154          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3155          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3156          */
3157
3158         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3159
3160         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3161                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3162                 q = &gaudi->internal_qmans[internal_q_index];
3163                 qman_base_addr = (u64) q->pq_dma_addr;
3164                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3165                                         qman_base_addr);
3166                 if (i == 3)
3167                         mme_offset = 0;
3168         }
3169
3170         /* Initializing lower CP for MME QMANs */
3171         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3172         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3173         gaudi_init_mme_qman(hdev, 0, 4, 0);
3174
3175         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3176         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3177
3178         gaudi->hw_cap_initialized |= HW_CAP_MME;
3179 }
3180
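/*
 * Configure a single stream of a TPC QMAN. As with the other internal
 * QMANs, streams 0-3 use the internal PQs and qman_id 4 configures the
 * lower CP, the error (RAZWI) reporting and the arbiter watchdog.
 */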
3181 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3182                                 int qman_id, u64 qman_base_addr)
3183 {
3184         struct cpu_dyn_regs *dyn_regs =
3185                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3186         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3187         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3188         u32 tpc_qm_err_cfg, irq_handler_offset;
3189         u32 q_off, tpc_id;
3190
3191         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3192                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3193         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3194                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3195         so_base_en_lo = lower_32_bits(CFG_BASE +
3196                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3197         so_base_en_hi = upper_32_bits(CFG_BASE +
3198                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3199         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3200                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3201         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3202                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3203         so_base_ws_lo = lower_32_bits(CFG_BASE +
3204                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3205         so_base_ws_hi = upper_32_bits(CFG_BASE +
3206                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3207
3208         q_off = tpc_offset + qman_id * 4;
3209
3210         tpc_id = tpc_offset /
3211                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3212
3213         if (qman_id < 4) {
3214                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3215                                         lower_32_bits(qman_base_addr));
3216                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3217                                         upper_32_bits(qman_base_addr));
3218
3219                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3220                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3221                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3222
3223                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3224                                                         QMAN_CPDMA_SIZE_OFFSET);
3225                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3226                                                         QMAN_CPDMA_SRC_OFFSET);
3227                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3228                                                         QMAN_CPDMA_DST_OFFSET);
3229         } else {
3230                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3231                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3232                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3233
3234                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3235                                                         QMAN_LDMA_SIZE_OFFSET);
3236                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3237                                                         QMAN_LDMA_SRC_OFFSET);
3238                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3239                                                         QMAN_LDMA_DST_OFFSET);
3240
3241                 /* Configure RAZWI IRQ */
3242                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3243                 if (hdev->stop_on_err)
3244                         tpc_qm_err_cfg |=
3245                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3246
3247                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3248
3249                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3250                         lower_32_bits(CFG_BASE + irq_handler_offset));
3251                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3252                         upper_32_bits(CFG_BASE + irq_handler_offset));
3253
3254                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3255                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3256                                                                         tpc_id);
3257
3258                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3259                                 QM_ARB_ERR_MSG_EN_MASK);
3260
3261                 /* Increase ARB WDT to support streams architecture */
3262                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3263                                 GAUDI_ARB_WDT_TIMEOUT);
3264
3265                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3266                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3267                                 QMAN_INTERNAL_MAKE_TRUSTED);
3268         }
3269
3270         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3271         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3272         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3273         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3274
3275         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3276         if (tpc_id == 6) {
3277                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3278                                 mtr_base_ws_lo);
3279                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3280                                 mtr_base_ws_hi);
3281                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3282                                 so_base_ws_lo);
3283                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3284                                 so_base_ws_hi);
3285         }
3286 }
3287
3288 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3289 {
3290         struct gaudi_device *gaudi = hdev->asic_specific;
3291         struct gaudi_internal_qman_info *q;
3292         u64 qman_base_addr;
3293         u32 so_base_hi, tpc_offset = 0;
3294         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3295                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3296         int i, tpc_id, internal_q_index;
3297
3298         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3299                 return;
3300
3301         so_base_hi = upper_32_bits(CFG_BASE +
3302                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3303
3304         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3305                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3306                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3307                                                 tpc_id * QMAN_STREAMS + i;
3308                         q = &gaudi->internal_qmans[internal_q_index];
3309                         qman_base_addr = (u64) q->pq_dma_addr;
3310                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3311                                                 qman_base_addr);
3312
3313                         if (i == 3) {
3314                                 /* Initializing lower CP for TPC QMAN */
3315                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3316
3317                                 /* Enable the QMAN and TPC channel */
3318                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3319                                                 QMAN_TPC_ENABLE);
3320                         }
3321                 }
3322
3323                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3324                                 so_base_hi);
3325
3326                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3327
3328                 gaudi->hw_cap_initialized |=
3329                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3330         }
3331 }
3332
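/*
 * Configure a single stream of a NIC QMAN. All streams use internal PQs;
 * the error (RAZWI) reporting and the arbiter watchdog are configured
 * only once, when stream 0 is initialized.
 */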
3333 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3334                                 int qman_id, u64 qman_base_addr, int nic_id)
3335 {
3336         struct cpu_dyn_regs *dyn_regs =
3337                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3338         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3339         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3340         u32 nic_qm_err_cfg, irq_handler_offset;
3341         u32 q_off;
3342
3343         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3344                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3345         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3346                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3347         so_base_en_lo = lower_32_bits(CFG_BASE +
3348                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3349         so_base_en_hi = upper_32_bits(CFG_BASE +
3350                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3351         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3352                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3353         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3354                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3355         so_base_ws_lo = lower_32_bits(CFG_BASE +
3356                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3357         so_base_ws_hi = upper_32_bits(CFG_BASE +
3358                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3359
3360         q_off = nic_offset + qman_id * 4;
3361
3362         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3363         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3364
3365         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3366         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3367         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3368
3369         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3370                                                         QMAN_LDMA_SIZE_OFFSET);
3371         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3372                                                         QMAN_LDMA_SRC_OFFSET);
3373         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3374                                                         QMAN_LDMA_DST_OFFSET);
3375
3376         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3377         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3378         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3379         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3380
3381         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3382         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3383         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3384         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3385         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3386
3387         if (qman_id == 0) {
3388                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3389                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3390                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3391
3392                 /* Configure RAZWI IRQ */
3393                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3394                 if (hdev->stop_on_err)
3395                         nic_qm_err_cfg |=
3396                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3397
3398                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3399
3400                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3401                         lower_32_bits(CFG_BASE + irq_handler_offset));
3402                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3403                         upper_32_bits(CFG_BASE + irq_handler_offset));
3404
3405                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3406                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3407                                                                         nic_id);
3408
3409                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3410                                 QM_ARB_ERR_MSG_EN_MASK);
3411
3412                 /* Increase ARB WDT to support streams architecture */
3413                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3414                                 GAUDI_ARB_WDT_TIMEOUT);
3415
3416                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3417                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3418                                 QMAN_INTERNAL_MAKE_TRUSTED);
3419         }
3420 }
3421
3422 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3423 {
3424         struct gaudi_device *gaudi = hdev->asic_specific;
3425         struct gaudi_internal_qman_info *q;
3426         u64 qman_base_addr;
3427         u32 nic_offset = 0;
3428         u32 nic_delta_between_qmans =
3429                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3430         u32 nic_delta_between_nics =
3431                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3432         int i, nic_id, internal_q_index;
3433
3434         if (!hdev->nic_ports_mask)
3435                 return;
3436
3437         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3438                 return;
3439
3440         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3441
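	/*
	 * Each NIC macro block holds two QMANs. The offset below advances
	 * by the QMAN delta for every engine and, after every odd engine,
	 * rewinds two QMAN deltas and advances by the NIC block delta to
	 * reach the next NIC macro.
	 */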
3442         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3443                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3444                         nic_offset += nic_delta_between_qmans;
3445                         if (nic_id & 1) {
3446                                 nic_offset -= (nic_delta_between_qmans * 2);
3447                                 nic_offset += nic_delta_between_nics;
3448                         }
3449                         continue;
3450                 }
3451
3452                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3453                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3454                                                 nic_id * QMAN_STREAMS + i;
3455                         q = &gaudi->internal_qmans[internal_q_index];
3456                         qman_base_addr = (u64) q->pq_dma_addr;
3457                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3458                                                 qman_base_addr, nic_id);
3459                 }
3460
3461                 /* Enable the QMAN */
3462                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3463
3464                 nic_offset += nic_delta_between_qmans;
3465                 if (nic_id & 1) {
3466                         nic_offset -= (nic_delta_between_qmans * 2);
3467                         nic_offset += nic_delta_between_nics;
3468                 }
3469
3470                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3471         }
3472 }
3473
3474 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3475 {
3476         struct gaudi_device *gaudi = hdev->asic_specific;
3477
3478         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3479                 return;
3480
3481         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3482         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3483         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3484 }
3485
3486 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3487 {
3488         struct gaudi_device *gaudi = hdev->asic_specific;
3489
3490         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3491                 return;
3492
3493         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3494         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3495         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3496         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3497         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3498 }
3499
3500 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3501 {
3502         struct gaudi_device *gaudi = hdev->asic_specific;
3503
3504         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3505                 return;
3506
3507         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3508         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3509 }
3510
3511 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3512 {
3513         struct gaudi_device *gaudi = hdev->asic_specific;
3514         u32 tpc_offset = 0;
3515         int tpc_id;
3516
3517         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3518                 return;
3519
3520         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3521                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3522                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3523         }
3524 }
3525
3526 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3527 {
3528         struct gaudi_device *gaudi = hdev->asic_specific;
3529         u32 nic_mask, nic_offset = 0;
3530         u32 nic_delta_between_qmans =
3531                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3532         u32 nic_delta_between_nics =
3533                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3534         int nic_id;
3535
3536         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3537                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3538
3539                 if (gaudi->hw_cap_initialized & nic_mask)
3540                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3541
3542                 nic_offset += nic_delta_between_qmans;
3543                 if (nic_id & 1) {
3544                         nic_offset -= (nic_delta_between_qmans * 2);
3545                         nic_offset += nic_delta_between_nics;
3546                 }
3547         }
3548 }
3549
3550 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3551 {
3552         struct gaudi_device *gaudi = hdev->asic_specific;
3553
3554         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3555                 return;
3556
3557         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3558         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3559         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3560         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3561 }
3562
3563 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3564 {
3565         struct gaudi_device *gaudi = hdev->asic_specific;
3566
3567         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3568                 return;
3569
3570         /* Stop CPs of HBM DMA QMANs */
3571
3572         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3573         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3574         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3575         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3576         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3577 }
3578
3579 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3580 {
3581         struct gaudi_device *gaudi = hdev->asic_specific;
3582
3583         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3584                 return;
3585
3586         /* Stop CPs of MME QMANs */
3587         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3588         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3589 }
3590
3591 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3592 {
3593         struct gaudi_device *gaudi = hdev->asic_specific;
3594
3595         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3596                 return;
3597
3598         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3599         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3600         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3601         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3602         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3603         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3604         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3605         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3606 }
3607
3608 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3609 {
3610         struct gaudi_device *gaudi = hdev->asic_specific;
3611
3612         /* Stop PQFs, CQFs and CPs of the NIC QMANs */
3613
3614         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3615                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3616                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3617                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3618                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3619
3620         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3621                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3622                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3623                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3624                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3625
3626         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3627                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3628                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3629                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3630                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3631
3632         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3633                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3634                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3635                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3636                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3637
3638         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3639                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3640                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3641                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3642                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3643
3644         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3645                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3646                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3647                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3648                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3649
3650         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3651                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3652                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3653                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3654                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3655
3656         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3657                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3658                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3659                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3660                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3661
3662         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3663                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3664                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3665                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3666                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3667
3668         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3669                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3670                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3671                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3672                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3673 }
3674
3675 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3676 {
3677         struct gaudi_device *gaudi = hdev->asic_specific;
3678
3679         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3680                 return;
3681
3682         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3683         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3684         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3685 }
3686
3687 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3688 {
3689         struct gaudi_device *gaudi = hdev->asic_specific;
3690
3691         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3692                 return;
3693
3694         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3695         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3696         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3697         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3698         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3699 }
3700
3701 static void gaudi_mme_stall(struct hl_device *hdev)
3702 {
3703         struct gaudi_device *gaudi = hdev->asic_specific;
3704
3705         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3706                 return;
3707
3708         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3709         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3710         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3711         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3712         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3713         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3714         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3715         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3716         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3717         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3718         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3719         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3720         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3721         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3722         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3723         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3724         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3725 }
3726
3727 static void gaudi_tpc_stall(struct hl_device *hdev)
3728 {
3729         struct gaudi_device *gaudi = hdev->asic_specific;
3730
3731         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3732                 return;
3733
3734         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3735         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3736         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3737         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3738         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3739         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3740         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3741         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3742 }
3743
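/*
 * Clear the CGM_CFG/CGM_CFG1 registers of every DMA, MME and TPC QMAN to turn
 * clock gating off. When FW security is enabled these registers are managed
 * by the firmware, so the driver skips them.
 */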
3744 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3745 {
3746         u32 qman_offset;
3747         int i;
3748
3749         if (hdev->asic_prop.fw_security_enabled)
3750                 return;
3751
3752         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3753                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3754                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3755
3756                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3757         }
3758
3759         WREG32(mmMME0_QM_CGM_CFG, 0);
3760         WREG32(mmMME0_QM_CGM_CFG1, 0);
3761         WREG32(mmMME2_QM_CGM_CFG, 0);
3762         WREG32(mmMME2_QM_CGM_CFG1, 0);
3763
3764         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3765                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3766                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3767
3768                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3769         }
3770 }
3771
3772 static void gaudi_enable_timestamp(struct hl_device *hdev)
3773 {
3774         /* Disable the timestamp counter */
3775         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3776
3777         /* Zero the lower/upper parts of the 64-bit counter */
3778         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3779         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3780
3781         /* Enable the counter */
3782         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3783 }
3784
3785 static void gaudi_disable_timestamp(struct hl_device *hdev)
3786 {
3787         /* Disable the timestamp counter */
3788         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3789 }
3790
3791 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3792 {
3793         u32 wait_timeout_ms;
3794
3795         dev_info(hdev->dev,
3796                 "Halting compute engines and disabling interrupts\n");
3797
3798         if (hdev->pldm)
3799                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3800         else
3801                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3802
3803         if (fw_reset)
3804                 goto skip_engines;
3805
3806         gaudi_stop_nic_qmans(hdev);
3807         gaudi_stop_mme_qmans(hdev);
3808         gaudi_stop_tpc_qmans(hdev);
3809         gaudi_stop_hbm_dma_qmans(hdev);
3810         gaudi_stop_pci_dma_qmans(hdev);
3811
3812         msleep(wait_timeout_ms);
3813
3814         gaudi_pci_dma_stall(hdev);
3815         gaudi_hbm_dma_stall(hdev);
3816         gaudi_tpc_stall(hdev);
3817         gaudi_mme_stall(hdev);
3818
3819         msleep(wait_timeout_ms);
3820
3821         gaudi_disable_nic_qmans(hdev);
3822         gaudi_disable_mme_qmans(hdev);
3823         gaudi_disable_tpc_qmans(hdev);
3824         gaudi_disable_hbm_dma_qmans(hdev);
3825         gaudi_disable_pci_dma_qmans(hdev);
3826
3827         gaudi_disable_timestamp(hdev);
3828
3829 skip_engines:
3830         gaudi_disable_msi(hdev);
3831 }
3832
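/*
 * One-time MMU bring-up: program the hop-0 page-table address for every ASID,
 * set the cache-management region base in the STLB, invalidate the MMU cache
 * and finally enable the MMU with its hop configuration.
 */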
3833 static int gaudi_mmu_init(struct hl_device *hdev)
3834 {
3835         struct asic_fixed_properties *prop = &hdev->asic_prop;
3836         struct gaudi_device *gaudi = hdev->asic_specific;
3837         u64 hop0_addr;
3838         int rc, i;
3839
3840         if (!hdev->mmu_enable)
3841                 return 0;
3842
3843         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3844                 return 0;
3845
3846         for (i = 0 ; i < prop->max_asid ; i++) {
3847                 hop0_addr = prop->mmu_pgt_addr +
3848                                 (i * prop->mmu_hop_table_size);
3849
3850                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3851                 if (rc) {
3852                         dev_err(hdev->dev,
3853                                 "failed to set hop0 addr for asid %d\n", i);
3854                         goto err;
3855                 }
3856         }
3857
3858         /* init MMU cache management page */
3859         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3860         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3861
3862         /* mem cache invalidation */
3863         WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3864
3865         hl_mmu_invalidate_cache(hdev, true, 0);
3866
3867         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3868         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3869
3870         WREG32(mmSTLB_HOP_CONFIGURATION,
3871                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3872
3873         /*
3874          * The H/W expects the first PI after init to be 1. After wraparound
3875          * we'll write 0.
3876          */
3877         gaudi->mmu_cache_inv_pi = 1;
3878
3879         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3880
3881         return 0;
3882
3883 err:
3884         return rc;
3885 }
3886
3887 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3888 {
3889         void __iomem *dst;
3890
3891         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3892
3893         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3894 }
3895
3896 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3897 {
3898         void __iomem *dst;
3899
3900         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3901
3902         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3903 }
3904
3905 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3906 {
3907         struct dynamic_fw_load_mgr *dynamic_loader;
3908         struct cpu_dyn_regs *dyn_regs;
3909
3910         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3911
3912         /*
3913          * Here we update initial values for a few specific dynamic regs.
3914          * Before the first descriptor is read from the FW, these values
3915          * have to be hard-coded. In later stages of the protocol they are
3916          * updated automatically from the FW descriptor, so the data there
3917          * is always up-to-date.
3918          */
3919         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3920         dyn_regs->kmd_msg_to_cpu =
3921                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3922         dyn_regs->cpu_cmd_status_to_host =
3923                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3924
3925         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3926 }
3927
3928 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3929 {
3930         struct static_fw_load_mgr *static_loader;
3931
3932         static_loader = &hdev->fw_loader.static_loader;
3933
3934         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3935         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3936         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3937         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3938         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3939         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3940         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3941         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3942         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3943         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3944         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3945         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3946         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3947                         GAUDI_PLDM_RESET_WAIT_MSEC :
3948                         GAUDI_CPU_RESET_WAIT_MSEC;
3949 }
3950
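/*
 * Fill the common fw_loader fields (image names, timeouts, BAR ids) and then
 * initialize either the dynamic (COMMS) or the static (legacy) loader
 * descriptor, according to the dynamic_fw_load property.
 */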
3951 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3952 {
3953         struct asic_fixed_properties *prop = &hdev->asic_prop;
3954         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3955
3956         /* fill common fields */
3957         fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3958         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3959         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3960         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3961         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3962         fw_loader->skip_bmc = !hdev->bmc_enable;
3963         fw_loader->sram_bar_id = SRAM_BAR_ID;
3964         fw_loader->dram_bar_id = HBM_BAR_ID;
3965
3966         if (prop->dynamic_fw_load)
3967                 gaudi_init_dynamic_firmware_loader(hdev);
3968         else
3969                 gaudi_init_static_firmware_loader(hdev);
3970 }
3971
3972 static int gaudi_init_cpu(struct hl_device *hdev)
3973 {
3974         struct gaudi_device *gaudi = hdev->asic_specific;
3975         int rc;
3976
3977         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3978                 return 0;
3979
3980         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3981                 return 0;
3982
3983         /*
3984          * The device CPU works with 40 bits addresses.
3985          * This register sets the extension to 50 bits.
3986          */
3987         if (!hdev->asic_prop.fw_security_enabled)
3988                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3989
3990         rc = hl_fw_init_cpu(hdev);
3991
3992         if (rc)
3993                 return rc;
3994
3995         gaudi->hw_cap_initialized |= HW_CAP_CPU;
3996
3997         return 0;
3998 }
3999
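/*
 * Publish the PQ/EQ/CQ base addresses and sizes to the device CPU via the
 * CPU_IF registers, signal readiness through CPU_IF_QUEUE_INIT, raise the
 * PI-update interrupt and then poll until the device CPU reports
 * PQ_INIT_STATUS_READY_FOR_HOST or the given timeout expires.
 */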
4000 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4001 {
4002         struct cpu_dyn_regs *dyn_regs =
4003                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4004         struct asic_fixed_properties *prop = &hdev->asic_prop;
4005         struct gaudi_device *gaudi = hdev->asic_specific;
4006         u32 status, irq_handler_offset;
4007         struct hl_eq *eq;
4008         struct hl_hw_queue *cpu_pq =
4009                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4010         int err;
4011
4012         if (!hdev->cpu_queues_enable)
4013                 return 0;
4014
4015         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4016                 return 0;
4017
4018         eq = &hdev->event_queue;
4019
4020         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4021         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4022
4023         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4024         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4025
4026         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4027                         lower_32_bits(hdev->cpu_accessible_dma_address));
4028         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4029                         upper_32_bits(hdev->cpu_accessible_dma_address));
4030
4031         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4032         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4033         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4034
4035         /* Used for EQ CI */
4036         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4037
4038         WREG32(mmCPU_IF_PF_PQ_PI, 0);
4039
4040         if (gaudi->multi_msi_mode)
4041                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4042         else
4043                 WREG32(mmCPU_IF_QUEUE_INIT,
4044                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4045
4046         irq_handler_offset = prop->gic_interrupts_enable ?
4047                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4048                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4049
4050         WREG32(irq_handler_offset,
4051                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4052
4053         err = hl_poll_timeout(
4054                 hdev,
4055                 mmCPU_IF_QUEUE_INIT,
4056                 status,
4057                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4058                 1000,
4059                 cpu_timeout);
4060
4061         if (err) {
4062                 dev_err(hdev->dev,
4063                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4064                 return -EIO;
4065         }
4066
4067         /* update FW application security bits */
4068         if (prop->fw_cpu_boot_dev_sts0_valid)
4069                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4070         if (prop->fw_cpu_boot_dev_sts1_valid)
4071                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4072
4073         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4074         return 0;
4075 }
4076
4077 static void gaudi_pre_hw_init(struct hl_device *hdev)
4078 {
4079         /* Perform read from the device to make sure device is up */
4080         RREG32(mmHW_STATE);
4081
4082         if (!hdev->asic_prop.fw_security_enabled) {
4083                 /* Set the access through PCI bars (Linux driver only) as
4084                  * secured
4085                  */
4086                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4087                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4088                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4089
4090                 /* Perform read to flush the waiting writes to ensure
4091                  * configuration was set in the device
4092                  */
4093                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4094         }
4095
4096         /*
4097          * Let's mark in the H/W that we have reached this point. We check
4098          * this value in the reset_before_init function to understand whether
4099          * we need to reset the chip before doing H/W init. This register is
4100          * cleared by the H/W upon H/W reset
4101          */
4102         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4103 }
4104
4105 static int gaudi_hw_init(struct hl_device *hdev)
4106 {
4107         struct gaudi_device *gaudi = hdev->asic_specific;
4108         int rc;
4109
4110         gaudi_pre_hw_init(hdev);
4111
4112         /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
4113          * So we set it here and if anyone tries to move it later to
4114          * a different address, there will be an error
4115          */
4116         if (hdev->asic_prop.iatu_done_by_fw)
4117                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4118
4119         /*
4120          * Before pushing u-boot/Linux to the device, the HBM BAR must point
4121          * to the DRAM base address
4122          */
4123         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4124                 dev_err(hdev->dev,
4125                         "failed to map HBM bar to DRAM base address\n");
4126                 return -EIO;
4127         }
4128
4129         rc = gaudi_init_cpu(hdev);
4130         if (rc) {
4131                 dev_err(hdev->dev, "failed to initialize CPU\n");
4132                 return rc;
4133         }
4134
4135         /* In case the clock gating was enabled in preboot we need to disable
4136          * it here before touching the MME/TPC registers.
4137          */
4138         gaudi_disable_clock_gating(hdev);
4139
4140         /* SRAM scrambler must be initialized after CPU is running from HBM */
4141         gaudi_init_scrambler_sram(hdev);
4142
4143         /* This is here just in case we are working without CPU */
4144         gaudi_init_scrambler_hbm(hdev);
4145
4146         gaudi_init_golden_registers(hdev);
4147
4148         rc = gaudi_mmu_init(hdev);
4149         if (rc)
4150                 return rc;
4151
4152         gaudi_init_security(hdev);
4153
4154         gaudi_init_pci_dma_qmans(hdev);
4155
4156         gaudi_init_hbm_dma_qmans(hdev);
4157
4158         gaudi_init_mme_qmans(hdev);
4159
4160         gaudi_init_tpc_qmans(hdev);
4161
4162         gaudi_init_nic_qmans(hdev);
4163
4164         gaudi_enable_timestamp(hdev);
4165
4166         /* MSI must be enabled before CPU queues and NIC are initialized */
4167         rc = gaudi_enable_msi(hdev);
4168         if (rc)
4169                 goto disable_queues;
4170
4171         /* must be called after MSI was enabled */
4172         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4173         if (rc) {
4174                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4175                         rc);
4176                 goto disable_msi;
4177         }
4178
4179         /* Perform read from the device to flush all configuration */
4180         RREG32(mmHW_STATE);
4181
4182         return 0;
4183
4184 disable_msi:
4185         gaudi_disable_msi(hdev);
4186 disable_queues:
4187         gaudi_disable_mme_qmans(hdev);
4188         gaudi_disable_pci_dma_qmans(hdev);
4189
4190         return rc;
4191 }
4192
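/*
 * Hard-reset flow: hand FLR handling to the H/W, ask the device CPU to halt
 * (via the GIC when Linux is loaded, otherwise via the COMMS/legacy
 * registers), and, unless the firmware performs the reset, configure the
 * reset masks and trigger SW_ALL_RST from the driver. In all cases we wait
 * reset_timeout_ms and then sample BTM_FSM to verify the reset completed.
 */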
4193 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4194 {
4195         struct cpu_dyn_regs *dyn_regs =
4196                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4197         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4198         struct gaudi_device *gaudi = hdev->asic_specific;
4199         bool driver_performs_reset;
4200
4201         if (!hard_reset) {
4202                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4203                 return;
4204         }
4205
4206         if (hdev->pldm) {
4207                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4208                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4209         } else {
4210                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4211                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4212         }
4213
4214         if (fw_reset) {
4215                 dev_info(hdev->dev,
4216                         "Firmware performs HARD reset, going to wait %dms\n",
4217                         reset_timeout_ms);
4218
4219                 goto skip_reset;
4220         }
4221
4222         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4223                                         !hdev->asic_prop.hard_reset_done_by_fw);
4224
4225         /* Set device to handle FLR by H/W as we will put the device CPU to
4226          * halt mode
4227          */
4228         if (driver_performs_reset)
4229                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4230                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4231
4232         /* If Linux is loaded on the device CPU we need to communicate with it
4233          * via the GIC. Otherwise, we use COMMS or, with old F/Ws, the
4234          * MSG_TO_CPU registers
4235          */
4236         if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4237                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4238                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4239                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4240
4241                 WREG32(irq_handler_offset,
4242                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4243
4244                 /* This is a hail-mary attempt to revive the card in the small chance that the
4245                  * f/w has experienced a watchdog event, which caused it to return back to preboot.
4246                  * In that case, triggering reset through GIC won't help. We need to trigger the
4247                  * reset as if Linux wasn't loaded.
4248                  *
4249                  * We do it only if the reset cause was HB, because that would be the indication
4250                  * of such an event.
4251                  *
4252                  * In case watchdog hasn't expired but we still got HB, then this won't do any
4253                  * damage.
4254                  */
4255                 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4256                         if (hdev->asic_prop.hard_reset_done_by_fw)
4257                                 hl_fw_ask_hard_reset_without_linux(hdev);
4258                         else
4259                                 hl_fw_ask_halt_machine_without_linux(hdev);
4260                 }
4261         } else {
4262                 if (hdev->asic_prop.hard_reset_done_by_fw)
4263                         hl_fw_ask_hard_reset_without_linux(hdev);
4264                 else
4265                         hl_fw_ask_halt_machine_without_linux(hdev);
4266         }
4267
4268         if (driver_performs_reset) {
4269
4270                 /* Configure the reset registers. Must be done as early as
4271                  * possible in case we fail during H/W initialization
4272                  */
4273                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4274                                                 (CFG_RST_H_DMA_MASK |
4275                                                 CFG_RST_H_MME_MASK |
4276                                                 CFG_RST_H_SM_MASK |
4277                                                 CFG_RST_H_TPC_7_MASK));
4278
4279                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4280
4281                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4282                                                 (CFG_RST_H_HBM_MASK |
4283                                                 CFG_RST_H_TPC_7_MASK |
4284                                                 CFG_RST_H_NIC_MASK |
4285                                                 CFG_RST_H_SM_MASK |
4286                                                 CFG_RST_H_DMA_MASK |
4287                                                 CFG_RST_H_MME_MASK |
4288                                                 CFG_RST_H_CPU_MASK |
4289                                                 CFG_RST_H_MMU_MASK));
4290
4291                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4292                                                 (CFG_RST_L_IF_MASK |
4293                                                 CFG_RST_L_PSOC_MASK |
4294                                                 CFG_RST_L_TPC_MASK));
4295
4296                 msleep(cpu_timeout_ms);
4297
4298                 /* Tell ASIC not to re-initialize PCIe */
4299                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4300
4301                 /* Restart BTL/BLR upon hard-reset */
4302                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4303
4304                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4305                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4306
4307                 dev_info(hdev->dev,
4308                         "Issued HARD reset command, going to wait %dms\n",
4309                         reset_timeout_ms);
4310         } else {
4311                 dev_info(hdev->dev,
4312                         "Firmware performs HARD reset, going to wait %dms\n",
4313                         reset_timeout_ms);
4314         }
4315
4316 skip_reset:
4317         /*
4318          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4319          * itself is in reset. Need to wait until the reset is deasserted
4320          */
4321         msleep(reset_timeout_ms);
4322
4323         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4324         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4325                 dev_err(hdev->dev,
4326                         "Timeout while waiting for device to reset 0x%x\n",
4327                         status);
4328
4329         if (gaudi) {
4330                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4331                                                 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4332                                                 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4333                                                 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4334                                                 HW_CAP_HBM_SCRAMBLER);
4335
4336                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4337
4338                 hdev->device_cpu_is_halted = false;
4339         }
4340 }
4341
4342 static int gaudi_suspend(struct hl_device *hdev)
4343 {
4344         int rc;
4345
4346         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4347         if (rc)
4348                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4349
4350         return rc;
4351 }
4352
4353 static int gaudi_resume(struct hl_device *hdev)
4354 {
4355         return gaudi_init_iatu(hdev);
4356 }
4357
4358 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4359                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4360 {
4361         int rc;
4362
4363         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4364                         VM_DONTCOPY | VM_NORESERVE;
4365
4366         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4367                                 (dma_addr - HOST_PHYS_BASE), size);
4368         if (rc)
4369                 dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
4370
4371         return rc;
4372 }
4373
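/*
 * Translate the H/W queue id to the matching QMAN PQ_PI doorbell register,
 * write the new producer index to it and, for the CPU PQ, also raise the
 * PI-update interrupt so the device CPU notices the new entry.
 */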
4374 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4375 {
4376         struct cpu_dyn_regs *dyn_regs =
4377                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4378         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4379         struct gaudi_device *gaudi = hdev->asic_specific;
4380         bool invalid_queue = false;
4381         int dma_id;
4382
4383         switch (hw_queue_id) {
4384         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4385                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4386                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4387                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4388                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4389                 break;
4390
4391         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4392                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4393                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4394                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4395                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4396                 break;
4397
4398         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4399                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4400                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4401                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4402                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4403                 break;
4404
4405         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4406                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4407                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4408                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4409                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4410                 break;
4411
4412         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4413                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4414                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4415                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4416                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4417                 break;
4418
4419         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4420                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4421                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4422                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4423                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4424                 break;
4425
4426         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4427                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4428                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4429                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4430                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4431                 break;
4432
4433         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4434                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4435                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4436                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4437                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4438                 break;
4439
4440         case GAUDI_QUEUE_ID_CPU_PQ:
4441                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4442                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4443                 else
4444                         invalid_queue = true;
4445                 break;
4446
4447         case GAUDI_QUEUE_ID_MME_0_0:
4448                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4449                 break;
4450
4451         case GAUDI_QUEUE_ID_MME_0_1:
4452                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4453                 break;
4454
4455         case GAUDI_QUEUE_ID_MME_0_2:
4456                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4457                 break;
4458
4459         case GAUDI_QUEUE_ID_MME_0_3:
4460                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4461                 break;
4462
4463         case GAUDI_QUEUE_ID_MME_1_0:
4464                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4465                 break;
4466
4467         case GAUDI_QUEUE_ID_MME_1_1:
4468                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4469                 break;
4470
4471         case GAUDI_QUEUE_ID_MME_1_2:
4472                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4473                 break;
4474
4475         case GAUDI_QUEUE_ID_MME_1_3:
4476                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4477                 break;
4478
4479         case GAUDI_QUEUE_ID_TPC_0_0:
4480                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4481                 break;
4482
4483         case GAUDI_QUEUE_ID_TPC_0_1:
4484                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4485                 break;
4486
4487         case GAUDI_QUEUE_ID_TPC_0_2:
4488                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4489                 break;
4490
4491         case GAUDI_QUEUE_ID_TPC_0_3:
4492                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4493                 break;
4494
4495         case GAUDI_QUEUE_ID_TPC_1_0:
4496                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4497                 break;
4498
4499         case GAUDI_QUEUE_ID_TPC_1_1:
4500                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4501                 break;
4502
4503         case GAUDI_QUEUE_ID_TPC_1_2:
4504                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4505                 break;
4506
4507         case GAUDI_QUEUE_ID_TPC_1_3:
4508                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4509                 break;
4510
4511         case GAUDI_QUEUE_ID_TPC_2_0:
4512                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4513                 break;
4514
4515         case GAUDI_QUEUE_ID_TPC_2_1:
4516                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4517                 break;
4518
4519         case GAUDI_QUEUE_ID_TPC_2_2:
4520                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4521                 break;
4522
4523         case GAUDI_QUEUE_ID_TPC_2_3:
4524                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4525                 break;
4526
4527         case GAUDI_QUEUE_ID_TPC_3_0:
4528                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4529                 break;
4530
4531         case GAUDI_QUEUE_ID_TPC_3_1:
4532                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4533                 break;
4534
4535         case GAUDI_QUEUE_ID_TPC_3_2:
4536                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4537                 break;
4538
4539         case GAUDI_QUEUE_ID_TPC_3_3:
4540                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4541                 break;
4542
4543         case GAUDI_QUEUE_ID_TPC_4_0:
4544                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4545                 break;
4546
4547         case GAUDI_QUEUE_ID_TPC_4_1:
4548                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4549                 break;
4550
4551         case GAUDI_QUEUE_ID_TPC_4_2:
4552                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4553                 break;
4554
4555         case GAUDI_QUEUE_ID_TPC_4_3:
4556                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4557                 break;
4558
4559         case GAUDI_QUEUE_ID_TPC_5_0:
4560                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4561                 break;
4562
4563         case GAUDI_QUEUE_ID_TPC_5_1:
4564                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4565                 break;
4566
4567         case GAUDI_QUEUE_ID_TPC_5_2:
4568                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4569                 break;
4570
4571         case GAUDI_QUEUE_ID_TPC_5_3:
4572                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4573                 break;
4574
4575         case GAUDI_QUEUE_ID_TPC_6_0:
4576                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4577                 break;
4578
4579         case GAUDI_QUEUE_ID_TPC_6_1:
4580                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4581                 break;
4582
4583         case GAUDI_QUEUE_ID_TPC_6_2:
4584                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4585                 break;
4586
4587         case GAUDI_QUEUE_ID_TPC_6_3:
4588                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4589                 break;
4590
4591         case GAUDI_QUEUE_ID_TPC_7_0:
4592                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4593                 break;
4594
4595         case GAUDI_QUEUE_ID_TPC_7_1:
4596                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4597                 break;
4598
4599         case GAUDI_QUEUE_ID_TPC_7_2:
4600                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4601                 break;
4602
4603         case GAUDI_QUEUE_ID_TPC_7_3:
4604                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4605                 break;
4606
4607         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4608                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4609                         invalid_queue = true;
4610
4611                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4612                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4613                 break;
4614
4615         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4616                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4617                         invalid_queue = true;
4618
4619                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4620                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4621                 break;
4622
4623         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4624                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4625                         invalid_queue = true;
4626
4627                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4628                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4629                 break;
4630
4631         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4632                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4633                         invalid_queue = true;
4634
4635                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4636                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4637                 break;
4638
4639         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4640                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4641                         invalid_queue = true;
4642
4643                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4644                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4645                 break;
4646
4647         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4648                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4649                         invalid_queue = true;
4650
4651                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4652                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4653                 break;
4654
4655         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4656                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4657                         invalid_queue = true;
4658
4659                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4660                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4661                 break;
4662
4663         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4664                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4665                         invalid_queue = true;
4666
4667                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4668                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4669                 break;
4670
4671         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4672                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4673                         invalid_queue = true;
4674
4675                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4676                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4677                 break;
4678
4679         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4680                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4681                         invalid_queue = true;
4682
4683                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4684                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4685                 break;
4686
4687         default:
4688                 invalid_queue = true;
4689         }
4690
4691         if (invalid_queue) {
4692                 /* Should never get here */
4693                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4694                         hw_queue_id);
4695                 return;
4696         }
4697
4698         db_value = pi;
4699
4700         /* ring the doorbell */
4701         WREG32(db_reg_offset, db_value);
4702
4703         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4704                 /* make sure device CPU will read latest data from host */
4705                 mb();
4706
4707                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4708                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4709                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4710
4711                 WREG32(irq_handler_offset,
4712                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4713         }
4714 }
4715
4716 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4717                                 struct hl_bd *bd)
4718 {
4719         __le64 *pbd = (__le64 *) bd;
4720
4721         /* The QMANs are in host memory so a simple copy suffices */
4722         pqe[0] = pbd[0];
4723         pqe[1] = pbd[1];
4724 }
4725
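/*
 * Host memory is exposed to the device behind HOST_PHYS_BASE, so every DMA
 * address handed to the H/W is shifted by that offset on allocation/mapping
 * and shifted back before being returned to the DMA API on free/unmap.
 */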
4726 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4727                                         dma_addr_t *dma_handle, gfp_t flags)
4728 {
4729         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4730                                                 dma_handle, flags);
4731
4732         /* Shift to the device's base physical address of host memory */
4733         if (kernel_addr)
4734                 *dma_handle += HOST_PHYS_BASE;
4735
4736         return kernel_addr;
4737 }
4738
4739 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4740                 void *cpu_addr, dma_addr_t dma_handle)
4741 {
4742         /* Cancel the device's base physical address of host memory */
4743         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4744
4745         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4746 }
4747
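/*
 * Scrub the user HBM range by driving all DMA cores in memset mode (COMMIT
 * with the MEM_SET bit): each core fills a chunk of up to 2GB, then we poll
 * every core's STS0 busy bit before moving on to the next batch of chunks.
 */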
4748 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4749 {
4750         struct asic_fixed_properties *prop = &hdev->asic_prop;
4751         u64  cur_addr = DRAM_BASE_ADDR_USER;
4752         u32 val;
4753         u32 chunk_size;
4754         int rc, dma_id;
4755
4756         while (cur_addr < prop->dram_end_address) {
4757                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4758                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4759
4760                         chunk_size =
4761                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4762
4763                         dev_dbg(hdev->dev,
4764                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4765                                 cur_addr, cur_addr + chunk_size);
4766
4767                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
4768                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
4769                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4770                                                 lower_32_bits(cur_addr));
4771                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4772                                                 upper_32_bits(cur_addr));
4773                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4774                                         chunk_size);
4775                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4776                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4777                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4778
4779                         cur_addr += chunk_size;
4780
4781                         if (cur_addr == prop->dram_end_address)
4782                                 break;
4783                 }
4784
4785                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4786                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4787
4788                         rc = hl_poll_timeout(
4789                                 hdev,
4790                                 mmDMA0_CORE_STS0 + dma_offset,
4791                                 val,
4792                                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4793                                 1000,
4794                                 HBM_SCRUBBING_TIMEOUT_US);
4795
4796                         if (rc) {
4797                                 dev_err(hdev->dev,
4798                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4799                                         dma_id);
4800                                 return -EIO;
4801                         }
4802                 }
4803         }
4804
4805         return 0;
4806 }
4807
4808 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4809 {
4810         struct asic_fixed_properties *prop = &hdev->asic_prop;
4811         int rc = 0;
4812         u64 val = 0;
4813
4814         if (!hdev->memory_scrub)
4815                 return 0;
4816
4817         if (!addr && !size) {
4818                 /* Wait till device is idle */
4819                 rc = hl_poll_timeout(
4820                                 hdev,
4821                                 mmDMA0_CORE_STS0/* dummy */,
4822                                 val/* dummy */,
4823                                 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4824                                                 0, NULL)),
4825                                                 1000,
4826                                                 HBM_SCRUBBING_TIMEOUT_US);
4827                 if (rc) {
4828                         dev_err(hdev->dev, "Timeout while waiting for device to become idle\n");
4829                         return -EIO;
4830                 }
4831
4832                 /* Scrub SRAM */
4833                 addr = prop->sram_user_base_address;
4834                 size = hdev->pldm ? 0x10000 :
4835                                 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4836                 val = 0x7777777777777777ull;
4837
4838                 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4839                 if (rc) {
4840                         dev_err(hdev->dev,
4841                                 "Failed to clear SRAM in mem scrub all\n");
4842                         return rc;
4843                 }
4844
4845                 /* Scrub HBM using all DMA channels in parallel */
4846                 rc = gaudi_hbm_scrubbing(hdev);
4847                 if (rc)
4848                         dev_err(hdev->dev,
4849                                 "Failed to clear HBM in mem scrub all\n");
4850         }
4851
4852         return rc;
4853 }
4854
4855 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4856                                 u32 queue_id, dma_addr_t *dma_handle,
4857                                 u16 *queue_len)
4858 {
4859         struct gaudi_device *gaudi = hdev->asic_specific;
4860         struct gaudi_internal_qman_info *q;
4861
4862         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4863                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4864                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4865                 return NULL;
4866         }
4867
4868         q = &gaudi->internal_qmans[queue_id];
4869         *dma_handle = q->pq_dma_addr;
4870         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4871
4872         return q->pq_kernel_addr;
4873 }
4874
4875 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4876                                 u16 len, u32 timeout, u64 *result)
4877 {
4878         struct gaudi_device *gaudi = hdev->asic_specific;
4879
4880         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4881                 if (result)
4882                         *result = 0;
4883                 return 0;
4884         }
4885
4886         if (!timeout)
4887                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4888
4889         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4890                                                 timeout, result);
4891 }
4892
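/*
 * Sanity-test an external H/W queue: send a single MSG_PROT packet that
 * writes a known fence value to host memory, then poll that memory until the
 * value appears or the timeout expires.
 */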
4893 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4894 {
4895         struct packet_msg_prot *fence_pkt;
4896         dma_addr_t pkt_dma_addr;
4897         u32 fence_val, tmp, timeout_usec;
4898         dma_addr_t fence_dma_addr;
4899         u32 *fence_ptr;
4900         int rc;
4901
4902         if (hdev->pldm)
4903                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4904         else
4905                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4906
4907         fence_val = GAUDI_QMAN0_FENCE_VAL;
4908
4909         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4910                                                         &fence_dma_addr);
4911         if (!fence_ptr) {
4912                 dev_err(hdev->dev,
4913                         "Failed to allocate memory for H/W queue %d testing\n",
4914                         hw_queue_id);
4915                 return -ENOMEM;
4916         }
4917
4918         *fence_ptr = 0;
4919
4920         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4921                                         sizeof(struct packet_msg_prot),
4922                                         GFP_KERNEL, &pkt_dma_addr);
4923         if (!fence_pkt) {
4924                 dev_err(hdev->dev,
4925                         "Failed to allocate packet for H/W queue %d testing\n",
4926                         hw_queue_id);
4927                 rc = -ENOMEM;
4928                 goto free_fence_ptr;
4929         }
4930
4931         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4932         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4933         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4934
4935         fence_pkt->ctl = cpu_to_le32(tmp);
4936         fence_pkt->value = cpu_to_le32(fence_val);
4937         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4938
4939         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4940                                         sizeof(struct packet_msg_prot),
4941                                         pkt_dma_addr);
4942         if (rc) {
4943                 dev_err(hdev->dev,
4944                         "Failed to send fence packet to H/W queue %d\n",
4945                         hw_queue_id);
4946                 goto free_pkt;
4947         }
4948
4949         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4950                                         1000, timeout_usec, true);
4951
4952         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4953
4954         if (rc == -ETIMEDOUT) {
4955                 dev_err(hdev->dev,
4956                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4957                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4958                 rc = -EIO;
4959         }
4960
4961 free_pkt:
4962         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4963                                         pkt_dma_addr);
4964 free_fence_ptr:
4965         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4966                                         fence_dma_addr);
4967         return rc;
4968 }
4969
4970 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4971 {
4972         struct gaudi_device *gaudi = hdev->asic_specific;
4973
4974         /*
4975          * Check the capability here because send_cpu_message() won't update
4976          * the result value if the CPU queue capability is not set
4977          */
4978         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4979                 return 0;
4980
4981         return hl_fw_test_cpu_queue(hdev);
4982 }
4983
4984 static int gaudi_test_queues(struct hl_device *hdev)
4985 {
4986         int i, rc, ret_val = 0;
4987
4988         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4989                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4990                         rc = gaudi_test_queue(hdev, i);
4991                         if (rc)
4992                                 ret_val = -EINVAL;
4993                 }
4994         }
4995
4996         rc = gaudi_test_cpu_queue(hdev);
4997         if (rc)
4998                 ret_val = -EINVAL;
4999
5000         return ret_val;
5001 }
5002
5003 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5004                 gfp_t mem_flags, dma_addr_t *dma_handle)
5005 {
5006         void *kernel_addr;
5007
5008         if (size > GAUDI_DMA_POOL_BLK_SIZE)
5009                 return NULL;
5010
5011         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5012
5013         /* Shift to the device's base physical address of host memory */
5014         if (kernel_addr)
5015                 *dma_handle += HOST_PHYS_BASE;
5016
5017         return kernel_addr;
5018 }
5019
5020 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
5021                         dma_addr_t dma_addr)
5022 {
5023         /* Cancel the device's base physical address of host memory */
5024         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5025
5026         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5027 }
5028
5029 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
5030                                         size_t size, dma_addr_t *dma_handle)
5031 {
5032         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5033 }
5034
5035 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
5036                                                 size_t size, void *vaddr)
5037 {
5038         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5039 }
5040
5041 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5042                         int nents, enum dma_data_direction dir)
5043 {
5044         struct scatterlist *sg;
5045         int i;
5046
5047         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5048                 return -ENOMEM;
5049
5050         /* Shift to the device's base physical address of host memory */
5051         for_each_sg(sgl, sg, nents, i)
5052                 sg->dma_address += HOST_PHYS_BASE;
5053
5054         return 0;
5055 }
5056
5057 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5058                         int nents, enum dma_data_direction dir)
5059 {
5060         struct scatterlist *sg;
5061         int i;
5062
5063         /* Cancel the device's base physical address of host memory */
5064         for_each_sg(sgl, sg, nents, i)
5065                 sg->dma_address -= HOST_PHYS_BASE;
5066
5067         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5068 }
5069
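/*
 * Walk the DMA-mapped SG table and merge physically contiguous entries (up to
 * DMA_MAX_TRANSFER_SIZE each) to compute how many LIN_DMA packets, and hence
 * how many bytes, the patched CB will need for this transfer.
 */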
5070 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5071                                         struct sg_table *sgt)
5072 {
5073         struct scatterlist *sg, *sg_next_iter;
5074         u32 count, dma_desc_cnt;
5075         u64 len, len_next;
5076         dma_addr_t addr, addr_next;
5077
5078         dma_desc_cnt = 0;
5079
5080         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5081
5082                 len = sg_dma_len(sg);
5083                 addr = sg_dma_address(sg);
5084
5085                 if (len == 0)
5086                         break;
5087
5088                 while ((count + 1) < sgt->nents) {
5089                         sg_next_iter = sg_next(sg);
5090                         len_next = sg_dma_len(sg_next_iter);
5091                         addr_next = sg_dma_address(sg_next_iter);
5092
5093                         if (len_next == 0)
5094                                 break;
5095
5096                         if ((addr + len == addr_next) &&
5097                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5098                                 len += len_next;
5099                                 count++;
5100                                 sg = sg_next_iter;
5101                         } else {
5102                                 break;
5103                         }
5104                 }
5105
5106                 dma_desc_cnt++;
5107         }
5108
5109         return dma_desc_cnt * sizeof(struct packet_lin_dma);
5110 }
5111
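/*
 * Pin the user buffer referenced by a LIN_DMA packet (unless it is already
 * pinned for this job), DMA-map its SG table and account for the extra
 * LIN_DMA packets in the patched CB size.
 */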
5112 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5113                                 struct hl_cs_parser *parser,
5114                                 struct packet_lin_dma *user_dma_pkt,
5115                                 u64 addr, enum dma_data_direction dir)
5116 {
5117         struct hl_userptr *userptr;
5118         int rc;
5119
5120         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5121                         parser->job_userptr_list, &userptr))
5122                 goto already_pinned;
5123
5124         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5125         if (!userptr)
5126                 return -ENOMEM;
5127
5128         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5129                                 userptr);
5130         if (rc)
5131                 goto free_userptr;
5132
5133         list_add_tail(&userptr->job_node, parser->job_userptr_list);
5134
5135         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5136                                         userptr->sgt->nents, dir);
5137         if (rc) {
5138                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5139                 goto unpin_memory;
5140         }
5141
5142         userptr->dma_mapped = true;
5143         userptr->dir = dir;
5144
5145 already_pinned:
5146         parser->patched_cb_size +=
5147                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5148
5149         return 0;
5150
5151 unpin_memory:
5152         list_del(&userptr->job_node);
5153         hl_unpin_host_memory(hdev, userptr);
5154 free_userptr:
5155         kfree(userptr);
5156         return rc;
5157 }
5158
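/*
 * Validate a LIN_DMA packet that touches host memory. A host-to-device
 * memset has no real host source, so pinning is skipped for it and the
 * packet is accounted for as-is.
 */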
5159 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5160                                 struct hl_cs_parser *parser,
5161                                 struct packet_lin_dma *user_dma_pkt,
5162                                 bool src_in_host)
5163 {
5164         enum dma_data_direction dir;
5165         bool skip_host_mem_pin = false, user_memset;
5166         u64 addr;
5167         int rc = 0;
5168
5169         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5170                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5171                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5172
5173         if (src_in_host) {
5174                 if (user_memset)
5175                         skip_host_mem_pin = true;
5176
5177                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5178                 dir = DMA_TO_DEVICE;
5179                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5180         } else {
5181                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5182                 dir = DMA_FROM_DEVICE;
5183                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5184                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5185                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5186         }
5187
5188         if (skip_host_mem_pin)
5189                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5190         else
5191                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5192                                                 addr, dir);
5193
5194         return rc;
5195 }
5196
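/*
 * Validate a user LIN_DMA packet when the MMU is disabled. The source is
 * treated as host memory only for jobs submitted on the
 * GAUDI_QUEUE_ID_DMA_0_0..GAUDI_QUEUE_ID_DMA_0_3 queues.
 */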
5197 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5198                                 struct hl_cs_parser *parser,
5199                                 struct packet_lin_dma *user_dma_pkt)
5200 {
5201         bool src_in_host = false;
5202         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5203                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5204                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5205
5206         dev_dbg(hdev->dev, "DMA packet details:\n");
5207         dev_dbg(hdev->dev, "source == 0x%llx\n",
5208                                 le64_to_cpu(user_dma_pkt->src_addr));
5209         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5210         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5211
5212         /*
5213          * Special handling for DMA with size 0. Bypass all validations
5214          * because no transactions will be done except for WR_COMP, which
5215          * is not a security issue
5216          */
5217         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5218                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5219                 return 0;
5220         }
5221
5222         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5223                 src_in_host = true;
5224
5225         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5226                                                 src_in_host);
5227 }
5228
5229 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5230                                         struct hl_cs_parser *parser,
5231                                         struct packet_load_and_exe *user_pkt)
5232 {
5233         u32 cfg;
5234
5235         cfg = le32_to_cpu(user_pkt->cfg);
5236
5237         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5238                 dev_err(hdev->dev,
5239                         "User not allowed to use Load and Execute\n");
5240                 return -EPERM;
5241         }
5242
5243         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5244
5245         return 0;
5246 }
5247
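/*
 * Walk the user CB packet by packet, reject packet types the user is not
 * allowed to submit and compute the size of the patched CB. With MMU enabled,
 * LIN_DMA packets keep their original size; without MMU they are expanded
 * according to the host memory they reference.
 */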
5248 static int gaudi_validate_cb(struct hl_device *hdev,
5249                         struct hl_cs_parser *parser, bool is_mmu)
5250 {
5251         u32 cb_parsed_length = 0;
5252         int rc = 0;
5253
5254         parser->patched_cb_size = 0;
5255
5256         /* user_cb_size is more than 0 so the loop will always be executed */
5257         while (cb_parsed_length < parser->user_cb_size) {
5258                 enum packet_id pkt_id;
5259                 u16 pkt_size;
5260                 struct gaudi_packet *user_pkt;
5261
5262                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5263
5264                 pkt_id = (enum packet_id) (
5265                                 (le64_to_cpu(user_pkt->header) &
5266                                 PACKET_HEADER_PACKET_ID_MASK) >>
5267                                         PACKET_HEADER_PACKET_ID_SHIFT);
5268
5269                 if (!validate_packet_id(pkt_id)) {
5270                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5271                         rc = -EINVAL;
5272                         break;
5273                 }
5274
5275                 pkt_size = gaudi_packet_sizes[pkt_id];
5276                 cb_parsed_length += pkt_size;
5277                 if (cb_parsed_length > parser->user_cb_size) {
5278                         dev_err(hdev->dev,
5279                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5280                         rc = -EINVAL;
5281                         break;
5282                 }
5283
5284                 switch (pkt_id) {
5285                 case PACKET_MSG_PROT:
5286                         dev_err(hdev->dev,
5287                                 "User not allowed to use MSG_PROT\n");
5288                         rc = -EPERM;
5289                         break;
5290
5291                 case PACKET_CP_DMA:
5292                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5293                         rc = -EPERM;
5294                         break;
5295
5296                 case PACKET_STOP:
5297                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5298                         rc = -EPERM;
5299                         break;
5300
5301                 case PACKET_WREG_BULK:
5302                         dev_err(hdev->dev,
5303                                 "User not allowed to use WREG_BULK\n");
5304                         rc = -EPERM;
5305                         break;
5306
5307                 case PACKET_LOAD_AND_EXE:
5308                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5309                                 (struct packet_load_and_exe *) user_pkt);
5310                         break;
5311
5312                 case PACKET_LIN_DMA:
5313                         parser->contains_dma_pkt = true;
5314                         if (is_mmu)
5315                                 parser->patched_cb_size += pkt_size;
5316                         else
5317                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5318                                         (struct packet_lin_dma *) user_pkt);
5319                         break;
5320
5321                 case PACKET_WREG_32:
5322                 case PACKET_MSG_LONG:
5323                 case PACKET_MSG_SHORT:
5324                 case PACKET_REPEAT:
5325                 case PACKET_FENCE:
5326                 case PACKET_NOP:
5327                 case PACKET_ARB_POINT:
5328                         parser->patched_cb_size += pkt_size;
5329                         break;
5330
5331                 default:
5332                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5333                                 pkt_id);
5334                         rc = -EINVAL;
5335                         break;
5336                 }
5337
5338                 if (rc)
5339                         break;
5340         }
5341
5342         /*
5343          * The new CB should have space at the end for two MSG_PROT packets:
5344          * 1. A packet that will act as a completion packet
5345          * 2. A packet that will generate MSI-X interrupt
5346          */
5347         if (parser->completion)
5348                 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5349
5350         return rc;
5351 }
5352
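/*
 * Expand a single user LIN_DMA packet into one packet per merged SG segment
 * of the pinned host memory. EB is kept only on the first generated packet
 * and the user's WR_COMP setting is restored on the last one.
 */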
5353 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5354                                 struct hl_cs_parser *parser,
5355                                 struct packet_lin_dma *user_dma_pkt,
5356                                 struct packet_lin_dma *new_dma_pkt,
5357                                 u32 *new_dma_pkt_size)
5358 {
5359         struct hl_userptr *userptr;
5360         struct scatterlist *sg, *sg_next_iter;
5361         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5362         u64 len, len_next;
5363         dma_addr_t dma_addr, dma_addr_next;
5364         u64 device_memory_addr, addr;
5365         enum dma_data_direction dir;
5366         struct sg_table *sgt;
5367         bool src_in_host = false;
5368         bool skip_host_mem_pin = false;
5369         bool user_memset;
5370
5371         ctl = le32_to_cpu(user_dma_pkt->ctl);
5372
5373         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5374                 src_in_host = true;
5375
5376         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5377                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5378
5379         if (src_in_host) {
5380                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5381                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5382                 dir = DMA_TO_DEVICE;
5383                 if (user_memset)
5384                         skip_host_mem_pin = true;
5385         } else {
5386                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5387                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5388                 dir = DMA_FROM_DEVICE;
5389         }
5390
5391         if ((!skip_host_mem_pin) &&
5392                 (!hl_userptr_is_pinned(hdev, addr,
5393                                         le32_to_cpu(user_dma_pkt->tsize),
5394                                         parser->job_userptr_list, &userptr))) {
5395                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5396                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5397                 return -EFAULT;
5398         }
5399
5400         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5401                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5402                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5403                 return 0;
5404         }
5405
5406         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5407
5408         sgt = userptr->sgt;
5409         dma_desc_cnt = 0;
5410
5411         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5412                 len = sg_dma_len(sg);
5413                 dma_addr = sg_dma_address(sg);
5414
5415                 if (len == 0)
5416                         break;
5417
5418                 while ((count + 1) < sgt->nents) {
5419                         sg_next_iter = sg_next(sg);
5420                         len_next = sg_dma_len(sg_next_iter);
5421                         dma_addr_next = sg_dma_address(sg_next_iter);
5422
5423                         if (len_next == 0)
5424                                 break;
5425
5426                         if ((dma_addr + len == dma_addr_next) &&
5427                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5428                                 len += len_next;
5429                                 count++;
5430                                 sg = sg_next_iter;
5431                         } else {
5432                                 break;
5433                         }
5434                 }
5435
5436                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5437                 if (likely(dma_desc_cnt))
5438                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5439                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5440                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5441                 new_dma_pkt->tsize = cpu_to_le32(len);
5442
5443                 if (dir == DMA_TO_DEVICE) {
5444                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5445                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5446                 } else {
5447                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5448                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5449                 }
5450
5451                 if (!user_memset)
5452                         device_memory_addr += len;
5453                 dma_desc_cnt++;
5454                 new_dma_pkt++;
5455         }
5456
5457         if (!dma_desc_cnt) {
5458                 dev_err(hdev->dev,
5459                         "Got 0 SG entries when patching DMA packet\n");
5460                 return -EFAULT;
5461         }
5462
5463         /* Fix the last dma packet - wrcomp must be as user set it */
5464         new_dma_pkt--;
5465         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5466
5467         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5468
5469         return 0;
5470 }
5471
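/*
 * Copy the user CB into the patched CB, expanding every LIN_DMA packet to its
 * per-SG-segment form and rejecting packet types the user may not submit.
 */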
5472 static int gaudi_patch_cb(struct hl_device *hdev,
5473                                 struct hl_cs_parser *parser)
5474 {
5475         u32 cb_parsed_length = 0;
5476         u32 cb_patched_cur_length = 0;
5477         int rc = 0;
5478
5479         /* user_cb_size is more than 0 so the loop will always be executed */
5480         while (cb_parsed_length < parser->user_cb_size) {
5481                 enum packet_id pkt_id;
5482                 u16 pkt_size;
5483                 u32 new_pkt_size = 0;
5484                 struct gaudi_packet *user_pkt, *kernel_pkt;
5485
5486                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5487                 kernel_pkt = parser->patched_cb->kernel_address +
5488                                         cb_patched_cur_length;
5489
5490                 pkt_id = (enum packet_id) (
5491                                 (le64_to_cpu(user_pkt->header) &
5492                                 PACKET_HEADER_PACKET_ID_MASK) >>
5493                                         PACKET_HEADER_PACKET_ID_SHIFT);
5494
5495                 if (!validate_packet_id(pkt_id)) {
5496                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5497                         rc = -EINVAL;
5498                         break;
5499                 }
5500
5501                 pkt_size = gaudi_packet_sizes[pkt_id];
5502                 cb_parsed_length += pkt_size;
5503                 if (cb_parsed_length > parser->user_cb_size) {
5504                         dev_err(hdev->dev,
5505                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5506                         rc = -EINVAL;
5507                         break;
5508                 }
5509
5510                 switch (pkt_id) {
5511                 case PACKET_LIN_DMA:
5512                         rc = gaudi_patch_dma_packet(hdev, parser,
5513                                         (struct packet_lin_dma *) user_pkt,
5514                                         (struct packet_lin_dma *) kernel_pkt,
5515                                         &new_pkt_size);
5516                         cb_patched_cur_length += new_pkt_size;
5517                         break;
5518
5519                 case PACKET_MSG_PROT:
5520                         dev_err(hdev->dev,
5521                                 "User not allowed to use MSG_PROT\n");
5522                         rc = -EPERM;
5523                         break;
5524
5525                 case PACKET_CP_DMA:
5526                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5527                         rc = -EPERM;
5528                         break;
5529
5530                 case PACKET_STOP:
5531                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5532                         rc = -EPERM;
5533                         break;
5534
5535                 case PACKET_WREG_32:
5536                 case PACKET_WREG_BULK:
5537                 case PACKET_MSG_LONG:
5538                 case PACKET_MSG_SHORT:
5539                 case PACKET_REPEAT:
5540                 case PACKET_FENCE:
5541                 case PACKET_NOP:
5542                 case PACKET_ARB_POINT:
5543                 case PACKET_LOAD_AND_EXE:
5544                         memcpy(kernel_pkt, user_pkt, pkt_size);
5545                         cb_patched_cur_length += pkt_size;
5546                         break;
5547
5548                 default:
5549                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5550                                 pkt_id);
5551                         rc = -EINVAL;
5552                         break;
5553                 }
5554
5555                 if (rc)
5556                         break;
5557         }
5558
5559         return rc;
5560 }
5561
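/*
 * With MMU enabled, the user CB is copied verbatim into a kernel-owned
 * patched CB (with room for the two trailing MSG_PROT packets, if a
 * completion is required) and only validated, without DMA patching.
 */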
5562 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5563                 struct hl_cs_parser *parser)
5564 {
5565         u64 patched_cb_handle;
5566         u32 patched_cb_size;
5567         struct hl_cb *user_cb;
5568         int rc;
5569
5570         /*
5571          * The new CB should have space at the end for two MSG_PROT packets:
5572          * 1. A packet that will act as a completion packet
5573          * 2. A packet that will generate MSI interrupt
5574          */
5575         if (parser->completion)
5576                 parser->patched_cb_size = parser->user_cb_size +
5577                                 sizeof(struct packet_msg_prot) * 2;
5578         else
5579                 parser->patched_cb_size = parser->user_cb_size;
5580
5581         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5582                                 parser->patched_cb_size, false, false,
5583                                 &patched_cb_handle);
5584
5585         if (rc) {
5586                 dev_err(hdev->dev,
5587                         "Failed to allocate patched CB for DMA CS %d\n",
5588                         rc);
5589                 return rc;
5590         }
5591
5592         patched_cb_handle >>= PAGE_SHIFT;
5593         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5594                                 (u32) patched_cb_handle);
5595         /* hl_cb_get should never fail */
5596         if (!parser->patched_cb) {
5597                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5598                         (u32) patched_cb_handle);
5599                 rc = -EFAULT;
5600                 goto out;
5601         }
5602
5603         /*
5604          * The check that parser->user_cb_size <= parser->user_cb->size was done
5605          * in validate_queue_index().
5606          */
5607         memcpy(parser->patched_cb->kernel_address,
5608                 parser->user_cb->kernel_address,
5609                 parser->user_cb_size);
5610
5611         patched_cb_size = parser->patched_cb_size;
5612
5613         /* Validate patched CB instead of user CB */
5614         user_cb = parser->user_cb;
5615         parser->user_cb = parser->patched_cb;
5616         rc = gaudi_validate_cb(hdev, parser, true);
5617         parser->user_cb = user_cb;
5618
5619         if (rc) {
5620                 hl_cb_put(parser->patched_cb);
5621                 goto out;
5622         }
5623
5624         if (patched_cb_size != parser->patched_cb_size) {
5625                 dev_err(hdev->dev, "patched CB size mismatch\n");
5626                 hl_cb_put(parser->patched_cb);
5627                 rc = -EINVAL;
5628                 goto out;
5629         }
5630
5631 out:
5632         /*
5633          * Always call cb destroy here because we still hold one reference
5634          * to the CB from the earlier cb_get. After the job completes,
5635          * cb_put will release it, but here we only want to remove it from
5636          * the idr
5637          */
5638         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5639                                         patched_cb_handle << PAGE_SHIFT);
5640
5641         return rc;
5642 }
5643
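/*
 * Without MMU, the user CB is first validated to compute the patched CB size
 * and then patched so every LIN_DMA packet points at the DMA-mapped host
 * memory.
 */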
5644 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5645                 struct hl_cs_parser *parser)
5646 {
5647         u64 patched_cb_handle;
5648         int rc;
5649
5650         rc = gaudi_validate_cb(hdev, parser, false);
5651
5652         if (rc)
5653                 goto free_userptr;
5654
5655         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5656                                 parser->patched_cb_size, false, false,
5657                                 &patched_cb_handle);
5658         if (rc) {
5659                 dev_err(hdev->dev,
5660                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5661                 goto free_userptr;
5662         }
5663
5664         patched_cb_handle >>= PAGE_SHIFT;
5665         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5666                                 (u32) patched_cb_handle);
5667         /* hl_cb_get should never fail here */
5668         if (!parser->patched_cb) {
5669                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5670                                 (u32) patched_cb_handle);
5671                 rc = -EFAULT;
5672                 goto out;
5673         }
5674
5675         rc = gaudi_patch_cb(hdev, parser);
5676
5677         if (rc)
5678                 hl_cb_put(parser->patched_cb);
5679
5680 out:
5681         /*
5682          * Always call cb destroy here because we still hold one reference
5683          * to the CB from the earlier cb_get. After the job completes,
5684          * cb_put will release it, but here we only want to remove it from
5685          * the idr
5686          */
5687         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5688                                 patched_cb_handle << PAGE_SHIFT);
5689
5690 free_userptr:
5691         if (rc)
5692                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5693         return rc;
5694 }
5695
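/*
 * Jobs on internal queues are not patched; just verify that the queue is
 * enabled and that the CB address range falls entirely inside SRAM, DRAM or
 * the PMMU virtual address range.
 */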
5696 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5697                                         struct hl_cs_parser *parser)
5698 {
5699         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5700         struct gaudi_device *gaudi = hdev->asic_specific;
5701         u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5702                 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5703
5704         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5705                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5706                         (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5707                 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5708                                 parser->hw_queue_id);
5709                 return -EINVAL;
5710         }
5711
5712         /* For internal queue jobs just check if CB address is valid */
5713         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5714                                         parser->user_cb_size,
5715                                         asic_prop->sram_user_base_address,
5716                                         asic_prop->sram_end_address))
5717                 return 0;
5718
5719         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5720                                         parser->user_cb_size,
5721                                         asic_prop->dram_user_base_address,
5722                                         asic_prop->dram_end_address))
5723                 return 0;
5724
5725         /* PMMU and HPMMU addresses are equal, check only one of them */
5726         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5727                                         parser->user_cb_size,
5728                                         asic_prop->pmmu.start_addr,
5729                                         asic_prop->pmmu.end_addr))
5730                 return 0;
5731
5732         dev_err(hdev->dev,
5733                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5734                 parser->user_cb, parser->user_cb_size);
5735
5736         return -EFAULT;
5737 }
5738
5739 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5740 {
5741         struct gaudi_device *gaudi = hdev->asic_specific;
5742
5743         if (parser->queue_type == QUEUE_TYPE_INT)
5744                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5745
5746         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5747                 return gaudi_parse_cb_mmu(hdev, parser);
5748         else
5749                 return gaudi_parse_cb_no_mmu(hdev, parser);
5750 }
5751
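/*
 * Append the two trailing MSG_PROT packets to a job CB: the first writes the
 * completion value to the CQ and the second triggers the MSI/MSI-X interrupt.
 */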
5752 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5753                                         void *kernel_address, u32 len,
5754                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
5755                                         bool eb)
5756 {
5757         struct gaudi_device *gaudi = hdev->asic_specific;
5758         struct packet_msg_prot *cq_pkt;
5759         u64 msi_addr;
5760         u32 tmp;
5761
5762         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5763
5764         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5765         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5766
5767         if (eb)
5768                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5769
5770         cq_pkt->ctl = cpu_to_le32(tmp);
5771         cq_pkt->value = cpu_to_le32(cq_val);
5772         cq_pkt->addr = cpu_to_le64(cq_addr);
5773
5774         cq_pkt++;
5775
5776         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5777         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5778         cq_pkt->ctl = cpu_to_le32(tmp);
5779         cq_pkt->value = cpu_to_le32(1);
5780
5781         if (gaudi->multi_msi_mode)
5782                 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5783         else
5784                 msi_addr = mmPCIE_CORE_MSI_REQ;
5785
5786         cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5787 }
5788
5789 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5790 {
5791         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5792 }
5793
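/*
 * Memset a device memory range by building a single memset LIN_DMA packet in
 * a kernel CB and sending it as a job through QMAN0.
 */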
5794 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5795                                         u32 size, u64 val)
5796 {
5797         struct packet_lin_dma *lin_dma_pkt;
5798         struct hl_cs_job *job;
5799         u32 cb_size, ctl, err_cause;
5800         struct hl_cb *cb;
5801         u64 id;
5802         int rc;
5803
5804         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5805         if (!cb)
5806                 return -EFAULT;
5807
5808         lin_dma_pkt = cb->kernel_address;
5809         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5810         cb_size = sizeof(*lin_dma_pkt);
5811
5812         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5813         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5814         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5815         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5816         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5817
5818         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5819         lin_dma_pkt->src_addr = cpu_to_le64(val);
5820         lin_dma_pkt->dst_addr = cpu_to_le64(addr);
5821         lin_dma_pkt->tsize = cpu_to_le32(size);
5822
5823         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5824         if (!job) {
5825                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5826                 rc = -ENOMEM;
5827                 goto release_cb;
5828         }
5829
5830         /* Verify DMA is OK */
5831         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5832         if (err_cause && !hdev->init_done) {
5833                 dev_dbg(hdev->dev,
5834                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5835                         err_cause);
5836                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5837         }
5838
5839         job->id = 0;
5840         job->user_cb = cb;
5841         atomic_inc(&job->user_cb->cs_cnt);
5842         job->user_cb_size = cb_size;
5843         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5844         job->patched_cb = job->user_cb;
5845         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5846
5847         hl_debugfs_add_job(hdev, job);
5848
5849         rc = gaudi_send_job_on_qman0(hdev, job);
5850         hl_debugfs_remove_job(hdev, job);
5851         kfree(job);
5852         atomic_dec(&cb->cs_cnt);
5853
5854         /* Verify DMA is OK */
5855         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5856         if (err_cause) {
5857                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5858                 rc = -EIO;
5859                 if (!hdev->init_done) {
5860                         dev_dbg(hdev->dev,
5861                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5862                                 err_cause);
5863                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5864                 }
5865         }
5866
5867 release_cb:
5868         id = cb->id;
5869         hl_cb_put(cb);
5870         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5871
5872         return rc;
5873 }
5874
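/*
 * Write the same value to num_regs consecutive registers by building a CB of
 * MSG_LONG packets (one per register) and sending it as a job through QMAN0.
 */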
5875 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5876                                         u32 num_regs, u32 val)
5877 {
5878         struct packet_msg_long *pkt;
5879         struct hl_cs_job *job;
5880         u32 cb_size, ctl;
5881         struct hl_cb *cb;
5882         int i, rc;
5883
5884         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5885
5886         if (cb_size > SZ_2M) {
5887                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M / SZ_1M);
5888                 return -ENOMEM;
5889         }
5890
5891         cb = hl_cb_kernel_create(hdev, cb_size, false);
5892         if (!cb)
5893                 return -EFAULT;
5894
5895         pkt = cb->kernel_address;
5896
5897         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5898         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5899         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5900         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5901         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5902
5903         for (i = 0; i < num_regs ; i++, pkt++) {
5904                 pkt->ctl = cpu_to_le32(ctl);
5905                 pkt->value = cpu_to_le32(val);
5906                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5907         }
5908
5909         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5910         if (!job) {
5911                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5912                 rc = -ENOMEM;
5913                 goto release_cb;
5914         }
5915
5916         job->id = 0;
5917         job->user_cb = cb;
5918         atomic_inc(&job->user_cb->cs_cnt);
5919         job->user_cb_size = cb_size;
5920         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5921         job->patched_cb = job->user_cb;
5922         job->job_cb_size = cb_size;
5923
5924         hl_debugfs_add_job(hdev, job);
5925
5926         rc = gaudi_send_job_on_qman0(hdev, job);
5927         hl_debugfs_remove_job(hdev, job);
5928         kfree(job);
5929         atomic_dec(&cb->cs_cnt);
5930
5931 release_cb:
5932         hl_cb_put(cb);
5933         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5934
5935         return rc;
5936 }
5937
5938 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5939 {
5940         u64 base_addr;
5941         u32 num_regs;
5942         int rc;
5943
5944         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5945         num_regs = NUM_OF_SOB_IN_BLOCK;
5946         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5947         if (rc) {
5948                 dev_err(hdev->dev, "failed resetting SM registers");
5949                 return -ENOMEM;
5950         }
5951
5952         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5953         num_regs = NUM_OF_SOB_IN_BLOCK;
5954         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5955         if (rc) {
5956                 dev_err(hdev->dev, "failed resetting SM registers");
5957                 return -ENOMEM;
5958         }
5959
5960         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5961         num_regs = NUM_OF_SOB_IN_BLOCK;
5962         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5963         if (rc) {
5964                 dev_err(hdev->dev, "failed resetting SM registers");
5965                 return -ENOMEM;
5966         }
5967
5968         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5969         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5970         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5971         if (rc) {
5972                 dev_err(hdev->dev, "failed resetting SM registers");
5973                 return -ENOMEM;
5974         }
5975
5976         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5977         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5978         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5979         if (rc) {
5980                 dev_err(hdev->dev, "failed resetting SM registers");
5981                 return -ENOMEM;
5982         }
5983
5984         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5985         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5986         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5987         if (rc) {
5988                 dev_err(hdev->dev, "failed resetting SM registers");
5989                 return -ENOMEM;
5990         }
5991
5992         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5993                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5994         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5995         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5996         if (rc) {
5997                 dev_err(hdev->dev, "failed resetting SM registers");
5998                 return -ENOMEM;
5999         }
6000
6001         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6002                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6003         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6004         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6005         if (rc) {
6006                 dev_err(hdev->dev, "failed resetting SM registers");
6007                 return -ENOMEM;
6008         }
6009
6010         return 0;
6011 }
6012
6013 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6014 {
6015         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6016                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6017         int i;
6018
6019         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6020                 u64 sob_addr = CFG_BASE +
6021                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6022                                 (i * sob_delta);
6023                 u32 dma_offset = i * DMA_CORE_OFFSET;
6024
6025                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6026                                 lower_32_bits(sob_addr));
6027                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6028                                 upper_32_bits(sob_addr));
6029                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6030
6031                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6032                  * modified by the user for SRAM reduction
6033                  */
6034                 if (i > 1)
6035                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6036                                                                 0x00000001);
6037         }
6038 }
6039
6040 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6041 {
6042         u32 qman_offset;
6043         int i;
6044
6045         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6046                 qman_offset = i * DMA_QMAN_OFFSET;
6047                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6048         }
6049
6050         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6051                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6052                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6053         }
6054
6055         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6056                 qman_offset = i * TPC_QMAN_OFFSET;
6057                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6058         }
6059
6060         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6061                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6062                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6063                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6064         }
6065 }
6066
6067 static int gaudi_restore_user_registers(struct hl_device *hdev)
6068 {
6069         int rc;
6070
6071         rc = gaudi_restore_sm_registers(hdev);
6072         if (rc)
6073                 return rc;
6074
6075         gaudi_restore_dma_registers(hdev);
6076         gaudi_restore_qm_registers(hdev);
6077
6078         return 0;
6079 }
6080
6081 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6082 {
6083         return 0;
6084 }
6085
6086 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6087 {
6088         struct asic_fixed_properties *prop = &hdev->asic_prop;
6089         struct gaudi_device *gaudi = hdev->asic_specific;
6090         u64 addr = prop->mmu_pgt_addr;
6091         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6092
6093         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6094                 return 0;
6095
6096         return gaudi_memset_device_memory(hdev, addr, size, 0);
6097 }
6098
6099 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6100 {
6101
6102 }
6103
6104 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6105                         bool user_address, u32 *val)
6106 {
6107         struct asic_fixed_properties *prop = &hdev->asic_prop;
6108         u64 hbm_bar_addr, host_phys_end;
6109         int rc = 0;
6110
6111         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6112
6113         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6114
6115                 *val = RREG32(addr - CFG_BASE);
6116
6117         } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6118
6119                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6120
6121         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6122
6123                 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6124
6125                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6126
6127                 if (hbm_bar_addr != U64_MAX) {
6128                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6129                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6130                 }
6131
6132                 if (hbm_bar_addr == U64_MAX)
6133                         rc = -EIO;
6134
6135         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6136                         user_address && !iommu_present(&pci_bus_type)) {
6137
6138                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6139
6140         } else {
6141                 rc = -EFAULT;
6142         }
6143
6144         return rc;
6145 }
6146
6147 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6148                         bool user_address, u32 val)
6149 {
6150         struct asic_fixed_properties *prop = &hdev->asic_prop;
6151         u64 hbm_bar_addr, host_phys_end;
6152         int rc = 0;
6153
6154         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6155
6156         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6157
6158                 WREG32(addr - CFG_BASE, val);
6159
6160         } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6161
6162                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6163
6164         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6165
6166                 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6167
6168                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6169
6170                 if (hbm_bar_addr != U64_MAX) {
6171                         writel(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6172                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6173                 }
6174
6175                 if (hbm_bar_addr == U64_MAX)
6176                         rc = -EIO;
6177
6178         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6179                         user_address && !iommu_present(&pci_bus_type)) {
6180
6181                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6182
6183         } else {
6184                 rc = -EFAULT;
6185         }
6186
6187         return rc;
6188 }
6189
6190 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6191                                 bool user_address, u64 *val)
6192 {
6193         struct asic_fixed_properties *prop = &hdev->asic_prop;
6194         u64 hbm_bar_addr, host_phys_end;
6195         int rc = 0;
6196
6197         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6198
6199         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6200
6201                 u32 val_l = RREG32(addr - CFG_BASE);
6202                 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6203
6204                 *val = (((u64) val_h) << 32) | val_l;
6205
6206         } else if ((addr >= SRAM_BASE_ADDR) &&
6207                         (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6208
6209                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6210
6211         } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6212
6213                 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6214
6215                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6216
6217                 if (hbm_bar_addr != U64_MAX) {
6218                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6219                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6220                 }
6221
6222                 if (hbm_bar_addr == U64_MAX)
6223                         rc = -EIO;
6224
6225         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6226                         user_address && !iommu_present(&pci_bus_type)) {
6227
6228                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6229
6230         } else {
6231                 rc = -EFAULT;
6232         }
6233
6234         return rc;
6235 }
6236
6237 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6238                                 bool user_address, u64 val)
6239 {
6240         struct asic_fixed_properties *prop = &hdev->asic_prop;
6241         u64 hbm_bar_addr, host_phys_end;
6242         int rc = 0;
6243
6244         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6245
6246         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6247
6248                 WREG32(addr - CFG_BASE, lower_32_bits(val));
6249                 WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
6250
6251         } else if ((addr >= SRAM_BASE_ADDR) &&
6252                         (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6253
6254                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR));
6255
6256         } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6257
6258                 u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6259
6260                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6261
6262                 if (hbm_bar_addr != U64_MAX) {
6263                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));
6264                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr);
6265                 }
6266
6267                 if (hbm_bar_addr == U64_MAX)
6268                         rc = -EIO;
6269
6270         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6271                         user_address && !iommu_present(&pci_bus_type)) {
6272
6273                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6274
6275         } else {
6276                 rc = -EFAULT;
6277         }
6278
6279         return rc;
6280 }
6281
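/*
 * Program a DMA core directly (bypassing its QMAN) to copy size_to_dma bytes
 * from the given address into the given DMA address, then poll until the
 * engine is no longer busy and check its error cause register.
 */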
6282 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6283                                         u32 size_to_dma, dma_addr_t dma_addr)
6284 {
6285         u32 err_cause, val;
6286         u64 dma_offset;
6287         int rc;
6288
6289         dma_offset = dma_id * DMA_CORE_OFFSET;
6290
6291         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6292         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6293         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6294         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6295         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6296         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6297                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6298
6299         rc = hl_poll_timeout(
6300                 hdev,
6301                 mmDMA0_CORE_STS0 + dma_offset,
6302                 val,
6303                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6304                 0,
6305                 1000000);
6306
6307         if (rc) {
6308                 dev_err(hdev->dev,
6309                         "DMA %d timed out while reading from 0x%llx\n",
6310                         dma_id, addr);
6311                 return -EIO;
6312         }
6313
6314         /* Verify DMA is OK */
6315         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6316         if (err_cause) {
6317                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6318                 dev_dbg(hdev->dev,
6319                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6320                         err_cause);
6321                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6322
6323                 return -EIO;
6324         }
6325
6326         return 0;
6327 }
6328
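/*
 * Read a device memory range for debugfs by bouncing it through a 2MB
 * coherent host buffer, using whichever PCI DMA engine is currently idle.
 * The QMAN CPs are stopped around the transfer so the engine is not used
 * concurrently by submitted jobs.
 */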
6329 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6330                                 void *blob_addr)
6331 {
6332         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6333         u32 qm_glbl_sts0, qm_cgm_sts;
6334         u64 dma_offset, qm_offset;
6335         dma_addr_t dma_addr;
6336         void *kernel_addr;
6337         bool is_eng_idle;
6338         int rc = 0, dma_id;
6339
6340         kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6341                                                 hdev, SZ_2M,
6342                                                 &dma_addr,
6343                                                 GFP_KERNEL | __GFP_ZERO);
6344
6345         if (!kernel_addr)
6346                 return -ENOMEM;
6347
6348         hdev->asic_funcs->hw_queues_lock(hdev);
6349
6350         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6351         dma_offset = dma_id * DMA_CORE_OFFSET;
6352         qm_offset = dma_id * DMA_QMAN_OFFSET;
6353         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6354         qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6355         qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6356         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6357                       IS_DMA_IDLE(dma_core_sts0);
6358
6359         if (!is_eng_idle) {
6360                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6361                 dma_offset = dma_id * DMA_CORE_OFFSET;
6362                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6363                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6364                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6365                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6366                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6367                               IS_DMA_IDLE(dma_core_sts0);
6368
6369                 if (!is_eng_idle) {
6370                         dev_err_ratelimited(hdev->dev,
6371                                 "Can't read via DMA because it is BUSY\n");
6372                         rc = -EAGAIN;
6373                         goto out;
6374                 }
6375         }
6376
6377         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6378         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6379                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6380
6381         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6382          * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6383          * ASID
6384          */
6385         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6386
6387         /* Verify DMA is OK */
6388         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6389         if (err_cause) {
6390                 dev_dbg(hdev->dev,
6391                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6392                         err_cause);
6393                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6394         }
6395
6396         pos = 0;
6397         size_left = size;
6398         size_to_dma = SZ_2M;
6399
6400         while (size_left > 0) {
6401
6402                 if (size_left < SZ_2M)
6403                         size_to_dma = size_left;
6404
6405                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6406                                                 dma_addr);
6407                 if (rc)
6408                         break;
6409
6410                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6411
6412                 if (size_left <= SZ_2M)
6413                         break;
6414
6415                 pos += SZ_2M;
6416                 addr += SZ_2M;
6417                 size_left -= SZ_2M;
6418         }
6419
6420         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6421          * using the compute ctx ASID, if it exists. If not, use the kernel ctx
6422          * ASID
6423          */
6424         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6425                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6426
6427         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6428
6429 out:
6430         hdev->asic_funcs->hw_queues_unlock(hdev);
6431
6432         hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6433                                                 dma_addr);
6434
6435         return rc;
6436 }
6437
6438 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6439 {
6440         struct gaudi_device *gaudi = hdev->asic_specific;
6441
6442         if (hdev->reset_info.hard_reset_pending)
6443                 return U64_MAX;
6444
6445         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6446                         (addr - gaudi->hbm_bar_cur_addr));
6447 }
6448
6449 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6450 {
6451         struct gaudi_device *gaudi = hdev->asic_specific;
6452
6453         if (hdev->reset_info.hard_reset_pending)
6454                 return;
6455
6456         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6457                         (addr - gaudi->hbm_bar_cur_addr));
6458 }
6459
6460 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6461 {
6462         /* mask to zero the MMBP and ASID bits */
6463         WREG32_AND(reg, ~0x7FF);
6464         WREG32_OR(reg, asid);
6465 }
6466
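/*
 * Program the given ASID into the non-secure properties and ARUSER/AWUSER
 * registers of all engines, so their transactions pass through the MMU with
 * that ASID (MMU bypass cleared).
 */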
6467 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6468 {
6469         struct gaudi_device *gaudi = hdev->asic_specific;
6470
6471         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6472                 return;
6473
6474         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6475                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6476                 return;
6477         }
6478
6479         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6480         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6481         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6482         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6483         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6484
6485         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6486         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6487         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6488         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6489         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6490
6491         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6492         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6493         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6494         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6495         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6496
6497         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6498         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6499         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6500         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6501         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6502
6503         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6504         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6505         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6506         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6507         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6508
6509         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6510         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6511         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6512         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6513         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6514
6515         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6516         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6517         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6518         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6519         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6520
6521         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6522         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6523         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6524         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6525         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6526
6527         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6528         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6529         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6530         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6531         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6532         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6533         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6534         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6535
6536         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6537         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6538         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6539         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6540         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6541         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6542         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6543
6544         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6545         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6546         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6547         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6548         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6549         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6550         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6551
6552         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6553         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6554         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6555         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6556         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6557         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6558         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6559
6560         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6561         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6562         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6563         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6564         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6565         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6566         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6567
6568         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6569         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6570         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6571         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6572         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6573         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6574         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6575
6576         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6577         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6578         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6579         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6580         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6581         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6582         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6583
6584         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6585         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6586         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6587         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6588         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6589         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6590         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6591
6592         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6593         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6594         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6595         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6596         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6597         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6598         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6599
6600         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6601         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6602         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6603         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6604         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6605         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6606         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6607         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6608         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6609         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6610
6611         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6612         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6613         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6614         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6615         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6616         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6617         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6618         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6619         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6620         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6621         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6622         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6623
6624         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6625                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6626                                 asid);
6627                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6628                                 asid);
6629                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6630                                 asid);
6631                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6632                                 asid);
6633                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6634                                 asid);
6635         }
6636
6637         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6638                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6639                                 asid);
6640                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6641                                 asid);
6642                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6643                                 asid);
6644                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6645                                 asid);
6646                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6647                                 asid);
6648         }
6649
6650         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6651                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6652                                 asid);
6653                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6654                                 asid);
6655                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6656                                 asid);
6657                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6658                                 asid);
6659                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6660                                 asid);
6661         }
6662
6663         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6664                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6665                                 asid);
6666                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6667                                 asid);
6668                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6669                                 asid);
6670                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6671                                 asid);
6672                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6673                                 asid);
6674         }
6675
6676         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6677                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6678                                 asid);
6679                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6680                                 asid);
6681                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6682                                 asid);
6683                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6684                                 asid);
6685                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6686                                 asid);
6687         }
6688
6689         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6690                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6691                                 asid);
6692                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6693                                 asid);
6694                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6695                                 asid);
6696                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6697                                 asid);
6698                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6699                                 asid);
6700         }
6701
6702         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6703                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6704                                 asid);
6705                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6706                                 asid);
6707                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6708                                 asid);
6709                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6710                                 asid);
6711                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6712                                 asid);
6713         }
6714
6715         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6716                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6717                                 asid);
6718                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6719                                 asid);
6720                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6721                                 asid);
6722                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6723                                 asid);
6724                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6725                                 asid);
6726         }
6727
6728         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6729                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6730                                 asid);
6731                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6732                                 asid);
6733                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6734                                 asid);
6735                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6736                                 asid);
6737                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6738                                 asid);
6739         }
6740
6741         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6742                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6743                                 asid);
6744                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6745                                 asid);
6746                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6747                                 asid);
6748                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6749                                 asid);
6750                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6751                                 asid);
6752         }
6753
6754         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6755         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6756 }
6757
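/*
 * Send a driver-generated (patched) CB on the QMAN0 DMA queue
 * (GAUDI_QUEUE_ID_DMA_0_0). The device must be idle. A MSG_PROT fence packet
 * at the end of the CB writes GAUDI_QMAN0_FENCE_VAL to a host fence buffer,
 * which is polled until the job completes or times out. The DMA core
 * protection bits are switched around the job.
 */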
6758 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6759                 struct hl_cs_job *job)
6760 {
6761         struct packet_msg_prot *fence_pkt;
6762         u32 *fence_ptr;
6763         dma_addr_t fence_dma_addr;
6764         struct hl_cb *cb;
6765         u32 tmp, timeout, dma_offset;
6766         int rc;
6767
6768         if (hdev->pldm)
6769                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6770         else
6771                 timeout = HL_DEVICE_TIMEOUT_USEC;
6772
6773         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6774                 dev_err_ratelimited(hdev->dev,
6775                         "Can't send driver job on QMAN0 because the device is not idle\n");
6776                 return -EBUSY;
6777         }
6778
6779         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6780                                                         &fence_dma_addr);
6781         if (!fence_ptr) {
6782                 dev_err(hdev->dev,
6783                         "Failed to allocate fence memory for QMAN0\n");
6784                 return -ENOMEM;
6785         }
6786
6787         cb = job->patched_cb;
6788
6789         fence_pkt = cb->kernel_address +
6790                         job->job_cb_size - sizeof(struct packet_msg_prot);
6791
6792         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6793         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6794         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6795
6796         fence_pkt->ctl = cpu_to_le32(tmp);
6797         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6798         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6799
6800         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6801
6802         WREG32(mmDMA0_CORE_PROT + dma_offset,
6803                         BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6804
6805         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6806                                         job->job_cb_size, cb->bus_address);
6807         if (rc) {
6808                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6809                 goto free_fence_ptr;
6810         }
6811
6812         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6813                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6814                                 timeout, true);
6815
6816         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6817
6818         if (rc == -ETIMEDOUT) {
6819                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6820                 goto free_fence_ptr;
6821         }
6822
6823 free_fence_ptr:
6824         WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6825
6826         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6827                                         fence_dma_addr);
6828         return rc;
6829 }
6830
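/*
 * Copy the event's name from the IRQ map table into desc, or "N/A" if the
 * event type is out of range or not marked valid.
 */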
6831 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6832 {
6833         if (event_type >= GAUDI_EVENT_SIZE)
6834                 goto event_not_supported;
6835
6836         if (!gaudi_irq_map_table[event_type].valid)
6837                 goto event_not_supported;
6838
6839         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6840
6841         return;
6842
6843 event_not_supported:
6844         snprintf(desc, size, "N/A");
6845 }
6846
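/*
 * Each RAZWI router location (x_y) is shared by two DMA cores. Read both
 * cores' ERR_CAUSE registers to figure out which one issued the offending
 * access, return its name and report the engine id(s) of the suspects.
 */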
6847 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6848                                                         bool is_write, s32 *engine_id_1,
6849                                                         s32 *engine_id_2)
6850 {
6851         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6852
6853         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6854                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6855
6856         switch (x_y) {
6857         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6858         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6859                 dma_id[0] = 0;
6860                 dma_id[1] = 2;
6861                 break;
6862         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6863         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6864                 dma_id[0] = 1;
6865                 dma_id[1] = 3;
6866                 break;
6867         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6868         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6869                 dma_id[0] = 4;
6870                 dma_id[1] = 6;
6871                 break;
6872         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6873         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6874                 dma_id[0] = 5;
6875                 dma_id[1] = 7;
6876                 break;
6877         default:
6878                 goto unknown_initiator;
6879         }
6880
6881         for (i = 0 ; i < 2 ; i++) {
6882                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6883                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6884         }
6885
6886         switch (x_y) {
6887         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6888         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6889                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6890                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6891                         return "DMA0";
6892                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6893                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6894                         return "DMA2";
6895                 } else {
6896                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6897                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6898                         return "DMA0 or DMA2";
6899                 }
6900         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6901         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6902                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6903                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6904                         return "DMA1";
6905                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6906                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6907                         return "DMA3";
6908                 } else {
6909                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6910                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6911                         return "DMA1 or DMA3";
6912                 }
6913         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6914         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6915                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6916                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6917                         return "DMA4";
6918                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6919                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6920                         return "DMA6";
6921                 } else {
6922                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6923                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6924                         return "DMA4 or DMA6";
6925                 }
6926         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6927         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6928                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6929                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6930                         return "DMA5";
6931                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6932                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6933                         return "DMA7";
6934                 } else {
6935                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6936                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6937                         return "DMA5 or DMA7";
6938                 }
6939         }
6940
6941 unknown_initiator:
6942         return "unknown initiator";
6943 }
6944
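/*
 * Translate the RAZWI initiator ID (router X/Y location plus AXI ID) captured
 * by the MMU into an engine name, filling the engine id(s) when the initiator
 * is an engine.
 */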
6945 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6946                                                         u32 *engine_id_1, u32 *engine_id_2)
6947 {
6948         u32 val, x_y, axi_id;
6949
6950         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6951                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
6952         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6953                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6954         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6955                         RAZWI_INITIATOR_AXI_ID_SHIFT);
6956
6957         switch (x_y) {
6958         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6959                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6960                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6961                         return "TPC0";
6962                 }
6963                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6964                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6965                         return "NIC0";
6966                 }
6967                 break;
6968         case RAZWI_INITIATOR_ID_X_Y_TPC1:
6969                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6970                 return "TPC1";
6971         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6972         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6973                 *engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6974                 return "MME0";
6975         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6976         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6977                 *engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6978                 return "MME1";
6979         case RAZWI_INITIATOR_ID_X_Y_TPC2:
6980                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6981                 return "TPC2";
6982         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6983                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6984                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6985                         return "TPC3";
6986                 }
6987                 /* PCI, CPU and PSOC do not have an engine id */
6988                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6989                         return "PCI";
6990                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6991                         return "CPU";
6992                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6993                         return "PSOC";
6994                 break;
6995         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6996         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6997         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6998         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6999         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7000         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7001         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7002         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7003                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
7004                                 engine_id_1, engine_id_2);
7005         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7006                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
7007                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
7008                         return "TPC4";
7009                 }
7010                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
7011                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
7012                         return "NIC1";
7013                 }
7014                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
7015                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
7016                         return "NIC2";
7017                 }
7018                 break;
7019         case RAZWI_INITIATOR_ID_X_Y_TPC5:
7020                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
7021                 return "TPC5";
7022         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7023         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7024                 *engine_id_1 = GAUDI_ENGINE_ID_MME_2;
7025                 return "MME2";
7026         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7027         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7028                 *engine_id_1 = GAUDI_ENGINE_ID_MME_3;
7029                 return "MME3";
7030         case RAZWI_INITIATOR_ID_X_Y_TPC6:
7031                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
7032                 return "TPC6";
7033         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7034                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
7035                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
7036                         return "TPC7";
7037                 }
7038                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
7039                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
7040                         return "NIC4";
7041                 }
7042                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
7043                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
7044                         return "NIC5";
7045                 }
7046                 break;
7047         default:
7048                 break;
7049         }
7050
7051         dev_err(hdev->dev,
7052                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7053                 val,
7054                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7055                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7056                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7057                         RAZWI_INITIATOR_AXI_ID_MASK);
7058
7059         return "unknown initiator";
7060 }
7061
7062 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_id_1,
7063                                                 u32 *engine_id_2)
7064 {
7065
7066         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7067                 dev_err_ratelimited(hdev->dev,
7068                         "RAZWI event caused by illegal write of %s\n",
7069                         gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
7070                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7071         }
7072
7073         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7074                 dev_err_ratelimited(hdev->dev,
7075                         "RAZWI event caused by illegal read of %s\n",
7076                         gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
7077                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7078         }
7079 }
7080
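/*
 * Check the MMU page-fault and access-error capture registers. If an entry is
 * valid, reconstruct the faulting VA from the captured bits, log it, report it
 * via addr/type and clear the capture register for the next event.
 */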
7081 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u8 *type)
7082 {
7083         struct gaudi_device *gaudi = hdev->asic_specific;
7084         u32 val;
7085
7086         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7087                 return;
7088
7089         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7090         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7091                 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7092                 *addr <<= 32;
7093                 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7094
7095                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
7096                 *type = HL_RAZWI_PAGE_FAULT;
7097
7098                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7099         }
7100
7101         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7102         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7103                 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7104                 *addr <<= 32;
7105                 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7106
7107                 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
7108                 *type = HL_RAZWI_MMU_ACCESS_ERROR;
7109
7110                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7111         }
7112 }
7113
7114 /*
7115  *  +-------------------+------------------------------------------------------+
7116  *  | Configuration Reg |                     Description                      |
7117  *  |      Address      |                                                      |
7118  *  +-------------------+------------------------------------------------------+
7119  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7120  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7121  *  |                   |0xF34 memory wrappers 63:32                           |
7122  *  |                   |0xF38 memory wrappers 95:64                           |
7123  *  |                   |0xF3C memory wrappers 127:96                          |
7124  *  +-------------------+------------------------------------------------------+
7125  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7126  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7127  *  |                   |0xF44 memory wrappers 63:32                           |
7128  *  |                   |0xF48 memory wrappers 95:64                           |
7129  *  |                   |0xF4C memory wrappers 127:96                          |
7130  *  +-------------------+------------------------------------------------------+
7131  */
7132 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7133                 struct ecc_info_extract_params *params, u64 *ecc_address,
7134                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7135 {
7136         u32 i, num_mem_regs, reg, err_bit;
7137         u64 err_addr, err_word = 0;
7138
7139         num_mem_regs = params->num_memories / 32 +
7140                         ((params->num_memories % 32) ? 1 : 0);
7141
7142         if (params->block_address >= CFG_BASE)
7143                 params->block_address -= CFG_BASE;
7144
7145         if (params->derr)
7146                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7147         else
7148                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7149
7150         /* Set invalid wrapper index */
7151         *memory_wrapper_idx = 0xFF;
7152
7153         /* Iterate through memory wrappers, a single bit must be set */
7154         for (i = 0 ; i < num_mem_regs ; i++) {
7155                 /* each ECC status register covers 32 memory wrappers */
7156                 err_word = RREG32(err_addr + i * 4);
7157                 if (err_word) {
7158                         err_bit = __ffs(err_word);
7159                         *memory_wrapper_idx = err_bit + (32 * i);
7160                         break;
7161                 }
7162         }
7163
7164         if (*memory_wrapper_idx == 0xFF) {
7165                 dev_err(hdev->dev, "ECC error information cannot be found\n");
7166                 return -EINVAL;
7167         }
7168
7169         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7170                         *memory_wrapper_idx);
7171
7172         *ecc_address =
7173                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7174         *ecc_syndrom =
7175                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7176
7177         /* Clear error indication */
7178         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7179         if (params->derr)
7180                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7181         else
7182                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7183
7184         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7185
7186         return 0;
7187 }
7188
7189 /*
7190  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7191  *
7192  * @idx: the current pi/ci value
7193  * @q_len: the queue length (power of 2)
7194  *
7195  * @return the cyclically decremented index
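 *  e.g. gaudi_queue_idx_dec(0, 8) = (0 + 8 - 1) & 0x7 = 7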
7196  */
7197 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7198 {
7199         u32 mask = q_len - 1;
7200
7201         /*
7202          * Modular decrement is equivalent to adding (q_len - 1);
7203          * we then take the LSBs to keep the result in the
7204          * range [0, q_len - 1].
7205          */
7206         return (idx + q_len - 1) & mask;
7207 }
7208
7209 /**
7210  * gaudi_print_sw_config_stream_data - print SW config stream data
7211  *
7212  * @hdev: pointer to the habanalabs device structure
7213  * @stream: the QMAN's stream
7214  * @qman_base: base address of QMAN registers block
7215  */
7216 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7217                                                 u64 qman_base)
7218 {
7219         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7220         u32 cq_ptr_lo_off, size;
7221
7222         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7223
7224         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7225                                                 stream * cq_ptr_lo_off;
7226         cq_ptr_hi = cq_ptr_lo +
7227                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7228         cq_tsize = cq_ptr_lo +
7229                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7230
7231         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7232         size = RREG32(cq_tsize);
7233         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
7234                                                         stream, cq_ptr, size);
7235 }
7236
7237 /**
7238  * gaudi_print_last_pqes_on_err - print last PQEs on error
7239  *
7240  * @hdev: pointer to the habanalabs device structure
7241  * @qid_base: first QID of the QMAN (out of 4 streams)
7242  * @stream: the QMAN's stream
7243  * @qman_base: base address of QMAN registers block
7244  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7245  */
7246 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7247                                                 u32 stream, u64 qman_base,
7248                                                 bool pr_sw_conf)
7249 {
7250         u32 ci, qm_ci_stream_off, queue_len;
7251         struct hl_hw_queue *q;
7252         u64 pq_ci;
7253         int i;
7254
7255         q = &hdev->kernel_queues[qid_base + stream];
7256
7257         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7258         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7259                                                 stream * qm_ci_stream_off;
7260
7261         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7262                                         q->int_queue_len : HL_QUEUE_LENGTH;
7263
7264         hdev->asic_funcs->hw_queues_lock(hdev);
7265
7266         if (pr_sw_conf)
7267                 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7268
7269         ci = RREG32(pq_ci);
7270
7271         /* we should start printing from ci - 1 */
7272         ci = gaudi_queue_idx_dec(ci, queue_len);
7273
7274         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7275                 struct hl_bd *bd;
7276                 u64 addr;
7277                 u32 len;
7278
7279                 bd = q->kernel_address;
7280                 bd += ci;
7281
7282                 len = le32_to_cpu(bd->len);
7283                 /* len 0 means an uninitialized entry - break */
7284                 if (!len)
7285                         break;
7286
7287                 addr = le64_to_cpu(bd->ptr);
7288
7289                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
7290                                                         stream, ci, addr, len);
7291
7292                 /* get previous ci, wrap if needed */
7293                 ci = gaudi_queue_idx_dec(ci, queue_len);
7294         }
7295
7296         hdev->asic_funcs->hw_queues_unlock(hdev);
7297 }
7298
7299 /**
7300  * print_qman_data_on_err - extract QMAN data on error
7301  *
7302  * @hdev: pointer to the habanalabs device structure
7303  * @qid_base: first QID of the QMAN (out of 4 streams)
7304  * @stream: the QMAN's stream
7305  * @qman_base: base address of QMAN registers block
7306  *
7307  * This function attempts to extract as much data as possible on a QMAN error.
7308  * On an upper CP, print the SW config stream data and the last 8 PQEs.
7309  * On the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7310  */
7311 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7312                                                 u32 stream, u64 qman_base)
7313 {
7314         u32 i;
7315
7316         if (stream != QMAN_STREAMS) {
7317                 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7318                                                                         true);
7319                 return;
7320         }
7321
7322         gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7323
7324         for (i = 0; i < QMAN_STREAMS; i++)
7325                 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7326                                                                         false);
7327 }
7328
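/*
 * Dump the error causes latched in the QMAN's per-stream (and lower CP)
 * GLBL_STS1 registers and in ARB_ERR_CAUSE. When stop_on_err is not set the
 * status bits are cleared; otherwise the QMAN data (CQ/PQEs) is printed.
 */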
7329 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7330                                           const char *qm_name,
7331                                           u64 qman_base,
7332                                           u32 qid_base)
7333 {
7334         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7335         u64 glbl_sts_addr, arb_err_addr;
7336         char reg_desc[32];
7337
7338         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7339         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7340
7341         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7342         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7343                 glbl_sts_clr_val = 0;
7344                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7345
7346                 if (!glbl_sts_val)
7347                         continue;
7348
7349                 if (i == QMAN_STREAMS)
7350                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7351                 else
7352                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7353
7354                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7355                         if (glbl_sts_val & BIT(j)) {
7356                                 dev_err_ratelimited(hdev->dev,
7357                                                 "%s %s. err cause: %s\n",
7358                                                 qm_name, reg_desc,
7359                                                 gaudi_qman_error_cause[j]);
7360                                 glbl_sts_clr_val |= BIT(j);
7361                         }
7362                 }
7363
7364                 /* Write 1 to clear the errors */
7365                 if (!hdev->stop_on_err)
7366                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7367                 else
7368                         print_qman_data_on_err(hdev, qid_base, i, qman_base);
7369         }
7370
7371         arb_err_val = RREG32(arb_err_addr);
7372
7373         if (!arb_err_val)
7374                 return;
7375
7376         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7377                 if (arb_err_val & BIT(j)) {
7378                         dev_err_ratelimited(hdev->dev,
7379                                         "%s ARB_ERR. err cause: %s\n",
7380                                         qm_name,
7381                                         gaudi_qman_arb_error_cause[j]);
7382                 }
7383         }
7384 }
7385
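/*
 * Decode a Sync Manager SEI event: map the DMA_IF_SEI event index to the SM
 * instance name and log the cause (SOB group overflow/underflow, unaligned
 * 4B LBW access or AXI response error) together with the FW-provided log word.
 */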
7386 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7387                 struct hl_eq_sm_sei_data *sei_data)
7388 {
7389         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7390
7391         /* Flip the bits as the enum is ordered in the opposite way */
7392         index = (index ^ 0x3) & 0x3;
7393
7394         switch (sei_data->sei_cause) {
7395         case SM_SEI_SO_OVERFLOW:
7396                 dev_err_ratelimited(hdev->dev,
7397                         "%s SEI Error: SOB Group %u overflow/underflow",
7398                         gaudi_sync_manager_names[index],
7399                         le32_to_cpu(sei_data->sei_log));
7400                 break;
7401         case SM_SEI_LBW_4B_UNALIGNED:
7402                 dev_err_ratelimited(hdev->dev,
7403                         "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7404                         gaudi_sync_manager_names[index],
7405                         le32_to_cpu(sei_data->sei_log));
7406                 break;
7407         case SM_SEI_AXI_RESPONSE_ERR:
7408                 dev_err_ratelimited(hdev->dev,
7409                         "%s SEI Error: AXI ID %u response error",
7410                         gaudi_sync_manager_names[index],
7411                         le32_to_cpu(sei_data->sei_log));
7412                 break;
7413         default:
7414                 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7415                                 le32_to_cpu(sei_data->sei_log));
7416                 break;
7417         }
7418 }
7419
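/*
 * Report an ECC error. The address, syndrome and memory wrapper index are
 * taken from the FW event data when the FW is secured (or for the event
 * ranges handled by FW); otherwise they are extracted directly from the
 * block's ECC registers.
 */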
7420 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7421                 struct hl_eq_ecc_data *ecc_data)
7422 {
7423         struct ecc_info_extract_params params;
7424         u64 ecc_address = 0, ecc_syndrom = 0;
7425         u8 index, memory_wrapper_idx = 0;
7426         bool extract_info_from_fw;
7427         int rc;
7428
7429         if (hdev->asic_prop.fw_security_enabled) {
7430                 extract_info_from_fw = true;
7431                 goto extract_ecc_info;
7432         }
7433
7434         switch (event_type) {
7435         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7436         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7437                 extract_info_from_fw = true;
7438                 break;
7439         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7440                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7441                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7442                 params.num_memories = 90;
7443                 params.derr = false;
7444                 extract_info_from_fw = false;
7445                 break;
7446         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7447                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7448                 params.block_address =
7449                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7450                 params.num_memories = 90;
7451                 params.derr = true;
7452                 extract_info_from_fw = false;
7453                 break;
7454         case GAUDI_EVENT_MME0_ACC_SERR:
7455         case GAUDI_EVENT_MME1_ACC_SERR:
7456         case GAUDI_EVENT_MME2_ACC_SERR:
7457         case GAUDI_EVENT_MME3_ACC_SERR:
7458                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7459                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7460                 params.num_memories = 128;
7461                 params.derr = false;
7462                 extract_info_from_fw = false;
7463                 break;
7464         case GAUDI_EVENT_MME0_ACC_DERR:
7465         case GAUDI_EVENT_MME1_ACC_DERR:
7466         case GAUDI_EVENT_MME2_ACC_DERR:
7467         case GAUDI_EVENT_MME3_ACC_DERR:
7468                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7469                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7470                 params.num_memories = 128;
7471                 params.derr = true;
7472                 extract_info_from_fw = false;
7473                 break;
7474         case GAUDI_EVENT_MME0_SBAB_SERR:
7475         case GAUDI_EVENT_MME1_SBAB_SERR:
7476         case GAUDI_EVENT_MME2_SBAB_SERR:
7477         case GAUDI_EVENT_MME3_SBAB_SERR:
7478                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7479                 params.block_address =
7480                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7481                 params.num_memories = 33;
7482                 params.derr = false;
7483                 extract_info_from_fw = false;
7484                 break;
7485         case GAUDI_EVENT_MME0_SBAB_DERR:
7486         case GAUDI_EVENT_MME1_SBAB_DERR:
7487         case GAUDI_EVENT_MME2_SBAB_DERR:
7488         case GAUDI_EVENT_MME3_SBAB_DERR:
7489                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7490                 params.block_address =
7491                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7492                 params.num_memories = 33;
7493                 params.derr = true;
7494                 extract_info_from_fw = false;
7495                 break;
7496         default:
7497                 return;
7498         }
7499
7500 extract_ecc_info:
7501         if (extract_info_from_fw) {
7502                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7503                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7504                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7505         } else {
7506                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7507                                 &ecc_syndrom, &memory_wrapper_idx);
7508                 if (rc)
7509                         return;
7510         }
7511
7512         dev_err(hdev->dev,
7513                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7514                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7515 }
7516
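/*
 * Resolve the QMAN error event type to the QMAN registers base, the first QID
 * of its 4 streams and a printable name, then run the generic QMAN error
 * handler.
 */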
7517 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7518 {
7519         u64 qman_base;
7520         char desc[32];
7521         u32 qid_base;
7522         u8 index;
7523
7524         switch (event_type) {
7525         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7526                 index = event_type - GAUDI_EVENT_TPC0_QM;
7527                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7528                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7529                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7530                 break;
7531         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7532                 index = event_type - GAUDI_EVENT_MME0_QM;
7533                 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7534                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7535                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7536                 break;
7537         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7538                 index = event_type - GAUDI_EVENT_DMA0_QM;
7539                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7540                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7541                 if (index > 1)
7542                         qid_base++;
7543                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7544                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7545                 break;
7546         case GAUDI_EVENT_NIC0_QM0:
7547                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7548                 qman_base = mmNIC0_QM0_BASE;
7549                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7550                 break;
7551         case GAUDI_EVENT_NIC0_QM1:
7552                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7553                 qman_base = mmNIC0_QM1_BASE;
7554                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7555                 break;
7556         case GAUDI_EVENT_NIC1_QM0:
7557                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7558                 qman_base = mmNIC1_QM0_BASE;
7559                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7560                 break;
7561         case GAUDI_EVENT_NIC1_QM1:
7562                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7563                 qman_base = mmNIC1_QM1_BASE;
7564                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7565                 break;
7566         case GAUDI_EVENT_NIC2_QM0:
7567                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7568                 qman_base = mmNIC2_QM0_BASE;
7569                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7570                 break;
7571         case GAUDI_EVENT_NIC2_QM1:
7572                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7573                 qman_base = mmNIC2_QM1_BASE;
7574                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7575                 break;
7576         case GAUDI_EVENT_NIC3_QM0:
7577                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7578                 qman_base = mmNIC3_QM0_BASE;
7579                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7580                 break;
7581         case GAUDI_EVENT_NIC3_QM1:
7582                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7583                 qman_base = mmNIC3_QM1_BASE;
7584                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7585                 break;
7586         case GAUDI_EVENT_NIC4_QM0:
7587                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7588                 qman_base = mmNIC4_QM0_BASE;
7589                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7590                 break;
7591         case GAUDI_EVENT_NIC4_QM1:
7592                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7593                 qman_base = mmNIC4_QM1_BASE;
7594                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7595                 break;
7596         default:
7597                 return;
7598         }
7599
7600         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7601 }
7602
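/*
 * Log a received H/W event by name. When @razwi is set, also gather the RAZWI
 * initiator and MMU error information and record the parameters of the first
 * razwi in hdev->last_error.
 */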
7603 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7604                                         bool razwi)
7605 {
7606         u32 engine_id_1, engine_id_2;
7607         char desc[64] = "";
7608         u64 razwi_addr = 0;
7609         u8 razwi_type;
7610         int rc;
7611
7612         /*
7613          * Init the engine ids as invalid by default; they get a valid value only if the
7614          * razwi was initiated by an engine that has an engine id.
7615          * Init the razwi type to its default; it is changed only if the razwi was caused
7616          * by a page fault or an MMU access error.
7617          */
7618         engine_id_1 = U16_MAX;
7619         engine_id_2 = U16_MAX;
7620         razwi_type = U8_MAX;
7621
7622         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7623         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7624                 event_type, desc);
7625
7626         if (razwi) {
7627                 gaudi_print_and_get_razwi_info(hdev, &engine_id_1, &engine_id_2);
7628                 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
7629
7630                 /* In case it's the first razwi, save its parameters */
7631                 rc = atomic_cmpxchg(&hdev->last_error.razwi_write_disable, 0, 1);
7632                 if (!rc) {
7633                         hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime;
7634                         hdev->last_error.razwi_timestamp = ktime_get();
7635                         hdev->last_error.razwi_addr = razwi_addr;
7636                         hdev->last_error.razwi_engine_id_1 = engine_id_1;
7637                         hdev->last_error.razwi_engine_id_2 = engine_id_2;
7638                         /*
7639                          * If the first engine id holds a non-valid value, the razwi
7640                          * initiator does not have an engine id
7641                          */
7642                         hdev->last_error.razwi_non_engine_initiator = (engine_id_1 == U16_MAX);
7643                         hdev->last_error.razwi_type = razwi_type;
7644
7645                 }
7646         }
7647 }
7648
7649 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7650                                         struct cpucp_pkt_sync_err *sync_err)
7651 {
7652         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7653
7654         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7655                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7656 }
7657
7658 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7659                                         struct hl_eq_fw_alive *fw_alive)
7660 {
7661         dev_err(hdev->dev,
7662                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7663                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7664                 "Minor" : "Critical", fw_alive->process_id,
7665                 fw_alive->thread_id, fw_alive->uptime_seconds);
7666 }
7667
7668 static int gaudi_non_hard_reset_late_init(struct hl_device *hdev)
7669 {
7670         /* GAUDI doesn't support any reset except hard-reset */
7671         return -EPERM;
7672 }
7673
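/*
 * Report HBM ECC/parity interrupt information for the given HBM device: use
 * the FW-provided data when FW HBM ECC reporting is enabled, otherwise read
 * the HBM MC registers directly (not possible when FW security is enabled).
 */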
7674 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7675                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7676 {
7677         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7678         int rc = 0;
7679
7680         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7681                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7682                 if (!hbm_ecc_data) {
7683                         dev_err(hdev->dev, "No FW ECC data");
7684                         return 0;
7685                 }
7686
7687                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7688                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7689                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7690                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7691                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7692                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7693                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7694                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7695                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7696                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7697                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7698                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7699                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7700                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7701
7702                 dev_err(hdev->dev,
7703                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7704                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7705                 dev_err(hdev->dev,
7706                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7707                         device, ch, hbm_ecc_data->first_addr, type,
7708                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7709                         hbm_ecc_data->dec_cnt);
7710                 return 0;
7711         }
7712
7713         if (hdev->asic_prop.fw_security_enabled) {
7714                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7715                 return 0;
7716         }
7717
7718         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7719         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
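                     /*
                      * Each iteration covers a pseudo-channel pair: the 0x06C/0x060
                      * registers report pseudo-channel (ch * 2) and the 0x07C/0x070
                      * registers report (ch * 2 + 1). The two low bytes of the
                      * interrupt register appear to share the same flag layout, so
                      * they are OR-ed together before the individual bits are decoded.
                      */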
7720                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7721                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7722                 if (val) {
7723                         rc = -EIO;
7724                         dev_err(hdev->dev,
7725                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7726                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7727                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7728                                 (val >> 4) & 0x1);
7729
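                             /*
                              * Assumed layout of the 0x060/0x070 ECC info registers,
                              * derived from the shifts below: bit 9 = first error type,
                              * bits [15:10] = SEC continuous count, bits [23:16] = SEC
                              * count, bits [31:24] = DEC count.
                              */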
7730                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7731                         dev_err(hdev->dev,
7732                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7733                                 device, ch * 2,
7734                                 RREG32(base + ch * 0x1000 + 0x064),
7735                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7736                                 (val2 & 0xFF0000) >> 16,
7737                                 (val2 & 0xFF000000) >> 24);
7738                 }
7739
7740                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7741                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7742                 if (val) {
7743                         rc = -EIO;
7744                         dev_err(hdev->dev,
7745                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7746                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7747                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7748                                 (val >> 4) & 0x1);
7749
7750                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7751                         dev_err(hdev->dev,
7752                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7753                                 device, ch * 2 + 1,
7754                                 RREG32(base + ch * 0x1000 + 0x074),
7755                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7756                                 (val2 & 0xFF0000) >> 16,
7757                                 (val2 & 0xFF000000) >> 24);
7758                 }
7759
7760                 /* Clear interrupts */
7761                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7762                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7763                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7764                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7765                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7766                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7767         }
7768
7769         val  = RREG32(base + 0x8F30);
7770         val2 = RREG32(base + 0x8F34);
7771         if (val | val2) {
7772                 rc = -EIO;
7773                 dev_err(hdev->dev,
7774                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7775                         device, val, val2);
7776         }
7777         val  = RREG32(base + 0x8F40);
7778         val2 = RREG32(base + 0x8F44);
7779         if (val | val2) {
7780                 rc = -EIO;
7781                 dev_err(hdev->dev,
7782                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7783                         device, val, val2);
7784         }
7785
7786         return rc;
7787 }
7788
7789 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7790 {
7791         switch (hbm_event_type) {
7792         case GAUDI_EVENT_HBM0_SPI_0:
7793         case GAUDI_EVENT_HBM0_SPI_1:
7794                 return 0;
7795         case GAUDI_EVENT_HBM1_SPI_0:
7796         case GAUDI_EVENT_HBM1_SPI_1:
7797                 return 1;
7798         case GAUDI_EVENT_HBM2_SPI_0:
7799         case GAUDI_EVENT_HBM2_SPI_1:
7800                 return 2;
7801         case GAUDI_EVENT_HBM3_SPI_0:
7802         case GAUDI_EVENT_HBM3_SPI_1:
7803                 return 3;
7804         default:
7805                 break;
7806         }
7807
7808         /* Should never happen */
7809         return 0;
7810 }
7811
7812 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7813                                         char *interrupt_name)
7814 {
7815         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7816         bool soft_reset_required = false;
7817
7818         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7819                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7820
7821         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7822                 if (tpc_interrupts_cause & BIT(i)) {
7823                         dev_err_ratelimited(hdev->dev,
7824                                         "TPC%d_%s interrupt cause: %s\n",
7825                                         tpc_id, interrupt_name,
7826                                         gaudi_tpc_interrupts_cause[i]);
7827                         /* If this is a QM error, we need to soft-reset */
7828                         if (i == 15)
7829                                 soft_reset_required = true;
7830                 }
7831
7832         /* Clear interrupts */
7833         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7834
7835         return soft_reset_required;
7836 }
7837
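     /*
      * These helpers assume the event enum layout: TPCn_DEC events are spaced
      * two entries apart per TPC, while TPCn_KRN_ERR events are spaced six
      * entries apart per TPC.
      */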
7838 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7839 {
7840         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7841 }
7842
7843 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7844 {
7845         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7846 }
7847
7848 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7849                                         u16 event_type)
7850 {
7851         ktime_t zero_time = ktime_set(0, 0);
7852
7853         mutex_lock(&hdev->clk_throttling.lock);
7854
7855         switch (event_type) {
7856         case GAUDI_EVENT_FIX_POWER_ENV_S:
7857                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7858                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7859                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7860                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7861                 dev_info_ratelimited(hdev->dev,
7862                         "Clock throttling due to power consumption\n");
7863                 break;
7864
7865         case GAUDI_EVENT_FIX_POWER_ENV_E:
7866                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7867                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7868                 dev_info_ratelimited(hdev->dev,
7869                         "Power envelope is safe, back to optimal clock\n");
7870                 break;
7871
7872         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7873                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7874                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7875                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7876                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7877                 dev_info_ratelimited(hdev->dev,
7878                         "Clock throttling due to overheating\n");
7879                 break;
7880
7881         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7882                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7883                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7884                 dev_info_ratelimited(hdev->dev,
7885                         "Thermal envelope is safe, back to optimal clock\n");
7886                 break;
7887
7888         default:
7889                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7890                         event_type);
7891                 break;
7892         }
7893
7894         mutex_unlock(&hdev->clk_throttling.lock);
7895 }
7896
7897 static void gaudi_handle_eqe(struct hl_device *hdev,
7898                                 struct hl_eq_entry *eq_entry)
7899 {
7900         struct gaudi_device *gaudi = hdev->asic_specific;
7901         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7902         u32 fw_fatal_err_flag = 0;
7903         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7904                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7905         bool reset_required;
7906         u8 cause;
7907         int rc;
7908
7909         if (event_type >= GAUDI_EVENT_SIZE) {
7910                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
7911                                 event_type, GAUDI_EVENT_SIZE - 1);
7912                 return;
7913         }
7914
7915         gaudi->events_stat[event_type]++;
7916         gaudi->events_stat_aggregate[event_type]++;
7917
7918         switch (event_type) {
7919         case GAUDI_EVENT_PCIE_CORE_DERR:
7920         case GAUDI_EVENT_PCIE_IF_DERR:
7921         case GAUDI_EVENT_PCIE_PHY_DERR:
7922         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7923         case GAUDI_EVENT_MME0_ACC_DERR:
7924         case GAUDI_EVENT_MME0_SBAB_DERR:
7925         case GAUDI_EVENT_MME1_ACC_DERR:
7926         case GAUDI_EVENT_MME1_SBAB_DERR:
7927         case GAUDI_EVENT_MME2_ACC_DERR:
7928         case GAUDI_EVENT_MME2_SBAB_DERR:
7929         case GAUDI_EVENT_MME3_ACC_DERR:
7930         case GAUDI_EVENT_MME3_SBAB_DERR:
7931         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7932                 fallthrough;
7933         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7934         case GAUDI_EVENT_PSOC_MEM_DERR:
7935         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7936         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7937         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7938         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7939         case GAUDI_EVENT_MMU_DERR:
7940         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7941                 gaudi_print_irq_info(hdev, event_type, true);
7942                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7943                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7944                 goto reset_device;
7945
7946         case GAUDI_EVENT_GIC500:
7947         case GAUDI_EVENT_AXI_ECC:
7948         case GAUDI_EVENT_L2_RAM_ECC:
7949         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7950                 gaudi_print_irq_info(hdev, event_type, false);
7951                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7952                 goto reset_device;
7953
7954         case GAUDI_EVENT_HBM0_SPI_0:
7955         case GAUDI_EVENT_HBM1_SPI_0:
7956         case GAUDI_EVENT_HBM2_SPI_0:
7957         case GAUDI_EVENT_HBM3_SPI_0:
7958                 gaudi_print_irq_info(hdev, event_type, false);
7959                 gaudi_hbm_read_interrupts(hdev,
7960                                 gaudi_hbm_event_to_dev(event_type),
7961                                 &eq_entry->hbm_ecc_data);
7962                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7963                 goto reset_device;
7964
7965         case GAUDI_EVENT_HBM0_SPI_1:
7966         case GAUDI_EVENT_HBM1_SPI_1:
7967         case GAUDI_EVENT_HBM2_SPI_1:
7968         case GAUDI_EVENT_HBM3_SPI_1:
7969                 gaudi_print_irq_info(hdev, event_type, false);
7970                 gaudi_hbm_read_interrupts(hdev,
7971                                 gaudi_hbm_event_to_dev(event_type),
7972                                 &eq_entry->hbm_ecc_data);
7973                 hl_fw_unmask_irq(hdev, event_type);
7974                 break;
7975
7976         case GAUDI_EVENT_TPC0_DEC:
7977         case GAUDI_EVENT_TPC1_DEC:
7978         case GAUDI_EVENT_TPC2_DEC:
7979         case GAUDI_EVENT_TPC3_DEC:
7980         case GAUDI_EVENT_TPC4_DEC:
7981         case GAUDI_EVENT_TPC5_DEC:
7982         case GAUDI_EVENT_TPC6_DEC:
7983         case GAUDI_EVENT_TPC7_DEC:
7984                 gaudi_print_irq_info(hdev, event_type, true);
7985                 reset_required = gaudi_tpc_read_interrupts(hdev,
7986                                         tpc_dec_event_to_tpc_id(event_type),
7987                                         "AXI_SLV_DEC_Error");
7988                 if (reset_required) {
7989                         dev_err(hdev->dev, "reset required due to %s\n",
7990                                 gaudi_irq_map_table[event_type].name);
7991
7992                         hl_device_reset(hdev, 0);
7993                 } else {
7994                         hl_fw_unmask_irq(hdev, event_type);
7995                 }
7996                 break;
7997
7998         case GAUDI_EVENT_TPC0_KRN_ERR:
7999         case GAUDI_EVENT_TPC1_KRN_ERR:
8000         case GAUDI_EVENT_TPC2_KRN_ERR:
8001         case GAUDI_EVENT_TPC3_KRN_ERR:
8002         case GAUDI_EVENT_TPC4_KRN_ERR:
8003         case GAUDI_EVENT_TPC5_KRN_ERR:
8004         case GAUDI_EVENT_TPC6_KRN_ERR:
8005         case GAUDI_EVENT_TPC7_KRN_ERR:
8006                 gaudi_print_irq_info(hdev, event_type, true);
8007                 reset_required = gaudi_tpc_read_interrupts(hdev,
8008                                         tpc_krn_event_to_tpc_id(event_type),
8009                                         "KRN_ERR");
8010                 if (reset_required) {
8011                         dev_err(hdev->dev, "reset required due to %s\n",
8012                                 gaudi_irq_map_table[event_type].name);
8013
8014                         hl_device_reset(hdev, 0);
8015                 } else {
8016                         hl_fw_unmask_irq(hdev, event_type);
8017                 }
8018                 break;
8019
8020         case GAUDI_EVENT_PCIE_CORE_SERR:
8021         case GAUDI_EVENT_PCIE_IF_SERR:
8022         case GAUDI_EVENT_PCIE_PHY_SERR:
8023         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8024         case GAUDI_EVENT_MME0_ACC_SERR:
8025         case GAUDI_EVENT_MME0_SBAB_SERR:
8026         case GAUDI_EVENT_MME1_ACC_SERR:
8027         case GAUDI_EVENT_MME1_SBAB_SERR:
8028         case GAUDI_EVENT_MME2_ACC_SERR:
8029         case GAUDI_EVENT_MME2_SBAB_SERR:
8030         case GAUDI_EVENT_MME3_ACC_SERR:
8031         case GAUDI_EVENT_MME3_SBAB_SERR:
8032         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8033         case GAUDI_EVENT_CPU_IF_ECC_SERR:
8034         case GAUDI_EVENT_PSOC_MEM_SERR:
8035         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8036         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8037         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8038         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8039                 fallthrough;
8040         case GAUDI_EVENT_MMU_SERR:
8041                 gaudi_print_irq_info(hdev, event_type, true);
8042                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8043                 hl_fw_unmask_irq(hdev, event_type);
8044                 break;
8045
8046         case GAUDI_EVENT_PCIE_DEC:
8047         case GAUDI_EVENT_MME0_WBC_RSP:
8048         case GAUDI_EVENT_MME0_SBAB0_RSP:
8049         case GAUDI_EVENT_MME1_WBC_RSP:
8050         case GAUDI_EVENT_MME1_SBAB0_RSP:
8051         case GAUDI_EVENT_MME2_WBC_RSP:
8052         case GAUDI_EVENT_MME2_SBAB0_RSP:
8053         case GAUDI_EVENT_MME3_WBC_RSP:
8054         case GAUDI_EVENT_MME3_SBAB0_RSP:
8055         case GAUDI_EVENT_CPU_AXI_SPLITTER:
8056         case GAUDI_EVENT_PSOC_AXI_DEC:
8057         case GAUDI_EVENT_PSOC_PRSTN_FALL:
8058         case GAUDI_EVENT_MMU_PAGE_FAULT:
8059         case GAUDI_EVENT_MMU_WR_PERM:
8060         case GAUDI_EVENT_RAZWI_OR_ADC:
8061         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8062         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8063         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8064                 fallthrough;
8065         case GAUDI_EVENT_NIC0_QM0:
8066         case GAUDI_EVENT_NIC0_QM1:
8067         case GAUDI_EVENT_NIC1_QM0:
8068         case GAUDI_EVENT_NIC1_QM1:
8069         case GAUDI_EVENT_NIC2_QM0:
8070         case GAUDI_EVENT_NIC2_QM1:
8071         case GAUDI_EVENT_NIC3_QM0:
8072         case GAUDI_EVENT_NIC3_QM1:
8073         case GAUDI_EVENT_NIC4_QM0:
8074         case GAUDI_EVENT_NIC4_QM1:
8075         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8076                 gaudi_print_irq_info(hdev, event_type, true);
8077                 gaudi_handle_qman_err(hdev, event_type);
8078                 hl_fw_unmask_irq(hdev, event_type);
8079                 break;
8080
8081         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8082                 gaudi_print_irq_info(hdev, event_type, true);
8083                 goto reset_device;
8084
8085         case GAUDI_EVENT_TPC0_BMON_SPMU:
8086         case GAUDI_EVENT_TPC1_BMON_SPMU:
8087         case GAUDI_EVENT_TPC2_BMON_SPMU:
8088         case GAUDI_EVENT_TPC3_BMON_SPMU:
8089         case GAUDI_EVENT_TPC4_BMON_SPMU:
8090         case GAUDI_EVENT_TPC5_BMON_SPMU:
8091         case GAUDI_EVENT_TPC6_BMON_SPMU:
8092         case GAUDI_EVENT_TPC7_BMON_SPMU:
8093         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8094                 gaudi_print_irq_info(hdev, event_type, false);
8095                 hl_fw_unmask_irq(hdev, event_type);
8096                 break;
8097
8098         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8099                 gaudi_print_irq_info(hdev, event_type, false);
8100                 gaudi_print_sm_sei_info(hdev, event_type,
8101                                         &eq_entry->sm_sei_data);
8102                 rc = hl_state_dump(hdev);
8103                 if (rc)
8104                         dev_err(hdev->dev,
8105                                 "Error during system state dump %d\n", rc);
8106                 hl_fw_unmask_irq(hdev, event_type);
8107                 break;
8108
8109         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8110                 gaudi_print_clk_change_info(hdev, event_type);
8111                 hl_fw_unmask_irq(hdev, event_type);
8112                 break;
8113
8114         case GAUDI_EVENT_PSOC_GPIO_U16_0:
8115                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8116                 dev_err(hdev->dev,
8117                         "Received high temp H/W interrupt %d (cause %d)\n",
8118                         event_type, cause);
8119                 break;
8120
8121         case GAUDI_EVENT_DEV_RESET_REQ:
8122                 gaudi_print_irq_info(hdev, event_type, false);
8123                 goto reset_device;
8124
8125         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8126                 gaudi_print_irq_info(hdev, event_type, false);
8127                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8128                 goto reset_device;
8129
8130         case GAUDI_EVENT_FW_ALIVE_S:
8131                 gaudi_print_irq_info(hdev, event_type, false);
8132                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8133                 goto reset_device;
8134
8135         default:
8136                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8137                                 event_type);
8138                 break;
8139         }
8140
8141         return;
8142
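             /*
              * Common reset path: when FW security is enabled, issue a hard reset
              * that bypasses the request-to-FW step; otherwise honour the
              * hard_reset_on_fw_events policy, or just unmask the interrupt.
              */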
8143 reset_device:
8144         if (hdev->asic_prop.fw_security_enabled)
8145                 hl_device_reset(hdev, HL_DRV_RESET_HARD
8146                                         | HL_DRV_RESET_BYPASS_REQ_TO_FW
8147                                         | fw_fatal_err_flag);
8148         else if (hdev->hard_reset_on_fw_events)
8149                 hl_device_reset(hdev, HL_DRV_RESET_HARD | fw_fatal_err_flag);
8150         else
8151                 hl_fw_unmask_irq(hdev, event_type);
8152 }
8153
8154 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8155                                         u32 *size)
8156 {
8157         struct gaudi_device *gaudi = hdev->asic_specific;
8158
8159         if (aggregate) {
8160                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8161                 return gaudi->events_stat_aggregate;
8162         }
8163
8164         *size = (u32) sizeof(gaudi->events_stat);
8165         return gaudi->events_stat;
8166 }
8167
8168 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8169                                         u32 flags)
8170 {
8171         struct gaudi_device *gaudi = hdev->asic_specific;
8172         u32 status, timeout_usec;
8173         int rc;
8174
8175         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8176                 hdev->reset_info.hard_reset_pending)
8177                 return 0;
8178
8179         if (hdev->pldm)
8180                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8181         else
8182                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8183
8184         /* L0 & L1 invalidation */
8185         WREG32(mmSTLB_INV_PS, 3);
8186         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8187         WREG32(mmSTLB_INV_PS, 2);
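             /*
              * The invalidation is assumed to be done once the HW clears
              * STLB_INV_PS back to zero, hence the poll below.
              */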
8188
8189         rc = hl_poll_timeout(
8190                 hdev,
8191                 mmSTLB_INV_PS,
8192                 status,
8193                 !status,
8194                 1000,
8195                 timeout_usec);
8196
8197         WREG32(mmSTLB_INV_SET, 0);
8198
8199         return rc;
8200 }
8201
8202 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8203                                                 bool is_hard, u32 flags,
8204                                                 u32 asid, u64 va, u64 size)
8205 {
8206         /* Treat as invalidate all because there is no range invalidation
8207          * in Gaudi
8208          */
8209         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8210 }
8211
8212 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8213                                         u32 asid, u64 phys_addr)
8214 {
8215         u32 status, timeout_usec;
8216         int rc;
8217
8218         if (hdev->pldm)
8219                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8220         else
8221                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8222
8223         WREG32(MMU_ASID, asid);
8224         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8225         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8226         WREG32(MMU_BUSY, 0x80000000);
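             /*
              * Writing bit 31 of MMU_BUSY presumably kicks off the hop0 update;
              * the HW is expected to clear it when the update completes, hence
              * the poll below.
              */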
8227
8228         rc = hl_poll_timeout(
8229                 hdev,
8230                 MMU_BUSY,
8231                 status,
8232                 !(status & 0x80000000),
8233                 1000,
8234                 timeout_usec);
8235
8236         if (rc) {
8237                 dev_err(hdev->dev,
8238                         "Timeout during MMU hop0 config of asid %d\n", asid);
8239                 return rc;
8240         }
8241
8242         return 0;
8243 }
8244
8245 static int gaudi_send_heartbeat(struct hl_device *hdev)
8246 {
8247         struct gaudi_device *gaudi = hdev->asic_specific;
8248
8249         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8250                 return 0;
8251
8252         return hl_fw_send_heartbeat(hdev);
8253 }
8254
8255 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8256 {
8257         struct gaudi_device *gaudi = hdev->asic_specific;
8258         struct asic_fixed_properties *prop = &hdev->asic_prop;
8259         int rc;
8260
8261         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8262                 return 0;
8263
8264         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8265                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8266                                         mmCPU_BOOT_ERR1);
8267         if (rc)
8268                 return rc;
8269
8270         if (!strlen(prop->cpucp_info.card_name))
8271                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8272                                 CARD_NAME_MAX_LEN);
8273
8274         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8275
8276         set_default_power_values(hdev);
8277
8278         hdev->max_power = prop->max_power_default;
8279
8280         return 0;
8281 }
8282
8283 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8284                                         u8 mask_len, struct seq_file *s)
8285 {
8286         struct gaudi_device *gaudi = hdev->asic_specific;
8287         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8288         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8289         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8290         unsigned long *mask = (unsigned long *)mask_arr;
8291         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8292         bool is_idle = true, is_eng_idle, is_slave;
8293         u64 offset;
8294         int i, dma_id, port;
8295
8296         if (s)
8297                 seq_puts(s,
8298                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8299                         "---  -------  ------------  ----------  -------------\n");
8300
8301         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8302                 dma_id = gaudi_dma_assignment[i];
8303                 offset = dma_id * DMA_QMAN_OFFSET;
8304
8305                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8306                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8307                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8308                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8309                                 IS_DMA_IDLE(dma_core_sts0);
8310                 is_idle &= is_eng_idle;
8311
8312                 if (mask && !is_eng_idle)
8313                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8314                 if (s)
8315                         seq_printf(s, fmt, dma_id,
8316                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8317                                 qm_cgm_sts, dma_core_sts0);
8318         }
8319
8320         if (s)
8321                 seq_puts(s,
8322                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8323                         "---  -------  ------------  ----------  ----------\n");
8324
8325         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8326                 offset = i * TPC_QMAN_OFFSET;
8327                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8328                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8329                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8330                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8331                                 IS_TPC_IDLE(tpc_cfg_sts);
8332                 is_idle &= is_eng_idle;
8333
8334                 if (mask && !is_eng_idle)
8335                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8336                 if (s)
8337                         seq_printf(s, fmt, i,
8338                                 is_eng_idle ? "Y" : "N",
8339                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8340         }
8341
8342         if (s)
8343                 seq_puts(s,
8344                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8345                         "---  -------  ------------  ----------  -----------\n");
8346
8347         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8348                 offset = i * MME_QMAN_OFFSET;
8349                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8350                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8351
8352                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8353                 is_slave = i % 2;
8354                 if (!is_slave) {
8355                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8356                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8357                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8358                 }
8359
8360                 is_idle &= is_eng_idle;
8361
8362                 if (mask && !is_eng_idle)
8363                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8364                 if (s) {
8365                         if (!is_slave)
8366                                 seq_printf(s, fmt, i,
8367                                         is_eng_idle ? "Y" : "N",
8368                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8369                         else
8370                                 seq_printf(s, mme_slave_fmt, i,
8371                                         is_eng_idle ? "Y" : "N", "-",
8372                                         "-", mme_arch_sts);
8373                 }
8374         }
8375
8376         if (s)
8377                 seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8378                                 "---  -------  ------------  ----------\n");
8379
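             /* Each NIC macro hosts two ports (QM0/QM1); check both per iteration */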
8380         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8381                 offset = i * NIC_MACRO_QMAN_OFFSET;
8382                 port = 2 * i;
8383                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8384                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8385                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8386                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8387                         is_idle &= is_eng_idle;
8388
8389                         if (mask && !is_eng_idle)
8390                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8391                         if (s)
8392                                 seq_printf(s, nic_fmt, port,
8393                                                 is_eng_idle ? "Y" : "N",
8394                                                 qm_glbl_sts0, qm_cgm_sts);
8395                 }
8396
8397                 port = 2 * i + 1;
8398                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8399                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8400                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8401                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8402                         is_idle &= is_eng_idle;
8403
8404                         if (mask && !is_eng_idle)
8405                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8406                         if (s)
8407                                 seq_printf(s, nic_fmt, port,
8408                                                 is_eng_idle ? "Y" : "N",
8409                                                 qm_glbl_sts0, qm_cgm_sts);
8410                 }
8411         }
8412
8413         if (s)
8414                 seq_puts(s, "\n");
8415
8416         return is_idle;
8417 }
8418
8419 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8420         __acquires(&gaudi->hw_queues_lock)
8421 {
8422         struct gaudi_device *gaudi = hdev->asic_specific;
8423
8424         spin_lock(&gaudi->hw_queues_lock);
8425 }
8426
8427 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8428         __releases(&gaudi->hw_queues_lock)
8429 {
8430         struct gaudi_device *gaudi = hdev->asic_specific;
8431
8432         spin_unlock(&gaudi->hw_queues_lock);
8433 }
8434
8435 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8436 {
8437         return hdev->pdev->device;
8438 }
8439
8440 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8441                                 size_t max_size)
8442 {
8443         struct gaudi_device *gaudi = hdev->asic_specific;
8444
8445         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8446                 return 0;
8447
8448         return hl_fw_get_eeprom_data(hdev, data, max_size);
8449 }
8450
8451 /*
8452  * this function should be used only during initialization and/or after reset,
8453  * when there are no active users.
8454  */
8455 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8456 {
8457         u64 kernel_timeout;
8458         u32 status, offset;
8459         int rc;
8460
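             /*
              * Flow: program the kernel, icache and LUT base addresses, point the
              * QM sync object at a SOB, trigger icache invalidate + prefetch, wait
              * for the vector pipe to drain, then execute the kernel and wait for
              * the work-queue inflight counter to reach zero.
              */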
8461         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8462
8463         if (hdev->pldm)
8464                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8465         else
8466                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8467
8468         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8469                         lower_32_bits(tpc_kernel));
8470         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8471                         upper_32_bits(tpc_kernel));
8472
8473         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8474                         lower_32_bits(tpc_kernel));
8475         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8476                         upper_32_bits(tpc_kernel));
8477         /* set a valid LUT pointer, content is of no significance */
8478         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8479                         lower_32_bits(tpc_kernel));
8480         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8481                         upper_32_bits(tpc_kernel));
8482
8483         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8484                         lower_32_bits(CFG_BASE +
8485                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8486
8487         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8488                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8489                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8490         /* wait a bit for the icache invalidate/prefetch command to take effect */
8491         usleep_range(1000, 1500);
8492
8493         /* wait until the icache prefetch has finished */
8494         rc = hl_poll_timeout(
8495                 hdev,
8496                 mmTPC0_CFG_STATUS + offset,
8497                 status,
8498                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8499                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8500                 1000,
8501                 kernel_timeout);
8502
8503         if (rc) {
8504                 dev_err(hdev->dev,
8505                         "Timeout while waiting for TPC%d icache prefetch\n",
8506                         tpc_id);
8507                 return -EIO;
8508         }
8509
8510         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8511                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8512
8513         /* wait a bit for the engine to start executing */
8514         usleep_range(1000, 1500);
8515
8516         /* wait until engine has finished executing */
8517         rc = hl_poll_timeout(
8518                 hdev,
8519                 mmTPC0_CFG_STATUS + offset,
8520                 status,
8521                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8522                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8523                 1000,
8524                 kernel_timeout);
8525
8526         if (rc) {
8527                 dev_err(hdev->dev,
8528                         "Timeout while waiting for TPC%d vector pipe\n",
8529                         tpc_id);
8530                 return -EIO;
8531         }
8532
8533         rc = hl_poll_timeout(
8534                 hdev,
8535                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8536                 status,
8537                 (status == 0),
8538                 1000,
8539                 kernel_timeout);
8540
8541         if (rc) {
8542                 dev_err(hdev->dev,
8543                         "Timeout while waiting for TPC%d kernel to execute\n",
8544                         tpc_id);
8545                 return -EIO;
8546         }
8547
8548         return 0;
8549 }
8550
8551 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8552                 struct hl_ctx *ctx)
8553 {
8554         struct gaudi_device *gaudi = hdev->asic_specific;
8555         int min_alloc_order, rc, collective_cb_size;
8556
8557         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8558                 return 0;
8559
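             /*
              * Flow: allocate a coherent host buffer for internal CBs, wrap it in
              * a gen_pool for sub-allocations, reserve a device VA block and map
              * the buffer into it through the MMU.
              */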
8560         hdev->internal_cb_pool_virt_addr =
8561                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8562                                         HOST_SPACE_INTERNAL_CB_SZ,
8563                                         &hdev->internal_cb_pool_dma_addr,
8564                                         GFP_KERNEL | __GFP_ZERO);
8565
8566         if (!hdev->internal_cb_pool_virt_addr)
8567                 return -ENOMEM;
8568
8569         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8570                         sizeof(struct packet_fence);
8571         min_alloc_order = ilog2(collective_cb_size);
8572
8573         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8574         if (!hdev->internal_cb_pool) {
8575                 dev_err(hdev->dev,
8576                         "Failed to create internal CB pool\n");
8577                 rc = -ENOMEM;
8578                 goto free_internal_cb_pool;
8579         }
8580
8581         rc = gen_pool_add(hdev->internal_cb_pool,
8582                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8583                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8584         if (rc) {
8585                 dev_err(hdev->dev,
8586                         "Failed to add memory to internal CB pool\n");
8587                 rc = -EFAULT;
8588                 goto destroy_internal_cb_pool;
8589         }
8590
8591         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8592                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8593                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8594
8595         if (!hdev->internal_cb_va_base) {
8596                 rc = -ENOMEM;
8597                 goto destroy_internal_cb_pool;
8598         }
8599
8600         mutex_lock(&ctx->mmu_lock);
8601         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8602                         hdev->internal_cb_pool_dma_addr,
8603                         HOST_SPACE_INTERNAL_CB_SZ);
8604
8605         hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8606         mutex_unlock(&ctx->mmu_lock);
8607
8608         if (rc)
8609                 goto unreserve_internal_cb_pool;
8610
8611         return 0;
8612
8613 unreserve_internal_cb_pool:
8614         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8615                         HOST_SPACE_INTERNAL_CB_SZ);
8616 destroy_internal_cb_pool:
8617         gen_pool_destroy(hdev->internal_cb_pool);
8618 free_internal_cb_pool:
8619         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8620                         HOST_SPACE_INTERNAL_CB_SZ,
8621                         hdev->internal_cb_pool_virt_addr,
8622                         hdev->internal_cb_pool_dma_addr);
8623
8624         return rc;
8625 }
8626
8627 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8628                 struct hl_ctx *ctx)
8629 {
8630         struct gaudi_device *gaudi = hdev->asic_specific;
8631
8632         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8633                 return;
8634
8635         mutex_lock(&ctx->mmu_lock);
8636         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8637                         HOST_SPACE_INTERNAL_CB_SZ);
8638         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8639                         HOST_SPACE_INTERNAL_CB_SZ);
8640         hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8641         mutex_unlock(&ctx->mmu_lock);
8642
8643         gen_pool_destroy(hdev->internal_cb_pool);
8644
8645         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8646                         HOST_SPACE_INTERNAL_CB_SZ,
8647                         hdev->internal_cb_pool_virt_addr,
8648                         hdev->internal_cb_pool_dma_addr);
8649 }
8650
8651 static int gaudi_ctx_init(struct hl_ctx *ctx)
8652 {
8653         int rc;
8654
8655         if (ctx->asid == HL_KERNEL_ASID_ID)
8656                 return 0;
8657
8658         rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8659         if (rc)
8660                 return rc;
8661
8662         rc = gaudi_restore_user_registers(ctx->hdev);
8663         if (rc)
8664                 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8665
8666         return rc;
8667 }
8668
8669 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8670 {
8671         if (ctx->asid == HL_KERNEL_ASID_ID)
8672                 return;
8673
8674         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8675 }
8676
8677 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8678 {
8679         return gaudi_cq_assignment[cq_idx];
8680 }
8681
8682 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8683 {
8684         return sizeof(struct packet_msg_short) +
8685                         sizeof(struct packet_msg_prot) * 2;
8686 }
8687
8688 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8689 {
8690         return sizeof(struct packet_msg_short) * 4 +
8691                         sizeof(struct packet_fence) +
8692                         sizeof(struct packet_msg_prot) * 2;
8693 }
8694
8695 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8696 {
8697         return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8698 }
8699
8700 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8701                                 u32 size, bool eb)
8702 {
8703         struct hl_cb *cb = (struct hl_cb *) data;
8704         struct packet_msg_short *pkt;
8705         u32 value, ctl, pkt_size = sizeof(*pkt);
8706
8707         pkt = cb->kernel_address + size;
8708         memset(pkt, 0, pkt_size);
8709
8710         /* Inc by 1, Mode ADD */
8711         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8712         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8713
8714         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8715         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8716         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8717         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8718         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8719         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8720         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8721
8722         pkt->value = cpu_to_le32(value);
8723         pkt->ctl = cpu_to_le32(ctl);
8724
8725         return size + pkt_size;
8726 }
8727
8728 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8729                                         u16 addr)
8730 {
8731         u32 ctl, pkt_size = sizeof(*pkt);
8732
8733         memset(pkt, 0, pkt_size);
8734
8735         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8736         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8737         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8738         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8739         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8740         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* MB is set only on the last packet */
8741
8742         pkt->value = cpu_to_le32(value);
8743         pkt->ctl = cpu_to_le32(ctl);
8744
8745         return pkt_size;
8746 }
8747
8748 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8749                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8750                 u16 sob_val, u16 mon_id)
8751 {
8752         u64 monitor_base;
8753         u32 ctl, value, pkt_size = sizeof(*pkt);
8754         u16 msg_addr_offset;
8755         u8 mask;
8756
8757         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8758                 dev_err(hdev->dev,
8759                         "sob_base %u (mask %#x) is not valid\n",
8760                         sob_base, sob_mask);
8761                 return 0;
8762         }
8763
8764         /*
8765          * monitor_base should be the content of the base0 address registers,
8766          * so it will be added to the msg short offsets
8767          */
8768         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8769
8770         msg_addr_offset =
8771                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8772                                 monitor_base;
8773
8774         memset(pkt, 0, pkt_size);
8775
8776         /* Monitor config packet: bind the monitor to a sync object */
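             /*
              * SOBs appear to be grouped in sets of eight: sob_base / 8 selects
              * the group (SYNC_GID) and the mask, built by hl_gen_sob_mask(),
              * selects the objects within that group.
              */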
8777         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8778         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8779         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8780                         0); /* GREATER OR EQUAL */
8781         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8782
8783         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8784         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8785         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8786         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8787         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8788         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8789         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8790
8791         pkt->value = cpu_to_le32(value);
8792         pkt->ctl = cpu_to_le32(ctl);
8793
8794         return pkt_size;
8795 }
8796
8797 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8798 {
8799         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8800
8801         memset(pkt, 0, pkt_size);
8802
8803         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8804         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8805         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8806
8807         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8808         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8809         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8810         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8811
8812         pkt->cfg = cpu_to_le32(cfg);
8813         pkt->ctl = cpu_to_le32(ctl);
8814
8815         return pkt_size;
8816 }
8817
8818 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8819 {
8820         u32 offset, nic_index;
8821
8822         switch (queue_id) {
8823         case GAUDI_QUEUE_ID_DMA_0_0:
8824                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8825                 break;
8826         case GAUDI_QUEUE_ID_DMA_0_1:
8827                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8828                 break;
8829         case GAUDI_QUEUE_ID_DMA_0_2:
8830                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8831                 break;
8832         case GAUDI_QUEUE_ID_DMA_0_3:
8833                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8834                 break;
8835         case GAUDI_QUEUE_ID_DMA_1_0:
8836                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8837                 break;
8838         case GAUDI_QUEUE_ID_DMA_1_1:
8839                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8840                 break;
8841         case GAUDI_QUEUE_ID_DMA_1_2:
8842                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8843                 break;
8844         case GAUDI_QUEUE_ID_DMA_1_3:
8845                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8846                 break;
8847         case GAUDI_QUEUE_ID_DMA_5_0:
8848                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8849                 break;
8850         case GAUDI_QUEUE_ID_DMA_5_1:
8851                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8852                 break;
8853         case GAUDI_QUEUE_ID_DMA_5_2:
8854                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8855                 break;
8856         case GAUDI_QUEUE_ID_DMA_5_3:
8857                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8858                 break;
8859         case GAUDI_QUEUE_ID_TPC_7_0:
8860                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8861                 break;
8862         case GAUDI_QUEUE_ID_TPC_7_1:
8863                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8864                 break;
8865         case GAUDI_QUEUE_ID_TPC_7_2:
8866                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8867                 break;
8868         case GAUDI_QUEUE_ID_TPC_7_3:
8869                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8870                 break;
8871         case GAUDI_QUEUE_ID_NIC_0_0:
8872         case GAUDI_QUEUE_ID_NIC_1_0:
8873         case GAUDI_QUEUE_ID_NIC_2_0:
8874         case GAUDI_QUEUE_ID_NIC_3_0:
8875         case GAUDI_QUEUE_ID_NIC_4_0:
8876         case GAUDI_QUEUE_ID_NIC_5_0:
8877         case GAUDI_QUEUE_ID_NIC_6_0:
8878         case GAUDI_QUEUE_ID_NIC_7_0:
8879         case GAUDI_QUEUE_ID_NIC_8_0:
8880         case GAUDI_QUEUE_ID_NIC_9_0:
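                     /*
                      * Queue ids are assumed to be grouped four per NIC engine, two
                      * engines per NIC macro: >> 2 yields the engine index; its upper
                      * bits (>> 1) select the macro and its low bit selects QM0/QM1.
                      */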
8881                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8882                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8883                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8884                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8885                 break;
8886         case GAUDI_QUEUE_ID_NIC_0_1:
8887         case GAUDI_QUEUE_ID_NIC_1_1:
8888         case GAUDI_QUEUE_ID_NIC_2_1:
8889         case GAUDI_QUEUE_ID_NIC_3_1:
8890         case GAUDI_QUEUE_ID_NIC_4_1:
8891         case GAUDI_QUEUE_ID_NIC_5_1:
8892         case GAUDI_QUEUE_ID_NIC_6_1:
8893         case GAUDI_QUEUE_ID_NIC_7_1:
8894         case GAUDI_QUEUE_ID_NIC_8_1:
8895         case GAUDI_QUEUE_ID_NIC_9_1:
8896                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8897                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8898                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8899                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8900                 break;
8901         case GAUDI_QUEUE_ID_NIC_0_2:
8902         case GAUDI_QUEUE_ID_NIC_1_2:
8903         case GAUDI_QUEUE_ID_NIC_2_2:
8904         case GAUDI_QUEUE_ID_NIC_3_2:
8905         case GAUDI_QUEUE_ID_NIC_4_2:
8906         case GAUDI_QUEUE_ID_NIC_5_2:
8907         case GAUDI_QUEUE_ID_NIC_6_2:
8908         case GAUDI_QUEUE_ID_NIC_7_2:
8909         case GAUDI_QUEUE_ID_NIC_8_2:
8910         case GAUDI_QUEUE_ID_NIC_9_2:
8911                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8912                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8913                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8914                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8915                 break;
8916         case GAUDI_QUEUE_ID_NIC_0_3:
8917         case GAUDI_QUEUE_ID_NIC_1_3:
8918         case GAUDI_QUEUE_ID_NIC_2_3:
8919         case GAUDI_QUEUE_ID_NIC_3_3:
8920         case GAUDI_QUEUE_ID_NIC_4_3:
8921         case GAUDI_QUEUE_ID_NIC_5_3:
8922         case GAUDI_QUEUE_ID_NIC_6_3:
8923         case GAUDI_QUEUE_ID_NIC_7_3:
8924         case GAUDI_QUEUE_ID_NIC_8_3:
8925         case GAUDI_QUEUE_ID_NIC_9_3:
8926                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8927                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8928                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8929                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8930                 break;
8931         default:
8932                 return -EINVAL;
8933         }
8934
8935         *addr = CFG_BASE + offset;
8936
8937         return 0;
8938 }
8939
8940 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8941 {
8942         u64 monitor_base;
8943         u32 size = 0;
8944         u16 msg_addr_offset;
8945
8946         /*
8947          * monitor_base should be the content of the base0 address registers,
8948          * so it will be added to the msg short offsets
8949          */
8950         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8951
8952         /* First monitor config packet: low address of the sync */
8953         msg_addr_offset =
8954                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8955                                 monitor_base;
8956
8957         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8958                                         msg_addr_offset);
8959
8960         /* Second monitor config packet: high address of the sync */
8961         msg_addr_offset =
8962                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8963                                 monitor_base;
8964
8965         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8966                                         msg_addr_offset);
8967
8968         /*
8969          * Third monitor config packet: the payload, i.e. what to write when the
8970          * sync triggers
8971          */
8972         msg_addr_offset =
8973                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8974                                 monitor_base;
8975
8976         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8977
8978         return size;
8979 }
8980
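     /*
      * A wait CB is built from three MSG_SHORT packets that program the monitor
      * payload address (low/high) and payload data, one MSG_SHORT that arms the
      * monitor on the requested SOB range, and a FENCE packet on which the queue
      * blocks until the monitor's payload write releases it.
      */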
8981 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8982                                 struct hl_gen_wait_properties *prop)
8983 {
8984         struct hl_cb *cb = (struct hl_cb *) prop->data;
8985         void *buf = cb->kernel_address;
8986         u64 fence_addr = 0;
8987         u32 size = prop->size;
8988
8989         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8990                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8991                                 prop->q_idx);
8992                 return 0;
8993         }
8994
8995         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8996         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8997                         prop->sob_mask, prop->sob_val, prop->mon_id);
8998         size += gaudi_add_fence_pkt(buf + size);
8999
9000         return size;
9001 }
9002
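/* Clear the sync object's value in the sync manager and re-init its kref */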
9003 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
9004 {
9005         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
9006
9007         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
9008                 hw_sob->sob_id);
9009
9010         WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9011                         hw_sob->sob_id * 4, 0);
9012
9013         kref_init(&hw_sob->kref);
9014 }
9015
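/* Gaudi always uses a fixed 48-bit DMA mask */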
9016 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9017 {
9018         hdev->dma_mask = 48;
9019 }
9020
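/*
 * Combine the high and low 32-bit halves of the PSOC timestamp counter into
 * a single 64-bit device time.
 */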
9021 static u64 gaudi_get_device_time(struct hl_device *hdev)
9022 {
9023         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9024
9025         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9026 }
9027
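/*
 * Mapping HW blocks to user-space is not supported on Gaudi, so both the
 * block-id lookup and the block mmap handler return -EPERM.
 */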
9028 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9029                                 u32 *block_size, u32 *block_id)
9030 {
9031         return -EPERM;
9032 }
9033
9034 static int gaudi_block_mmap(struct hl_device *hdev,
9035                                 struct vm_area_struct *vma,
9036                                 u32 block_id, u32 block_size)
9037 {
9038         return -EPERM;
9039 }
9040
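/*
 * Write the INTS_REGISTER event ID to the host interrupts register (legacy
 * GIC or the FW-provided one) to let the firmware start sending events.
 */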
9041 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9042 {
9043         struct cpu_dyn_regs *dyn_regs =
9044                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9045         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9046                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
9047                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
9048
9049         WREG32(irq_handler_offset,
9050                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
9051 }
9052
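/*
 * Translate a PLL index coming from the uAPI/common code to the firmware's
 * PLL index. Returns -EINVAL for an unrecognized index.
 */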
9053 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9054 {
9055         switch (pll_idx) {
9056         case HL_GAUDI_CPU_PLL: return CPU_PLL;
9057         case HL_GAUDI_PCI_PLL: return PCI_PLL;
9058         case HL_GAUDI_NIC_PLL: return NIC_PLL;
9059         case HL_GAUDI_DMA_PLL: return DMA_PLL;
9060         case HL_GAUDI_MESH_PLL: return MESH_PLL;
9061         case HL_GAUDI_MME_PLL: return MME_PLL;
9062         case HL_GAUDI_TPC_PLL: return TPC_PLL;
9063         case HL_GAUDI_IF_PLL: return IF_PLL;
9064         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9065         case HL_GAUDI_HBM_PLL: return HBM_PLL;
9066         default: return -EINVAL;
9067         }
9068 }
9069
9070 static int gaudi_add_sync_to_engine_map_entry(
9071         struct hl_sync_to_engine_map *map, u32 reg_value,
9072         enum hl_sync_engine_type engine_type, u32 engine_id)
9073 {
9074         struct hl_sync_to_engine_map_entry *entry;
9075
9076         /* The register value is a partial address of the sync object and
9077          * serves as its unique identifier, so strip the CFG base from it
9078          * before using it as the hash key.
9079          */
9080         if (reg_value == 0 || reg_value == 0xffffffff)
9081                 return 0;
9082         reg_value -= (u32)CFG_BASE;
9083
9084         /* create a new hash entry */
9085         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9086         if (!entry)
9087                 return -ENOMEM;
9088         entry->engine_type = engine_type;
9089         entry->engine_id = engine_id;
9090         entry->sync_id = reg_value;
9091         hash_add(map->tb, &entry->node, reg_value);
9092
9093         return 0;
9094 }
9095
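/*
 * Build the sync-object-to-engine map used by the state dump: read the
 * configured SO register of every TPC, MME sub-engine and DMA engine and
 * record it, keyed by the SO address offset.
 */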
9096 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9097                                 struct hl_sync_to_engine_map *map)
9098 {
9099         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9100         int i, j, rc;
9101         u32 reg_value;
9102
9103         /* Iterate over TPC engines */
9104         for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9105
9106                 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9107                                         sds->props[SP_NEXT_TPC] * i);
9108
9109                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9110                                                         ENGINE_TPC, i);
9111                 if (rc)
9112                         goto free_sync_to_engine_map;
9113         }
9114
9115         /* Iterate over MME engines */
9116         for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9117                 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9118
9119                         reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9120                                                 sds->props[SP_NEXT_MME] * i +
9121                                                 j * sizeof(u32));
9122
9123                         rc = gaudi_add_sync_to_engine_map_entry(
9124                                 map, reg_value, ENGINE_MME,
9125                                 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9126                         if (rc)
9127                                 goto free_sync_to_engine_map;
9128                 }
9129         }
9130
9131         /* Iterate over DMA engines */
9132         for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9133                 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9134                                         sds->props[SP_DMA_QUEUES_OFFSET] * i);
9135                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9136                                                         ENGINE_DMA, i);
9137                 if (rc)
9138                         goto free_sync_to_engine_map;
9139         }
9140
9141         return 0;
9142
9143 free_sync_to_engine_map:
9144         hl_state_dump_free_sync_to_engine_map(map);
9145
9146         return rc;
9147 }
9148
9149 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9150 {
9151         return FIELD_GET(
9152                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9153                 mon->status);
9154 }
9155
9156 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
9157 {
9158         const size_t max_write = 10;
9159         u32 gid, mask, sob;
9160         int i, offset;
9161
9162         /* Each monitored sync object ID is calculated as
9163          * (8 * group_id + index of each cleared bit in the mask)
9164          */
9165         gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9166                         mon->arm_data);
9167         mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9168                         mon->arm_data);
9169
9170         for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
9171                 max_write; mask >>= 1, i++) {
9172                 if (!(mask & 1)) {
9173                         sob = gid * MONITOR_MAX_SOBS + i;
9174
9175                         if (offset > 0)
9176                                 offset += snprintf(sobs + offset, max_write,
9177                                                         ", ");
9178
9179                         offset += snprintf(sobs + offset, max_write, "%u", sob);
9180                 }
9181         }
9182 }
9183
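/*
 * Print a single monitor: its armed sync group, mask and target value, the
 * payload address/data it will write, its pending status and the list of
 * sync objects it monitors.
 */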
9184 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9185                                 struct hl_device *hdev,
9186                                 struct hl_mon_state_dump *mon)
9187 {
9188         const char *name;
9189         char scratch_buf1[BIN_REG_STRING_SIZE],
9190                 scratch_buf2[BIN_REG_STRING_SIZE];
9191         char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
9192
9193         name = hl_state_dump_get_monitor_name(hdev, mon);
9194         if (!name)
9195                 name = "";
9196
9197         gaudi_fill_sobs_from_mon(monitored_sobs, mon);
9198
9199         return hl_snprintf_resize(
9200                 buf, size, offset,
9201                 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
9202                 mon->id, name,
9203                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9204                                 mon->arm_data),
9205                 hl_format_as_binary(
9206                         scratch_buf1, sizeof(scratch_buf1),
9207                         FIELD_GET(
9208                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9209                                 mon->arm_data)),
9210                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9211                                 mon->arm_data),
9212                 mon->wr_data,
9213                 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9214                 hl_format_as_binary(
9215                         scratch_buf2, sizeof(scratch_buf2),
9216                         FIELD_GET(
9217                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9218                                 mon->status)),
9219                 monitored_sobs);
9220 }
9221
9222
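/*
 * Read the per-queue CP status and fence counter registers of a single
 * engine and print every queue that is currently waiting on a fence.
 */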
9223 static int gaudi_print_fences_single_engine(
9224         struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9225         enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9226         size_t *size, size_t *offset)
9227 {
9228         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9229         int rc = -ENOMEM, i;
9230         u32 *statuses, *fences;
9231
9232         statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9233                         sizeof(*statuses), GFP_KERNEL);
9234         if (!statuses)
9235                 goto out;
9236
9237         fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9238                                 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9239                          sizeof(*fences), GFP_KERNEL);
9240         if (!fences)
9241                 goto free_status;
9242
9243         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9244                 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9245
9246         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9247                                 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9248                 fences[i] = RREG32(base_offset + i * sizeof(u32));
9249
9250         /* Print every queue whose CP currently has a fence in progress */
9251         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9252                 u32 fence_id;
9253                 u64 fence_cnt, fence_rdata;
9254                 const char *engine_name;
9255
9256                 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9257                         statuses[i]))
9258                         continue;
9259
9260                 fence_id =
9261                         FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9262                 fence_cnt = base_offset + CFG_BASE +
9263                         sizeof(u32) *
9264                         (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9265                 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9266                                 sds->props[SP_FENCE0_RDATA_OFFSET];
9267                 engine_name = hl_sync_engine_to_string(engine_type);
9268
9269                 rc = hl_snprintf_resize(
9270                         buf, size, offset,
9271                         "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9272                         engine_name, engine_id,
9273                         i, fence_id,
9274                         fence_cnt, engine_name, engine_id, fence_id, i,
9275                         fence_rdata, engine_name, engine_id, fence_id, i,
9276                         fences[fence_id],
9277                         statuses[i]);
9278                 if (rc)
9279                         goto free_fences;
9280         }
9281
9282         rc = 0;
9283
9284 free_fences:
9285         kfree(fences);
9286 free_status:
9287         kfree(statuses);
9288 out:
9289         return rc;
9290 }
9291
9292
9293 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9294         .monitor_valid = gaudi_monitor_valid,
9295         .print_single_monitor = gaudi_print_single_monitor,
9296         .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9297         .print_fences_single_engine = gaudi_print_fences_single_engine,
9298 };
9299
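/*
 * Register the SOB and monitor name look-up tables and hook up the
 * Gaudi-specific state dump properties and callbacks.
 */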
9300 static void gaudi_state_dump_init(struct hl_device *hdev)
9301 {
9302         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9303         int i;
9304
9305         for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9306                 hash_add(sds->so_id_to_str_tb,
9307                         &gaudi_so_id_to_str[i].node,
9308                         gaudi_so_id_to_str[i].id);
9309
9310         for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9311                 hash_add(sds->monitor_id_to_str_tb,
9312                         &gaudi_monitor_id_to_str[i].node,
9313                         gaudi_monitor_id_to_str[i].id);
9314
9315         sds->props = gaudi_state_dump_specs_props;
9316
9317         sds->sync_namager_names = gaudi_sync_manager_names;
9318
9319         sds->funcs = gaudi_state_dump_funcs;
9320 }
9321
9322 static u32 *gaudi_get_stream_master_qid_arr(void)
9323 {
9324         return gaudi_stream_master;
9325 }
9326
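/*
 * sysfs show handler for the Infineon (VRM) controller version reported in
 * the CPU-CP info.
 */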
9327 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9328 {
9329         struct hl_device *hdev = dev_get_drvdata(dev);
9330         struct cpucp_info *cpucp_info;
9331
9332         cpucp_info = &hdev->asic_prop.cpucp_info;
9333
9334         return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9335 }
9336
9337 static DEVICE_ATTR_RO(infineon_ver);
9338
9339 static struct attribute *gaudi_vrm_dev_attrs[] = {
9340         &dev_attr_infineon_ver.attr,
        NULL,
9341 };
9342
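/*
 * Expose the common clock attributes and the Gaudi VRM attribute group
 * (infineon_ver) in sysfs.
 */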
9343 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9344                                         struct attribute_group *dev_vrm_attr_grp)
9345 {
9346         hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9347         dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9348 }
9349
9350 static const struct hl_asic_funcs gaudi_funcs = {
9351         .early_init = gaudi_early_init,
9352         .early_fini = gaudi_early_fini,
9353         .late_init = gaudi_late_init,
9354         .late_fini = gaudi_late_fini,
9355         .sw_init = gaudi_sw_init,
9356         .sw_fini = gaudi_sw_fini,
9357         .hw_init = gaudi_hw_init,
9358         .hw_fini = gaudi_hw_fini,
9359         .halt_engines = gaudi_halt_engines,
9360         .suspend = gaudi_suspend,
9361         .resume = gaudi_resume,
9362         .mmap = gaudi_mmap,
9363         .ring_doorbell = gaudi_ring_doorbell,
9364         .pqe_write = gaudi_pqe_write,
9365         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9366         .asic_dma_free_coherent = gaudi_dma_free_coherent,
9367         .scrub_device_mem = gaudi_scrub_device_mem,
9368         .get_int_queue_base = gaudi_get_int_queue_base,
9369         .test_queues = gaudi_test_queues,
9370         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9371         .asic_dma_pool_free = gaudi_dma_pool_free,
9372         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9373         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9374         .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9375         .cs_parser = gaudi_cs_parser,
9376         .asic_dma_map_sg = gaudi_dma_map_sg,
9377         .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9378         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9379         .update_eq_ci = gaudi_update_eq_ci,
9380         .context_switch = gaudi_context_switch,
9381         .restore_phase_topology = gaudi_restore_phase_topology,
9382         .debugfs_read32 = gaudi_debugfs_read32,
9383         .debugfs_write32 = gaudi_debugfs_write32,
9384         .debugfs_read64 = gaudi_debugfs_read64,
9385         .debugfs_write64 = gaudi_debugfs_write64,
9386         .debugfs_read_dma = gaudi_debugfs_read_dma,
9387         .add_device_attr = gaudi_add_device_attr,
9388         .handle_eqe = gaudi_handle_eqe,
9389         .get_events_stat = gaudi_get_events_stat,
9390         .read_pte = gaudi_read_pte,
9391         .write_pte = gaudi_write_pte,
9392         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9393         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9394         .send_heartbeat = gaudi_send_heartbeat,
9395         .debug_coresight = gaudi_debug_coresight,
9396         .is_device_idle = gaudi_is_device_idle,
9397         .non_hard_reset_late_init = gaudi_non_hard_reset_late_init,
9398         .hw_queues_lock = gaudi_hw_queues_lock,
9399         .hw_queues_unlock = gaudi_hw_queues_unlock,
9400         .get_pci_id = gaudi_get_pci_id,
9401         .get_eeprom_data = gaudi_get_eeprom_data,
9402         .send_cpu_message = gaudi_send_cpu_message,
9403         .pci_bars_map = gaudi_pci_bars_map,
9404         .init_iatu = gaudi_init_iatu,
9405         .rreg = hl_rreg,
9406         .wreg = hl_wreg,
9407         .halt_coresight = gaudi_halt_coresight,
9408         .ctx_init = gaudi_ctx_init,
9409         .ctx_fini = gaudi_ctx_fini,
9410         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9411         .load_firmware_to_device = gaudi_load_firmware_to_device,
9412         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9413         .get_signal_cb_size = gaudi_get_signal_cb_size,
9414         .get_wait_cb_size = gaudi_get_wait_cb_size,
9415         .gen_signal_cb = gaudi_gen_signal_cb,
9416         .gen_wait_cb = gaudi_gen_wait_cb,
9417         .reset_sob = gaudi_reset_sob,
9418         .reset_sob_group = gaudi_reset_sob_group,
9419         .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9420         .get_device_time = gaudi_get_device_time,
9421         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9422         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9423         .scramble_addr = hl_mmu_scramble_addr,
9424         .descramble_addr = hl_mmu_descramble_addr,
9425         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9426         .get_hw_block_id = gaudi_get_hw_block_id,
9427         .hw_block_mmap = gaudi_block_mmap,
9428         .enable_events_from_fw = gaudi_enable_events_from_fw,
9429         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9430         .init_firmware_loader = gaudi_init_firmware_loader,
9431         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9432         .state_dump_init = gaudi_state_dump_init,
9433         .get_sob_addr = gaudi_get_sob_addr,
9434         .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9435         .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
9436 };
9437
9438 /**
9439  * gaudi_set_asic_funcs - set GAUDI function pointers
9440  *
9441  * @hdev: pointer to hl_device structure
9442  *
9443  */
9444 void gaudi_set_asic_funcs(struct hl_device *hdev)
9445 {
9446         hdev->asic_funcs = &gaudi_funcs;
9447 }