// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is never
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to non-secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug).
 * CQ, CP and the engine are not secured.
 *
 */
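
/*
 * For clarity, the secured QMAN0 DMA flow described above amounts to the
 * following sketch. This is illustrative only; the helper names below are
 * placeholders, while the real driver drives this sequence through
 * gaudi_send_job_on_qman0():
 *
 *      if (device_is_idle(hdev)) {
 *              set_dma_channel_0_secured(hdev);
 *              run_dma_job(hdev, job);
 *              set_dma_channel_0_non_secured(hdev);
 *      }
 */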

#define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
#define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000         /* 4s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */

#define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9

#define GAUDI_MAX_STRING_LEN            20

#define GAUDI_CB_POOL_CB_CNT            512
#define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE     20

#define GAUDI_NUM_OF_QM_ERR_CAUSE       16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3

#define GAUDI_ARB_WDT_TIMEOUT           0xEE6b27FF /* 8 seconds */

#define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */

#define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")

#define MONITOR_SOB_STRING_SIZE         256

static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
        GAUDI_QUEUE_ID_DMA_0_0,
        GAUDI_QUEUE_ID_DMA_0_1,
        GAUDI_QUEUE_ID_DMA_0_2,
        GAUDI_QUEUE_ID_DMA_0_3,
        GAUDI_QUEUE_ID_DMA_1_0,
        GAUDI_QUEUE_ID_DMA_1_1,
        GAUDI_QUEUE_ID_DMA_1_2,
        GAUDI_QUEUE_ID_DMA_1_3
};

static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
                "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
                "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
                "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
                "gaudi cpu eq"
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
        [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
        [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
        [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
        [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
        [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
        [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
        [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
        [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
        [0] = GAUDI_QUEUE_ID_DMA_0_0,
        [1] = GAUDI_QUEUE_ID_DMA_0_1,
        [2] = GAUDI_QUEUE_ID_DMA_0_2,
        [3] = GAUDI_QUEUE_ID_DMA_0_3,
        [4] = GAUDI_QUEUE_ID_DMA_1_0,
        [5] = GAUDI_QUEUE_ID_DMA_1_1,
        [6] = GAUDI_QUEUE_ID_DMA_1_2,
        [7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
        [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
        [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
        [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
        [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
        [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
        [PACKET_REPEAT]         = sizeof(struct packet_repeat),
        [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
        [PACKET_FENCE]          = sizeof(struct packet_fence),
        [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
        [PACKET_NOP]            = sizeof(struct packet_nop),
        [PACKET_STOP]           = sizeof(struct packet_stop),
        [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
        [PACKET_WAIT]           = sizeof(struct packet_wait),
        [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
};

static inline bool validate_packet_id(enum packet_id id)
{
        switch (id) {
        case PACKET_WREG_32:
        case PACKET_WREG_BULK:
        case PACKET_MSG_LONG:
        case PACKET_MSG_SHORT:
        case PACKET_CP_DMA:
        case PACKET_REPEAT:
        case PACKET_MSG_PROT:
        case PACKET_FENCE:
        case PACKET_LIN_DMA:
        case PACKET_NOP:
        case PACKET_STOP:
        case PACKET_ARB_POINT:
        case PACKET_WAIT:
        case PACKET_LOAD_AND_EXE:
                return true;
        default:
                return false;
        }
}
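
/*
 * Note: gaudi_packet_sizes and validate_packet_id() are meant to be used
 * together when walking a command buffer: each packet's opcode is validated
 * first and the size table then tells the walker how far to advance. A
 * minimal sketch of such a loop (illustrative only, not the driver's actual
 * CB parser; get_opcode() is a placeholder here):
 *
 *      while (offset < cb_size) {
 *              enum packet_id id = get_opcode(cb, offset);
 *
 *              if (!validate_packet_id(id))
 *                      return -EINVAL;
 *              offset += gaudi_packet_sizes[id];
 *      }
 */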

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
        "tpc_address_exceed_slm",
        "tpc_div_by_0",
        "tpc_spu_mac_overflow",
        "tpc_spu_addsub_overflow",
        "tpc_spu_abs_overflow",
        "tpc_spu_fp_dst_nan_inf",
        "tpc_spu_fp_dst_denorm",
        "tpc_vpu_mac_overflow",
        "tpc_vpu_addsub_overflow",
        "tpc_vpu_abs_overflow",
        "tpc_vpu_fp_dst_nan_inf",
        "tpc_vpu_fp_dst_denorm",
        "tpc_assertions",
        "tpc_illegal_instruction",
        "tpc_pc_wrap_around",
        "tpc_qm_sw_err",
        "tpc_hbw_rresp_err",
        "tpc_hbw_bresp_err",
        "tpc_lbw_rresp_err",
        "tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
        "PQ AXI HBW error",
        "CQ AXI HBW error",
        "CP AXI HBW error",
        "CP error due to undefined OPCODE",
        "CP encountered STOP OPCODE",
        "CP AXI LBW error",
        "CP WRREG32 or WRBULK returned error",
        "N/A",
        "FENCE 0 inc over max value and clipped",
        "FENCE 1 inc over max value and clipped",
        "FENCE 2 inc over max value and clipped",
        "FENCE 3 inc over max value and clipped",
        "FENCE 0 dec under min value and clipped",
        "FENCE 1 dec under min value and clipped",
        "FENCE 2 dec under min value and clipped",
        "FENCE 3 dec under min value and clipped"
};

static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
        "Choice push while full error",
        "Choice Q watchdog error",
        "MSG AXI LBW returned with error"
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
        QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
        { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
        { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
        { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
        { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
        { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
        { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
        { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
        { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
        { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
        { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
        { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
        { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
        { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
        { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
        { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
        { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
        { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
        { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
        { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
        { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
        { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
        { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
        { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
        { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
        { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
        { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
        { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};

static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
        { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
        { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
        { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
        { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
        { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
        { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
        { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
        { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
        { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
        { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
        { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};

static s64 gaudi_state_dump_specs_props[] = {
        [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
        [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
        [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
        [SP_MON_OBJ_WR_ADDR_LOW] =
                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
        [SP_MON_OBJ_WR_ADDR_HIGH] =
                mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
        [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
        [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
        [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
        [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
        [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
        [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
        [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
        [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
        [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
        [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
        [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
        [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
        [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
        [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
        [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
        [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
        [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
        [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
        [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
        [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
        [SP_FENCE0_CNT_OFFSET] =
                mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
        [SP_FENCE0_RDATA_OFFSET] =
                mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
        [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
        [SP_NUM_CORES] = 1,
};

static const int gaudi_queue_id_to_engine_id[] = {
        [GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
        [GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
        [GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
        [GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
        [GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
        [GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
        [GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
        [GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
        [GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
        [GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
        [GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
        [GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
        [GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
        [GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
        [GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
        [GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
        [GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
        [GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
        [GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
        [GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
        [GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
        [GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
        [GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
        [GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
        [GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
        [GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
        [GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
        [GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
        [GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
};

/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 */
static const char * const gaudi_sync_manager_names[] = {
        "SYNC_MGR_E_N",
        "SYNC_MGR_W_N",
        "SYNC_MGR_E_S",
        "SYNC_MGR_W_S",
        NULL
};

struct ecc_info_extract_params {
        u64 block_address;
        u32 num_memories;
        bool derr;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
                                                                u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
                                        struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
                                        u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
                                        u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
                                u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
                                u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
                                struct hl_gen_wait_properties *prop);
static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
        if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
                return HL_COLLECTIVE_MASTER;

        if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
                        queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
                return HL_COLLECTIVE_SLAVE;

        if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
                        queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
                return HL_COLLECTIVE_SLAVE;

        if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
                        queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
                return HL_COLLECTIVE_SLAVE;

        return HL_COLLECTIVE_NOT_SUPPORTED;
}

static inline void set_default_power_values(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;

        if (hdev->card_type == cpucp_card_type_pmc) {
                prop->max_power_default = MAX_POWER_DEFAULT_PMC;

                if (prop->fw_security_enabled)
                        prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
                else
                        prop->dc_power_default = DC_POWER_DEFAULT_PMC;
        } else {
                prop->max_power_default = MAX_POWER_DEFAULT_PCI;
                prop->dc_power_default = DC_POWER_DEFAULT_PCI;
        }
}

static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        u32 num_sync_stream_queues = 0;
        int i;

        prop->max_queues = GAUDI_QUEUE_ID_SIZE;
        prop->hw_queues_props = kcalloc(prop->max_queues,
                        sizeof(struct hw_queue_properties),
                        GFP_KERNEL);

        if (!prop->hw_queues_props)
                return -ENOMEM;

        for (i = 0 ; i < prop->max_queues ; i++) {
                if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
                        prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
                        prop->hw_queues_props[i].driver_only = 0;
                        prop->hw_queues_props[i].supports_sync_stream = 1;
                        prop->hw_queues_props[i].cb_alloc_flags =
                                CB_ALLOC_KERNEL;
                        num_sync_stream_queues++;
                } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
                        prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
                        prop->hw_queues_props[i].driver_only = 1;
                        prop->hw_queues_props[i].supports_sync_stream = 0;
                        prop->hw_queues_props[i].cb_alloc_flags =
                                CB_ALLOC_KERNEL;
                } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
                        prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
                        prop->hw_queues_props[i].driver_only = 0;
                        prop->hw_queues_props[i].supports_sync_stream = 0;
                        prop->hw_queues_props[i].cb_alloc_flags =
                                CB_ALLOC_USER;

                }
                prop->hw_queues_props[i].collective_mode =
                                                get_collective_mode(hdev, i);
        }

        prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
        prop->cfg_base_address = CFG_BASE;
        prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
        prop->host_base_address = HOST_PHYS_BASE;
        prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
        prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
        prop->completion_mode = HL_COMPLETION_MODE_JOB;
        prop->collective_first_sob = 0;
        prop->collective_first_mon = 0;

        /* 2 SOBs per internal queue stream are reserved for collective */
        prop->sync_stream_first_sob =
                        ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
                        * QMAN_STREAMS * HL_RSVD_SOBS;

        /* 1 monitor per internal queue stream is reserved for collective
         * 2 monitors per external queue stream are reserved for collective
         */
        prop->sync_stream_first_mon =
                        (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
                        (NUMBER_OF_EXT_HW_QUEUES * 2);
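
        /* In other words, the collective reservations above sit at the bottom
         * of the SOB/monitor ranges (collective_first_sob/mon are 0). The
         * regular sync-stream SOBs/monitors start right after them, and the
         * first user-available SOB/monitor is computed further below on top
         * of the sync-stream reservation.
         */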

        prop->dram_base_address = DRAM_PHYS_BASE;
        prop->dram_size = GAUDI_HBM_SIZE_32GB;
        prop->dram_end_address = prop->dram_base_address + prop->dram_size;
        prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

        prop->sram_base_address = SRAM_BASE_ADDR;
        prop->sram_size = SRAM_SIZE;
        prop->sram_end_address = prop->sram_base_address + prop->sram_size;
        prop->sram_user_base_address =
                        prop->sram_base_address + SRAM_USER_BASE_OFFSET;

        prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
        prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;

        prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
        if (hdev->pldm)
                prop->mmu_pgt_size = 0x800000; /* 8MB */
        else
                prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
        prop->mmu_pte_size = HL_PTE_SIZE;
        prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
        prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
        prop->dram_page_size = PAGE_SIZE_2MB;
        prop->device_mem_alloc_default_page_size = prop->dram_page_size;
        prop->dram_supports_virtual_memory = false;

        prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
        prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
        prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
        prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
        prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
        prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
        prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
        prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
        prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
        prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
        prop->pmmu.start_addr = VA_HOST_SPACE_START;
        prop->pmmu.end_addr =
                        (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
        prop->pmmu.page_size = PAGE_SIZE_4KB;
        prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
        prop->pmmu.last_mask = LAST_MASK;
        /* TODO: will be duplicated until implementing per-MMU props */
        prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
        prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;

        /* PMMU and HPMMU are the same except for page size */
        memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
        prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

        /* shifts and masks are the same in PMMU and DMMU */
        memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
        prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
        prop->dmmu.end_addr = VA_HOST_SPACE_END;
        prop->dmmu.page_size = PAGE_SIZE_2MB;

        prop->cfg_size = CFG_SIZE;
        prop->max_asid = MAX_ASID;
        prop->num_of_events = GAUDI_EVENT_SIZE;
        prop->tpc_enabled_mask = TPC_ENABLED_MASK;

        set_default_power_values(hdev);

        prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
        prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

        prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
        prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

        strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
                                        CARD_NAME_MAX_LEN);

        prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

        prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
                        prop->sync_stream_first_sob +
                        (num_sync_stream_queues * HL_RSVD_SOBS);
        prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
                        prop->sync_stream_first_mon +
                        (num_sync_stream_queues * HL_RSVD_MONS);

        prop->first_available_user_interrupt = USHRT_MAX;

        for (i = 0 ; i < HL_MAX_DCORES ; i++)
                prop->first_available_cq[i] = USHRT_MAX;

        prop->fw_cpu_boot_dev_sts0_valid = false;
        prop->fw_cpu_boot_dev_sts1_valid = false;
        prop->hard_reset_done_by_fw = false;
        prop->gic_interrupts_enable = true;

        prop->server_type = HL_SERVER_TYPE_UNKNOWN;

        prop->clk_pll_index = HL_GAUDI_MME_PLL;
        prop->max_freq_value = GAUDI_MAX_CLK_FREQ;

        prop->use_get_power_for_reset_history = true;

        prop->configurable_stop_on_err = true;

        prop->set_max_power_on_device_init = true;

        prop->dma_mask = 48;

        return 0;
}

static int gaudi_pci_bars_map(struct hl_device *hdev)
{
        static const char * const name[] = {"SRAM", "CFG", "HBM"};
        bool is_wc[3] = {false, false, true};
        int rc;

        rc = hl_pci_bars_map(hdev, name, is_wc);
        if (rc)
                return rc;

        hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
                        (CFG_BASE - SPI_FLASH_BASE_ADDR);

        return 0;
}

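/*
 * gaudi_set_hbm_bar_base() - re-point the HBM BAR (inbound region 2) to @addr.
 *
 * Returns the previous BAR base so callers can restore it when they are done,
 * e.g. (usage sketch):
 *
 *      old_base = gaudi_set_hbm_bar_base(hdev, hbm_addr);
 *      ... access HBM through the BAR ...
 *      gaudi_set_hbm_bar_base(hdev, old_base);
 *
 * U64_MAX is returned when the iATU is owned by the firmware or when setting
 * the inbound region fails.
 */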
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
        struct gaudi_device *gaudi = hdev->asic_specific;
        struct hl_inbound_pci_region pci_region;
        u64 old_addr = addr;
        int rc;

        if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
                return old_addr;

        if (hdev->asic_prop.iatu_done_by_fw)
                return U64_MAX;

        /* Inbound Region 2 - Bar 4 - Point to HBM */
        pci_region.mode = PCI_BAR_MATCH_MODE;
        pci_region.bar = HBM_BAR_ID;
        pci_region.addr = addr;
        rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
        if (rc)
                return U64_MAX;

        if (gaudi) {
                old_addr = gaudi->hbm_bar_cur_addr;
                gaudi->hbm_bar_cur_addr = addr;
        }

        return old_addr;
}

static int gaudi_init_iatu(struct hl_device *hdev)
{
        struct hl_inbound_pci_region inbound_region;
        struct hl_outbound_pci_region outbound_region;
        int rc;

        if (hdev->asic_prop.iatu_done_by_fw)
                return 0;

        /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
        inbound_region.mode = PCI_BAR_MATCH_MODE;
        inbound_region.bar = SRAM_BAR_ID;
        inbound_region.addr = SRAM_BASE_ADDR;
        rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
        if (rc)
                goto done;

        /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
        inbound_region.mode = PCI_BAR_MATCH_MODE;
        inbound_region.bar = CFG_BAR_ID;
        inbound_region.addr = SPI_FLASH_BASE_ADDR;
        rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
        if (rc)
                goto done;

        /* Inbound Region 2 - Bar 4 - Point to HBM */
        inbound_region.mode = PCI_BAR_MATCH_MODE;
        inbound_region.bar = HBM_BAR_ID;
        inbound_region.addr = DRAM_PHYS_BASE;
        rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
        if (rc)
                goto done;

        /* Outbound Region 0 - Point to Host */
        outbound_region.addr = HOST_PHYS_BASE;
        outbound_region.size = HOST_PHYS_SIZE;
        rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
        return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
        return RREG32(mmHW_STATE);
}

static int gaudi_early_init(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct pci_dev *pdev = hdev->pdev;
        resource_size_t pci_bar_size;
        u32 fw_boot_status;
        int rc;

        rc = gaudi_set_fixed_properties(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed setting fixed properties\n");
                return rc;
        }

        /* Check BAR sizes */
        pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);

        if (pci_bar_size != SRAM_BAR_SIZE) {
                dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
                        SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
                rc = -ENODEV;
                goto free_queue_props;
        }

        pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);

        if (pci_bar_size != CFG_BAR_SIZE) {
                dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
                        CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
                rc = -ENODEV;
                goto free_queue_props;
        }

        prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
        hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);

        /* If FW security is enabled at this point it means no access to ELBI */
        if (hdev->asic_prop.fw_security_enabled) {
                hdev->asic_prop.iatu_done_by_fw = true;

                /*
                 * GIC-security-bit can ONLY be set by CPUCP, so at this stage
                 * the decision can only be taken based on PCI ID security.
                 */
                hdev->asic_prop.gic_interrupts_enable = false;
                goto pci_init;
        }

        rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
                                &fw_boot_status);
        if (rc)
                goto free_queue_props;

        /* Check whether FW is configuring iATU */
        if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
                        (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
                hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
        rc = hl_pci_init(hdev);
        if (rc)
                goto free_queue_props;

        /* Before continuing in the initialization, we need to read the preboot
         * version to determine whether we run with a security-enabled firmware
         */
        rc = hl_fw_read_preboot_status(hdev);
        if (rc) {
                if (hdev->reset_on_preboot_fail)
                        hdev->asic_funcs->hw_fini(hdev, true, false);
                goto pci_fini;
        }

        if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
                dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
                hdev->asic_funcs->hw_fini(hdev, true, false);
        }

        return 0;

pci_fini:
        hl_pci_fini(hdev);
free_queue_props:
        kfree(hdev->asic_prop.hw_queues_props);
        return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
        kfree(hdev->asic_prop.hw_queues_props);
        hl_pci_fini(hdev);

        return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
        u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
        int rc;

        if (hdev->asic_prop.fw_security_enabled) {
                struct gaudi_device *gaudi = hdev->asic_specific;

                if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
                        return 0;

                rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);

                if (rc)
                        return rc;

                freq = pll_freq_arr[2];
        } else {
                /* Backward compatibility */
                div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
                div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
                nr = RREG32(mmPSOC_CPU_PLL_NR);
                nf = RREG32(mmPSOC_CPU_PLL_NF);
                od = RREG32(mmPSOC_CPU_PLL_OD);

                if (div_sel == DIV_SEL_REF_CLK ||
                                div_sel == DIV_SEL_DIVIDED_REF) {
                        if (div_sel == DIV_SEL_REF_CLK)
                                freq = PLL_REF_CLK;
                        else
                                freq = PLL_REF_CLK / (div_fctr + 1);
                } else if (div_sel == DIV_SEL_PLL_CLK ||
                        div_sel == DIV_SEL_DIVIDED_PLL) {
                        pll_clk = PLL_REF_CLK * (nf + 1) /
                                        ((nr + 1) * (od + 1));
                        if (div_sel == DIV_SEL_PLL_CLK)
                                freq = pll_clk;
                        else
                                freq = pll_clk / (div_fctr + 1);
                } else {
                        dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
                        freq = 0;
                }
        }

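        /* The legacy path above follows the usual integer PLL relation:
         *      pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1))
         * optionally divided again by (div_fctr + 1). For illustration only
         * (assumed register values, not taken from real hardware): with a
         * 50 MHz reference and nf = 99, nr = 0, od = 1, div_fctr = 0, this
         * yields pll_clk = 50 * 100 / (1 * 2) = 2500 MHz.
         */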
        prop->psoc_timestamp_frequency = freq;
        prop->psoc_pci_pll_nr = nr;
        prop->psoc_pci_pll_nf = nf;
        prop->psoc_pci_pll_od = od;
        prop->psoc_pci_pll_div_factor = div_fctr;

        return 0;
}

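/*
 * _gaudi_init_tpc_mem() - helper for gaudi_init_tpc_mem().
 *
 * Builds a single LIN_DMA packet that copies the TPC kernel image from the
 * host DMA buffer to the SRAM user area, sends it on QMAN0 (DMA_0_0), and
 * then runs the kernel on every TPC engine.
 */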
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
                dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct packet_lin_dma *init_tpc_mem_pkt;
        struct hl_cs_job *job;
        struct hl_cb *cb;
        u64 dst_addr;
        u32 cb_size, ctl;
        u8 tpc_id;
        int rc;

        cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
        if (!cb)
                return -EFAULT;

        init_tpc_mem_pkt = cb->kernel_address;
        cb_size = sizeof(*init_tpc_mem_pkt);
        memset(init_tpc_mem_pkt, 0, cb_size);

        init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

        ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
        ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
        ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
        ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

        init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

        init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
        dst_addr = (prop->sram_user_base_address &
                        GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
                        GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
        init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

        job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
        if (!job) {
                dev_err(hdev->dev, "Failed to allocate a new job\n");
                rc = -ENOMEM;
                goto release_cb;
        }

        job->id = 0;
        job->user_cb = cb;
        atomic_inc(&job->user_cb->cs_cnt);
        job->user_cb_size = cb_size;
        job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
        job->patched_cb = job->user_cb;
        job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

        hl_debugfs_add_job(hdev, job);

        rc = gaudi_send_job_on_qman0(hdev, job);

        if (rc)
                goto free_job;

        for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
                rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
                if (rc)
                        break;
        }

free_job:
        hl_userptr_delete_list(hdev, &job->userptr_list);
        hl_debugfs_remove_job(hdev, job);
        kfree(job);
        atomic_dec(&cb->cs_cnt);

release_cb:
        hl_cb_put(cb);
        hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

        return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
        const struct firmware *fw;
        size_t fw_size;
        void *cpu_addr;
        dma_addr_t dma_handle;
        int rc, count = 5;

again:
        rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
        if (rc == -EINTR && count-- > 0) {
                msleep(50);
                goto again;
        }

        if (rc) {
                dev_err(hdev->dev, "Failed to load firmware file %s\n",
                                GAUDI_TPC_FW_FILE);
                goto out;
        }

        fw_size = fw->size;
        cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
        if (!cpu_addr) {
                dev_err(hdev->dev,
                        "Failed to allocate %zu of dma memory for TPC kernel\n",
                        fw_size);
                rc = -ENOMEM;
                goto out;
        }

        memcpy(cpu_addr, fw->data, fw_size);

        rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

        hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);

out:
        release_firmware(fw);
        return rc;
}

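/*
 * gaudi_collective_map_sobs() - assign the SOBs of the current SOB group of
 * @stream to the collective slave queues: one SOB per NIC queue, plus one
 * shared SOB for the DMA5/TPC7 reduction engine.
 */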
static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
        struct gaudi_device *gaudi = hdev->asic_specific;
        struct gaudi_collective_properties *prop = &gaudi->collective_props;
        struct hl_hw_queue *q;
        u32 i, sob_id, sob_group_id, queue_id;

        /* Iterate through SOB groups and assign a SOB for each slave queue */
        sob_group_id =
                stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
        sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

        queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
        for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
                q = &hdev->kernel_queues[queue_id + (4 * i)];
                q->sync_stream_prop.collective_sob_id = sob_id + i;
        }

        /* Both DMA5 and TPC7 use the same resources since only a single
         * engine needs to participate in the reduction process
         */
        queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
        q = &hdev->kernel_queues[queue_id];
        q->sync_stream_prop.collective_sob_id =
                        sob_id + NIC_NUMBER_OF_ENGINES;

        queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
        q = &hdev->kernel_queues[queue_id];
        q->sync_stream_prop.collective_sob_id =
                        sob_id + NIC_NUMBER_OF_ENGINES;
}

static void gaudi_sob_group_hw_reset(struct kref *ref)
{
        struct gaudi_hw_sob_group *hw_sob_group =
                container_of(ref, struct gaudi_hw_sob_group, kref);
        struct hl_device *hdev = hw_sob_group->hdev;
        int i;

        for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
                WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
                        (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);

        kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
        struct gaudi_hw_sob_group *hw_sob_group =
                container_of(ref, struct gaudi_hw_sob_group, kref);
        struct hl_device *hdev = hw_sob_group->hdev;

        dev_crit(hdev->dev,
                "SOB release shouldn't be called here, base_sob_id: %d\n",
                hw_sob_group->base_sob_id);
}

static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
        struct gaudi_collective_properties *prop;
        int i;

        prop = &gaudi->collective_props;

        memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

        for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
                if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
                        prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
                                        BIT(i % HL_MAX_SOBS_PER_MONITOR);
        /* Set collective engine bit */
        prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
                                BIT(i % HL_MAX_SOBS_PER_MONITOR);
}

static int gaudi_collective_init(struct hl_device *hdev)
{
        u32 i, sob_id, reserved_sobs_per_group;
        struct gaudi_collective_properties *prop;
        struct gaudi_device *gaudi;

        gaudi = hdev->asic_specific;
        prop = &gaudi->collective_props;
        sob_id = hdev->asic_prop.collective_first_sob;

        /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
        reserved_sobs_per_group =
                ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

        /* Init SOB groups */
        for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
                prop->hw_sob_group[i].hdev = hdev;
                prop->hw_sob_group[i].base_sob_id = sob_id;
                sob_id += reserved_sobs_per_group;
                gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
        }

        for (i = 0 ; i < QMAN_STREAMS; i++) {
                prop->next_sob_group_val[i] = 1;
                prop->curr_sob_group_idx[i] = 0;
                gaudi_collective_map_sobs(hdev, i);
        }

        gaudi_collective_mstr_sob_mask_set(gaudi);

        return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
        struct gaudi_device *gaudi = hdev->asic_specific;
        struct gaudi_collective_properties *cprop = &gaudi->collective_props;

        kref_put(&cprop->hw_sob_group[sob_group].kref,
                                        gaudi_sob_group_hw_reset);
}

static void gaudi_collective_master_init_job(struct hl_device *hdev,
                struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
        u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
        struct gaudi_collective_properties *cprop;
        struct hl_gen_wait_properties wait_prop;
        struct hl_sync_stream_properties *prop;
        struct gaudi_device *gaudi;

        gaudi = hdev->asic_specific;
        cprop = &gaudi->collective_props;
        queue_id = job->hw_queue_id;
        prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

        master_sob_base =
                cprop->hw_sob_group[sob_group_offset].base_sob_id;
        master_monitor = prop->collective_mstr_mon_id[0];

        cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

        dev_dbg(hdev->dev,
                "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
                master_sob_base, cprop->mstr_sob_mask[0],
                cprop->next_sob_group_val[stream],
                master_monitor, queue_id);

        wait_prop.data = (void *) job->patched_cb;
        wait_prop.sob_base = master_sob_base;
        wait_prop.sob_mask = cprop->mstr_sob_mask[0];
        wait_prop.sob_val = cprop->next_sob_group_val[stream];
        wait_prop.mon_id = master_monitor;
        wait_prop.q_idx = queue_id;
        wait_prop.size = cb_size;
        cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

        master_sob_base += HL_MAX_SOBS_PER_MONITOR;
        master_monitor = prop->collective_mstr_mon_id[1];

        dev_dbg(hdev->dev,
                "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
                master_sob_base, cprop->mstr_sob_mask[1],
                cprop->next_sob_group_val[stream],
                master_monitor, queue_id);

        wait_prop.sob_base = master_sob_base;
        wait_prop.sob_mask = cprop->mstr_sob_mask[1];
        wait_prop.mon_id = master_monitor;
        wait_prop.size = cb_size;
        cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
                struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
        struct hl_gen_wait_properties wait_prop;
        struct hl_sync_stream_properties *prop;
        u32 queue_id, cb_size = 0;

        queue_id = job->hw_queue_id;
        prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

        if (job->cs->encaps_signals) {
                /* use the encaps signal handle stored earlier in the flow
                 * and set the SOB information from the encaps
                 * signals handle
                 */
                hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
                                                cs_cmpl);

                dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
                                job->cs->sequence,
                                cs_cmpl->hw_sob->sob_id,
                                cs_cmpl->sob_val);
        }

        /* Add to wait CBs using slave monitor */
        wait_prop.data = (void *) job->user_cb;
        wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
        wait_prop.sob_mask = 0x1;
        wait_prop.sob_val = cs_cmpl->sob_val;
        wait_prop.mon_id = prop->collective_slave_mon_id;
        wait_prop.q_idx = queue_id;
        wait_prop.size = cb_size;

        dev_dbg(hdev->dev,
                "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
                cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
                prop->collective_slave_mon_id, queue_id);

        cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

        dev_dbg(hdev->dev,
                "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
                prop->collective_sob_id, queue_id);

        cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
                        prop->collective_sob_id, cb_size, false);
}

1298 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1299 {
1300         struct hl_cs_compl *signal_cs_cmpl =
1301                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1302         struct hl_cs_compl *cs_cmpl =
1303                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1304         struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1305         struct gaudi_collective_properties *cprop;
1306         u32 stream, queue_id, sob_group_offset;
1307         struct gaudi_device *gaudi;
1308         struct hl_device *hdev;
1309         struct hl_cs_job *job;
1310         struct hl_ctx *ctx;
1311
1312         ctx = cs->ctx;
1313         hdev = ctx->hdev;
1314         gaudi = hdev->asic_specific;
1315         cprop = &gaudi->collective_props;
1316
1317         if (cs->encaps_signals) {
1318                 cs_cmpl->hw_sob = handle->hw_sob;
1319                 /* at this checkpoint we only need the hw_sob pointer
1320                  * for the completion check before starting to go over the
1321                  * jobs of the master/slaves. The sob_value will be taken
1322                  * later on in gaudi_collective_slave_init_job, depending on
1323                  * each job's wait offset value.
1324                  */
1325                 cs_cmpl->sob_val = 0;
1326         } else {
1327                 /* copy the SOB id and value of the signal CS */
1328                 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1329                 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1330         }
1331
1332         /* Check again if the signal CS has already completed.
1333          * If so, don't send any wait CS since the hw_sob
1334          * could already be in reset. If the signal is not completed,
1335          * take a refcount on the hw_sob to prevent resetting the sob
1336          * while the wait CS is not yet submitted.
1337          * Note that this check is protected by two locks,
1338          * the hw queue lock and the completion object lock,
1339          * and the same completion object lock also protects
1340          * the hw_sob reset handler function.
1341          * The hw_queue lock prevents the hw_sob refcount value,
1342          * changed by the signal/wait flows, from going out of sync.
1343          */
1344         spin_lock(&signal_cs_cmpl->lock);
1345
1346         if (completion_done(&cs->signal_fence->completion)) {
1347                 spin_unlock(&signal_cs_cmpl->lock);
1348                 return -EINVAL;
1349         }
1350         /* Increment kref since all slave queues are now waiting on it */
1351         kref_get(&cs_cmpl->hw_sob->kref);
1352
1353         spin_unlock(&signal_cs_cmpl->lock);
1354
1355         /* Calculate the stream from collective master queue (1st job) */
1356         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1357         stream = job->hw_queue_id % 4;
1358         sob_group_offset =
1359                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1360
1361         list_for_each_entry(job, &cs->job_list, cs_node) {
1362                 queue_id = job->hw_queue_id;
1363
1364                 if (hdev->kernel_queues[queue_id].collective_mode ==
1365                                 HL_COLLECTIVE_MASTER)
1366                         gaudi_collective_master_init_job(hdev, job, stream,
1367                                                 sob_group_offset);
1368                 else
1369                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1370         }
1371
1372         cs_cmpl->sob_group = sob_group_offset;
1373
1374         /* Handle sob group kref and wraparound */
1375         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1376         cprop->next_sob_group_val[stream]++;
1377
1378         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1379                 /*
1380                  * Decrement as we reached the max value.
1381                  * The release function won't be called here as we've
1382                  * just incremented the refcount.
1383                  */
1384                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1385                                 gaudi_sob_group_reset_error);
1386                 cprop->next_sob_group_val[stream] = 1;
1387                 /* only two SOBs are currently in use */
1388                 cprop->curr_sob_group_idx[stream] =
1389                         (cprop->curr_sob_group_idx[stream] + 1) &
1390                                                         (HL_RSVD_SOBS - 1);
1391
1392                 gaudi_collective_map_sobs(hdev, stream);
1393
1394                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1395                                 cprop->curr_sob_group_idx[stream], stream);
1396         }
1397
1398         mb();
1399         hl_fence_put(cs->signal_fence);
1400         cs->signal_fence = NULL;
1401
1402         return 0;
1403 }
1404
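/*
 * A patched CB gets two MSG_PROT packets appended after the user CB. If these
 * packets do not fit in the cache line on which the user CB ends, pad the CB
 * up to the next cache line boundary so they start on a fresh cache line.
 */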
1405 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1406 {
1407         u32 cacheline_end, additional_commands;
1408
1409         cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1410         additional_commands = sizeof(struct packet_msg_prot) * 2;
1411
1412         if (user_cb_size + additional_commands > cacheline_end)
1413                 return cacheline_end - user_cb_size + additional_commands;
1414         else
1415                 return additional_commands;
1416 }
1417
1418 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1419                 struct hl_ctx *ctx, struct hl_cs *cs,
1420                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1421                 u32 encaps_signal_offset)
1422 {
1423         struct hw_queue_properties *hw_queue_prop;
1424         struct hl_cs_counters_atomic *cntr;
1425         struct hl_cs_job *job;
1426         struct hl_cb *cb;
1427         u32 cb_size;
1428         bool patched_cb;
1429
1430         cntr = &hdev->aggregated_cs_counters;
1431
1432         if (mode == HL_COLLECTIVE_MASTER) {
1433                 /* CB size of collective master queue contains
1434                  * 4 msg short packets for monitor 1 configuration
1435                  * 1 fence packet
1436                  * 4 msg short packets for monitor 2 configuration
1437                  * 1 fence packet
1438                  * 2 msg prot packets for completion and MSI
1439                  */
1440                 cb_size = sizeof(struct packet_msg_short) * 8 +
1441                                 sizeof(struct packet_fence) * 2 +
1442                                 sizeof(struct packet_msg_prot) * 2;
1443                 patched_cb = true;
1444         } else {
1445                 /* CB size of collective slave queues contains
1446                  * 4 msg short packets for monitor configuration
1447                  * 1 fence packet
1448                  * 1 additional msg short packet for sob signal
1449                  */
1450                 cb_size = sizeof(struct packet_msg_short) * 5 +
1451                                 sizeof(struct packet_fence);
1452                 patched_cb = false;
1453         }
1454
1455         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1456         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1457         if (!job) {
1458                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1459                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1460                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1461                 return -ENOMEM;
1462         }
1463
1464         /* Allocate an internal mapped CB for non-patched CBs */
1465         cb = hl_cb_kernel_create(hdev, cb_size,
1466                         hdev->mmu_enable && !patched_cb);
1467         if (!cb) {
1468                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1469                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1470                 kfree(job);
1471                 return -EFAULT;
1472         }
1473
1474         job->id = 0;
1475         job->cs = cs;
1476         job->user_cb = cb;
1477         atomic_inc(&job->user_cb->cs_cnt);
1478         job->user_cb_size = cb_size;
1479         job->hw_queue_id = queue_id;
1480
1481         /* Since the collective wait CS is guaranteed to have only one
1482          * chunk, we can use this chunk to set the encapsulated signal
1483          * offset in the jobs.
1484          */
1485         if (cs->encaps_signals)
1486                 job->encaps_sig_wait_offset = encaps_signal_offset;
1487
1488         /*
1489          * No need for parsing, the user CB is the patched CB.
1490          * We call hl_cb_destroy() for two reasons - we don't need
1491          * the CB in the CB idr anymore, and we need to decrement its
1492          * refcount as it was incremented inside hl_cb_kernel_create().
1493          */
1494         if (patched_cb)
1495                 job->patched_cb = job->user_cb;
1496         else
1497                 job->patched_cb = NULL;
1498
1499         job->job_cb_size = job->user_cb_size;
1500         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1501
1502         /* Increment the CS refcount as we get a completion for external queues */
1503         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1504                 cs_get(cs);
1505
1506         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1507
1508         list_add_tail(&job->cs_node, &cs->job_list);
1509
1510         hl_debugfs_add_job(hdev, job);
1511
1512         return 0;
1513 }
1514
1515 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1516                 struct hl_ctx *ctx, struct hl_cs *cs,
1517                 u32 wait_queue_id, u32 collective_engine_id,
1518                 u32 encaps_signal_offset)
1519 {
1520         struct gaudi_device *gaudi = hdev->asic_specific;
1521         struct hw_queue_properties *hw_queue_prop;
1522         u32 queue_id, collective_queue, num_jobs;
1523         u32 stream, nic_queue, nic_idx = 0;
1524         bool skip;
1525         int i, rc = 0;
1526
1527         /* Verify wait queue id is configured as master */
1528         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1529         if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1530                 dev_err(hdev->dev,
1531         if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1532                         wait_queue_id);
1533                 return -EINVAL;
1534         }
1535
1536         /* Verify engine id is supported */
1537         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1538                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1539                 dev_err(hdev->dev,
1540                         "Collective wait does not support engine %u\n",
1541                         collective_engine_id);
1542                 return -EINVAL;
1543         }
1544
1545         stream = wait_queue_id % 4;
1546
1547         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1548                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1549         else
1550                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1551
1552         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1553         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1554
1555         /* The first job goes to the collective master queue; it will wait
1556          * for the collective slave queues to finish execution.
1557          * The synchronization is done using two monitors:
1558          * the first monitor for NICs 0-7, the second monitor for NICs 8-9
1559          * and the reduction engine (DMA5/TPC7).
1560          *
1561          * The rest of the jobs go to the collective slave queues, which
1562          * will all wait for the user to signal sob 'cs_cmpl->sob_val'.
1563          */
1564         for (i = 0 ; i < num_jobs ; i++) {
1565                 if (i == 0) {
1566                         queue_id = wait_queue_id;
1567                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1568                                 HL_COLLECTIVE_MASTER, queue_id,
1569                                 wait_queue_id, encaps_signal_offset);
1570                 } else {
1571                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
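                                /* Skip queues of NIC engines that were not
                                 * brought up (e.g. ports masked out on a PCI
                                 * card), but keep advancing the NIC queue and
                                 * engine indices.
                                 */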
1572                                 if (gaudi->hw_cap_initialized &
1573                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1574                                         skip = false;
1575                                 else
1576                                         skip = true;
1577
1578                                 queue_id = nic_queue;
1579                                 nic_queue += 4;
1580                                 nic_idx++;
1581
1582                                 if (skip)
1583                                         continue;
1584                         } else {
1585                                 queue_id = collective_queue;
1586                         }
1587
1588                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1589                                 HL_COLLECTIVE_SLAVE, queue_id,
1590                                 wait_queue_id, encaps_signal_offset);
1591                 }
1592
1593                 if (rc)
1594                         return rc;
1595         }
1596
1597         return rc;
1598 }
1599
1600 static int gaudi_late_init(struct hl_device *hdev)
1601 {
1602         struct gaudi_device *gaudi = hdev->asic_specific;
1603         int rc;
1604
1605         rc = gaudi->cpucp_info_get(hdev);
1606         if (rc) {
1607                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1608                 return rc;
1609         }
1610
1611         if ((hdev->card_type == cpucp_card_type_pci) &&
1612                         (hdev->nic_ports_mask & 0x3)) {
1613                 dev_info(hdev->dev,
1614                         "PCI card detected, only 8 ports are enabled\n");
1615                 hdev->nic_ports_mask &= ~0x3;
1616
1617                 /* Stop and disable unused NIC QMANs */
1618                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1619                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1620                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1621
1622                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1623                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1624                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1625
1626                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1627                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1628
1629                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1630         }
1631
1632         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1633         if (rc) {
1634                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1635                 return rc;
1636         }
1637
1638         /* Scrub both SRAM and DRAM */
1639         rc = hdev->asic_funcs->scrub_device_mem(hdev);
1640         if (rc)
1641                 goto disable_pci_access;
1642
1643         rc = gaudi_fetch_psoc_frequency(hdev);
1644         if (rc) {
1645                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1646                 goto disable_pci_access;
1647         }
1648
1649         rc = gaudi_mmu_clear_pgt_range(hdev);
1650         if (rc) {
1651                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1652                 goto disable_pci_access;
1653         }
1654
1655         rc = gaudi_init_tpc_mem(hdev);
1656         if (rc) {
1657                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1658                 goto disable_pci_access;
1659         }
1660
1661         rc = gaudi_collective_init(hdev);
1662         if (rc) {
1663                 dev_err(hdev->dev, "Failed to init collective\n");
1664                 goto disable_pci_access;
1665         }
1666
1667         /* We only support a single ASID for the user, so for the sake of optimization, just
1668          * initialize the ASID one time during device initialization with the fixed value of 1
1669          */
1670         gaudi_mmu_prepare(hdev, 1);
1671
1672         hl_fw_set_pll_profile(hdev);
1673
1674         return 0;
1675
1676 disable_pci_access:
1677         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1678
1679         return rc;
1680 }
1681
1682 static void gaudi_late_fini(struct hl_device *hdev)
1683 {
1684         const struct hwmon_channel_info **channel_info_arr;
1685         int i = 0;
1686
1687         if (!hdev->hl_chip_info->info)
1688                 return;
1689
1690         channel_info_arr = hdev->hl_chip_info->info;
1691
1692         while (channel_info_arr[i]) {
1693                 kfree(channel_info_arr[i]->config);
1694                 kfree(channel_info_arr[i]);
1695                 i++;
1696         }
1697
1698         kfree(channel_info_arr);
1699
1700         hdev->hl_chip_info->info = NULL;
1701 }
1702
1703 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1704 {
1705         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1706         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1707         int i, j, rc = 0;
1708
1709         /*
1710          * The device CPU works with 40-bit addresses, and bit 39 must be set
1711          * to '1' when accessing the host.
1712          * Bits 49:39 of the full host address are saved for a later
1713          * configuration of the HW to perform extension to 50 bits.
1714          * Because there is a single HW register that holds the extension bits,
1715          * these bits must be identical across the entire allocated range.
1716          */
1717
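        /* Retry the allocation until we get a range whose start and end
         * addresses share the same extension bits (49:39), i.e. the range
         * does not cross an MSB-extension boundary.
         */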
1718         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1719                 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1720                                                                 &dma_addr_arr[i],
1721                                                                 GFP_KERNEL | __GFP_ZERO);
1722                 if (!virt_addr_arr[i]) {
1723                         rc = -ENOMEM;
1724                         goto free_dma_mem_arr;
1725                 }
1726
1727                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1728                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1729                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1730                         break;
1731         }
1732
1733         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1734                 dev_err(hdev->dev,
1735                         "MSB of CPU accessible DMA memory is not identical across the allocated range\n");
1736                 rc = -EFAULT;
1737                 goto free_dma_mem_arr;
1738         }
1739
1740         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1741         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1742         hdev->cpu_pci_msb_addr =
1743                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1744
1745         if (!hdev->asic_prop.fw_security_enabled)
1746                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1747
1748 free_dma_mem_arr:
1749         for (j = 0 ; j < i ; j++)
1750                 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1751                                                 dma_addr_arr[j]);
1752
1753         return rc;
1754 }
1755
1756 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1757 {
1758         struct gaudi_device *gaudi = hdev->asic_specific;
1759         struct gaudi_internal_qman_info *q;
1760         u32 i;
1761
1762         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1763                 q = &gaudi->internal_qmans[i];
1764                 if (!q->pq_kernel_addr)
1765                         continue;
1766                 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1767         }
1768 }
1769
1770 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1771 {
1772         struct gaudi_device *gaudi = hdev->asic_specific;
1773         struct gaudi_internal_qman_info *q;
1774         int rc, i;
1775
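        /* Allocate a coherent PQ buffer for every internal queue; the PQ size
         * depends on the engine type (DMA/MME/TPC/NIC).
         */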
1776         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1777                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1778                         continue;
1779
1780                 q = &gaudi->internal_qmans[i];
1781
1782                 switch (i) {
1783                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1784                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1785                         break;
1786                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1787                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1788                         break;
1789                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1790                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1791                         break;
1792                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1793                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1794                         break;
1795                 default:
1796                         dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1797                         rc = -EINVAL;
1798                         goto free_internal_qmans_pq_mem;
1799                 }
1800
1801                 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1802                                                                 GFP_KERNEL | __GFP_ZERO);
1803                 if (!q->pq_kernel_addr) {
1804                         rc = -ENOMEM;
1805                         goto free_internal_qmans_pq_mem;
1806                 }
1807         }
1808
1809         return 0;
1810
1811 free_internal_qmans_pq_mem:
1812         gaudi_free_internal_qmans_pq_mem(hdev);
1813         return rc;
1814 }
1815
1816 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1817 {
1818         struct asic_fixed_properties *prop = &hdev->asic_prop;
1819         struct pci_mem_region *region;
1820
1821         /* CFG */
1822         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1823         region->region_base = CFG_BASE;
1824         region->region_size = CFG_SIZE;
1825         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1826         region->bar_size = CFG_BAR_SIZE;
1827         region->bar_id = CFG_BAR_ID;
1828         region->used = 1;
1829
1830         /* SRAM */
1831         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1832         region->region_base = SRAM_BASE_ADDR;
1833         region->region_size = SRAM_SIZE;
1834         region->offset_in_bar = 0;
1835         region->bar_size = SRAM_BAR_SIZE;
1836         region->bar_id = SRAM_BAR_ID;
1837         region->used = 1;
1838
1839         /* DRAM */
1840         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1841         region->region_base = DRAM_PHYS_BASE;
1842         region->region_size = hdev->asic_prop.dram_size;
1843         region->offset_in_bar = 0;
1844         region->bar_size = prop->dram_pci_bar_size;
1845         region->bar_id = HBM_BAR_ID;
1846         region->used = 1;
1847
1848         /* SP SRAM */
1849         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1850         region->region_base = PSOC_SCRATCHPAD_ADDR;
1851         region->region_size = PSOC_SCRATCHPAD_SIZE;
1852         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1853         region->bar_size = CFG_BAR_SIZE;
1854         region->bar_id = CFG_BAR_ID;
1855         region->used = 1;
1856 }
1857
1858 static int gaudi_sw_init(struct hl_device *hdev)
1859 {
1860         struct gaudi_device *gaudi;
1861         u32 i, event_id = 0;
1862         int rc;
1863
1864         /* Allocate device structure */
1865         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1866         if (!gaudi)
1867                 return -ENOMEM;
1868
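        /* Build the event ID array from the valid entries of the IRQ map
         * table, making sure it does not exceed GAUDI_EVENT_SIZE entries.
         */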
1869         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1870                 if (gaudi_irq_map_table[i].valid) {
1871                         if (event_id == GAUDI_EVENT_SIZE) {
1872                                 dev_err(hdev->dev,
1873                                         "Event array exceeds the limit of %u events\n",
1874                                         GAUDI_EVENT_SIZE);
1875                                 rc = -EINVAL;
1876                                 goto free_gaudi_device;
1877                         }
1878
1879                         gaudi->events[event_id++] =
1880                                         gaudi_irq_map_table[i].fc_id;
1881                 }
1882         }
1883
1884         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1885
1886         hdev->asic_specific = gaudi;
1887
1888         /* Create DMA pool for small allocations */
1889         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1890                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1891         if (!hdev->dma_pool) {
1892                 dev_err(hdev->dev, "failed to create DMA pool\n");
1893                 rc = -ENOMEM;
1894                 goto free_gaudi_device;
1895         }
1896
1897         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1898         if (rc)
1899                 goto free_dma_pool;
1900
1901         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1902         if (!hdev->cpu_accessible_dma_pool) {
1903                 dev_err(hdev->dev,
1904                         "Failed to create CPU accessible DMA pool\n");
1905                 rc = -ENOMEM;
1906                 goto free_cpu_dma_mem;
1907         }
1908
1909         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1910                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1911                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1912         if (rc) {
1913                 dev_err(hdev->dev,
1914                         "Failed to add memory to CPU accessible DMA pool\n");
1915                 rc = -EFAULT;
1916                 goto free_cpu_accessible_dma_pool;
1917         }
1918
1919         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1920         if (rc)
1921                 goto free_cpu_accessible_dma_pool;
1922
1923         spin_lock_init(&gaudi->hw_queues_lock);
1924
1925         hdev->supports_sync_stream = true;
1926         hdev->supports_coresight = true;
1927         hdev->supports_staged_submission = true;
1928         hdev->supports_wait_for_multi_cs = true;
1929
1930         hdev->asic_funcs->set_pci_memory_regions(hdev);
1931         hdev->stream_master_qid_arr =
1932                                 hdev->asic_funcs->get_stream_master_qid_arr();
1933         hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1934
1935         return 0;
1936
1937 free_cpu_accessible_dma_pool:
1938         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1939 free_cpu_dma_mem:
1940         if (!hdev->asic_prop.fw_security_enabled)
1941                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1942                                         hdev->cpu_pci_msb_addr);
1943         hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1944                                         hdev->cpu_accessible_dma_address);
1945 free_dma_pool:
1946         dma_pool_destroy(hdev->dma_pool);
1947 free_gaudi_device:
1948         kfree(gaudi);
1949         return rc;
1950 }
1951
1952 static int gaudi_sw_fini(struct hl_device *hdev)
1953 {
1954         struct gaudi_device *gaudi = hdev->asic_specific;
1955
1956         gaudi_free_internal_qmans_pq_mem(hdev);
1957
1958         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1959
1960         if (!hdev->asic_prop.fw_security_enabled)
1961                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1962                                         hdev->cpu_pci_msb_addr);
1963
1964         hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1965                                         hdev->cpu_accessible_dma_address);
1966
1967         dma_pool_destroy(hdev->dma_pool);
1968
1969         kfree(gaudi);
1970
1971         return 0;
1972 }
1973
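/*
 * In single MSI mode a single interrupt handler serves all completion queues
 * and the event queue.
 */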
1974 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1975 {
1976         struct hl_device *hdev = arg;
1977         int i;
1978
1979         if (hdev->disabled)
1980                 return IRQ_HANDLED;
1981
1982         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1983                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1984
1985         hl_irq_handler_eq(irq, &hdev->event_queue);
1986
1987         return IRQ_HANDLED;
1988 }
1989
1990 /*
1991  * For backward compatibility, new MSI interrupts should be set after the
1992  * existing CPU and NIC interrupts.
1993  */
1994 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1995                                 bool cpu_eq)
1996 {
1997         int msi_vec;
1998
1999         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
2000                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
2001                                 GAUDI_EVENT_QUEUE_MSI_IDX);
2002
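        /* CQ interrupts and the CPU EQ map directly to their vector index,
         * while any newer interrupt is placed after the NIC vectors and the
         * CPU EQ vector.
         */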
2003         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
2004                         (nr + NIC_NUMBER_OF_ENGINES + 1);
2005
2006         return pci_irq_vector(hdev->pdev, msi_vec);
2007 }
2008
2009 static int gaudi_enable_msi_single(struct hl_device *hdev)
2010 {
2011         int rc, irq;
2012
2013         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2014
2015         irq = gaudi_pci_irq_vector(hdev, 0, false);
2016         rc = request_irq(irq, gaudi_irq_handler_single, 0,
2017                         "gaudi single msi", hdev);
2018         if (rc)
2019                 dev_err(hdev->dev,
2020                         "Failed to request single MSI IRQ\n");
2021
2022         return rc;
2023 }
2024
2025 static int gaudi_enable_msi_multi(struct hl_device *hdev)
2026 {
2027         int cq_cnt = hdev->asic_prop.completion_queues_count;
2028         int rc, i, irq_cnt_init, irq;
2029
2030         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2031                 irq = gaudi_pci_irq_vector(hdev, i, false);
2032                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
2033                                 &hdev->completion_queue[i]);
2034                 if (rc) {
2035                         dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2036                         goto free_irqs;
2037                 }
2038         }
2039
2040         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2041         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2042                                 &hdev->event_queue);
2043         if (rc) {
2044                 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2045                 goto free_irqs;
2046         }
2047
2048         return 0;
2049
2050 free_irqs:
2051         for (i = 0 ; i < irq_cnt_init ; i++)
2052                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
2053                                 &hdev->completion_queue[i]);
2054         return rc;
2055 }
2056
2057 static int gaudi_enable_msi(struct hl_device *hdev)
2058 {
2059         struct gaudi_device *gaudi = hdev->asic_specific;
2060         int rc;
2061
2062         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2063                 return 0;
2064
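        /* Only a single MSI vector is requested here; when fewer than
         * NUMBER_OF_INTERRUPTS vectors are granted the driver works in
         * single MSI mode, otherwise in multi MSI mode.
         */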
2065         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2066         if (rc < 0) {
2067                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2068                 return rc;
2069         }
2070
2071         if (rc < NUMBER_OF_INTERRUPTS) {
2072                 gaudi->multi_msi_mode = false;
2073                 rc = gaudi_enable_msi_single(hdev);
2074         } else {
2075                 gaudi->multi_msi_mode = true;
2076                 rc = gaudi_enable_msi_multi(hdev);
2077         }
2078
2079         if (rc)
2080                 goto free_pci_irq_vectors;
2081
2082         gaudi->hw_cap_initialized |= HW_CAP_MSI;
2083
2084         return 0;
2085
2086 free_pci_irq_vectors:
2087         pci_free_irq_vectors(hdev->pdev);
2088         return rc;
2089 }
2090
2091 static void gaudi_sync_irqs(struct hl_device *hdev)
2092 {
2093         struct gaudi_device *gaudi = hdev->asic_specific;
2094         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2095
2096         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2097                 return;
2098
2099         /* Wait for all pending IRQs to be finished */
2100         if (gaudi->multi_msi_mode) {
2101                 for (i = 0 ; i < cq_cnt ; i++)
2102                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2103
2104                 synchronize_irq(gaudi_pci_irq_vector(hdev,
2105                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
2106                                                 true));
2107         } else {
2108                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2109         }
2110 }
2111
2112 static void gaudi_disable_msi(struct hl_device *hdev)
2113 {
2114         struct gaudi_device *gaudi = hdev->asic_specific;
2115         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2116
2117         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2118                 return;
2119
2120         gaudi_sync_irqs(hdev);
2121
2122         if (gaudi->multi_msi_mode) {
2123                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2124                                                 true);
2125                 free_irq(irq, &hdev->event_queue);
2126
2127                 for (i = 0 ; i < cq_cnt ; i++) {
2128                         irq = gaudi_pci_irq_vector(hdev, i, false);
2129                         free_irq(irq, &hdev->completion_queue[i]);
2130                 }
2131         } else {
2132                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2133         }
2134
2135         pci_free_irq_vectors(hdev->pdev);
2136
2137         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2138 }
2139
2140 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2141 {
2142         struct gaudi_device *gaudi = hdev->asic_specific;
2143
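        /* Skip if the firmware owns the scrambler configuration (secured
         * device, or the boot status reports SRAM scrambling is already
         * enabled), or if the driver has already configured it.
         */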
2144         if (hdev->asic_prop.fw_security_enabled)
2145                 return;
2146
2147         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2148                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2149                 return;
2150
2151         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2152                 return;
2153
2154         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2155                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2156         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2157                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2158         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2159                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2160         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2161                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2162         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2163                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2164         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2165                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2166         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2167                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2168         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2169                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2170
2171         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2172                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2173         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2174                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2175         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2176                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2177         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2178                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2179         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2180                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2181         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2182                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2183         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2184                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2185         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2186                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2187
2188         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2189                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2190         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2191                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2192         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2193                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2194         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2195                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2196         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2197                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2198         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2199                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2200         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2201                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2202         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2203                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2204
2205         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2206 }
2207
2208 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2209 {
2210         struct gaudi_device *gaudi = hdev->asic_specific;
2211
2212         if (hdev->asic_prop.fw_security_enabled)
2213                 return;
2214
2215         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2216                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2217                 return;
2218
2219         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2220                 return;
2221
2222         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2223                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2224         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2225                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2226         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2227                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2228         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2229                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2230         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2231                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2232         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2233                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2234         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2235                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2236         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2237                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2238
2239         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2240                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2241         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2242                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2243         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2244                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2245         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2246                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2247         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2248                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2249         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2250                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2251         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2252                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2253         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2254                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2255
2256         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2257                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2258         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2259                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2260         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2261                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2262         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2263                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2264         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2265                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2266         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2267                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2268         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2269                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2270         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2271                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2272
2273         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2274 }
2275
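/*
 * Configure the end-to-end credits of the SIF/NIF routers and the DMA
 * interfaces towards HBM and PCI, then enable them. Skipped when the firmware
 * reports it has already configured the E2E credits.
 */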
2276 static void gaudi_init_e2e(struct hl_device *hdev)
2277 {
2278         if (hdev->asic_prop.fw_security_enabled)
2279                 return;
2280
2281         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2282                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2283                 return;
2284
2285         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2286         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2287         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2288         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2289
2290         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2291         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2292         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2293         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2294
2295         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2296         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2297         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2298         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2299
2300         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2301         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2302         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2303         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2304
2305         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2306         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2307         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2308         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2309
2310         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2311         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2312         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2313         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2314
2315         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2316         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2317         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2318         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2319
2320         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2321         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2322         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2323         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2324
2325         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2326         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2327         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2328         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2329
2330         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2331         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2332         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2333         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2334
2335         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2336         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2337         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2338         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2339
2340         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2341         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2342         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2343         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2344
2345         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2346         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2347         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2348         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2349
2350         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2351         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2352         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2353         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2354
2355         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2356         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2357         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2358         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2359
2360         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2361         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2362         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2363         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2364
2365         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2366         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2367         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2368         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2369
2370         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2371         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2372         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2373         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2374
2375         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2376         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2377         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2378         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2379
2380         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2381         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2382         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2383         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2384
2385         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2386         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2387         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2388         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2389
2390         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2391         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2392         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2393         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2394
2395         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2396         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2397         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2398         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2399
2400         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2401         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2402         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2403         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2404
2405         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2406                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2407         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2408                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2409
2410         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2411                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2412         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2413                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2414
2415         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2416                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2417         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2418                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2419
2420         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2421                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2422         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2423                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2424
2425         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2426                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2427         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2428                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2429
2430         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2431                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2432         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2433                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2434
2435         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2436                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2437         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2438                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2439
2440         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2441                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2442         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2443                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2444
2445         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2446                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2447         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2448                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2449
2450         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2451                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2452         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2453                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2454
2455         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2456                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2457         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2458                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2459
2460         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2461                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2462         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2463                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2464
2465         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2466                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2467         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2468                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2469
2470         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2471                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2472         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2473                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2474
2475         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2476                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2477         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2478                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2479
2480         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2481                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2482         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2483                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2484
2485         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2486                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2487         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2488                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2489
2490         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2491                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2492         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2493                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2494
2495         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2496                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2497         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2498                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2499
2500         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2501                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2502         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2503                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2504
2505         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2506                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2507         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2508                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2509
2510         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2511                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2512         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2513                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2514
2515         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2516                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2517         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2518                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2519
2520         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2521                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2522         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2523                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2524 }
2525
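/*
 * Set the read/write credit counts of each DMA interface towards its two HBM
 * channels and enable the read/write credits. Skipped when the firmware
 * reports it has already configured the HBM credits.
 */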
2526 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2527 {
2528         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2529
2530         if (hdev->asic_prop.fw_security_enabled)
2531                 return;
2532
2533         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2534                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2535                 return;
2536
2537         hbm0_wr = 0x33333333;
2538         hbm0_rd = 0x77777777;
2539         hbm1_wr = 0x55555555;
2540         hbm1_rd = 0xDDDDDDDD;
2541
2542         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2543         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2544         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2545         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2546
2547         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2548         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2549         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2550         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2551
2552         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2553         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2554         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2555         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2556
2557         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2558         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2559         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2560         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2561
2562         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2563                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2564                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2565         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2566                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2567                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2568         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2569                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2570                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2571         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2572                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2573                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2574
2575         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2576                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2577                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2578         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2579                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2580                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2581         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2582                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2583                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2584         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2585                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2586                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2587 }
2588
2589 static void gaudi_init_golden_registers(struct hl_device *hdev)
2590 {
2591         u32 tpc_offset;
2592         int tpc_id, i;
2593
2594         gaudi_init_e2e(hdev);
2595         gaudi_init_hbm_cred(hdev);
2596
2597         for (tpc_id = 0, tpc_offset = 0;
2598                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2599                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2600                 /* Mask all arithmetic interrupts from TPC */
2601                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2602                 /* Set 16 cache lines */
2603                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2604                                 ICACHE_FETCH_LINE_NUM, 2);
2605         }
2606
2607         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2608         for (i = 0 ; i < 128 ; i += 8)
2609                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2610
2611         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2612         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2613         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2614         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2615 }
2616
2617 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2618                                         int qman_id, dma_addr_t qman_pq_addr)
2619 {
2620         struct cpu_dyn_regs *dyn_regs =
2621                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2622         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2623         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2624         u32 q_off, dma_qm_offset;
2625         u32 dma_qm_err_cfg, irq_handler_offset;
2626
2627         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2628
2629         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2630                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2631         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2632                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2633         so_base_en_lo = lower_32_bits(CFG_BASE +
2634                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2635         so_base_en_hi = upper_32_bits(CFG_BASE +
2636                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2637         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2638                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2639         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2640                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2641         so_base_ws_lo = lower_32_bits(CFG_BASE +
2642                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2643         so_base_ws_hi = upper_32_bits(CFG_BASE +
2644                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2645
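             /*
              * Per-stream QMAN registers (e.g. PQ_BASE_LO_0..3) are laid out
              * as consecutive 32-bit words, hence the qman_id * 4 offset.
              */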
2646         q_off = dma_qm_offset + qman_id * 4;
2647
2648         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2649         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2650
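             /* The PQ size register takes the log2 of the queue length */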
2651         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2652         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2653         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2654
2655         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2656         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2657                                                         QMAN_LDMA_SRC_OFFSET);
2658         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2659                                                         QMAN_LDMA_DST_OFFSET);
2660
2661         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2662         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2663         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2664         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2665         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2666         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2667         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2668         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2669
2670         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2671
2672         /* The following configuration is needed only once per QMAN */
2673         if (qman_id == 0) {
2674                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2675                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2676                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2677
2678                 /* Configure RAZWI IRQ */
2679                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2680                 if (hdev->stop_on_err)
2681                         dma_qm_err_cfg |=
2682                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2683
2684                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2685
2686                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2687                         lower_32_bits(CFG_BASE + irq_handler_offset));
2688                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2689                         upper_32_bits(CFG_BASE + irq_handler_offset));
2690
2691                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2692                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2693                                                                         dma_id);
2694
2695                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2696                                 QM_ARB_ERR_MSG_EN_MASK);
2697
2698                 /* Set timeout to maximum */
2699                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2700
2701                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2702                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2703
2704                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2705         }
2706 }
2707
2708 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2709 {
2710         struct cpu_dyn_regs *dyn_regs =
2711                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2712         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2713         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2714         u32 irq_handler_offset;
2715
2716         /* Set to maximum possible according to physical size */
2717         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2718         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2719
2720         /* WA for H/W bug H3-2116 */
2721         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2722
2723         /* STOP_ON bit implies no completion of the operation in case of RAZWI */
2724         if (hdev->stop_on_err)
2725                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2726
2727         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2728
2729         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2730                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2731                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2732
2733         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2734                 lower_32_bits(CFG_BASE + irq_handler_offset));
2735         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2736                 upper_32_bits(CFG_BASE + irq_handler_offset));
2737
2738         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2739                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2740         WREG32(mmDMA0_CORE_PROT + dma_offset,
2741                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2742         /* If the channel is secured, it should be in MMU bypass mode */
2743         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2744                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2745         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2746 }
2747
2748 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2749                                 u32 enable_mask)
2750 {
2751         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2752
2753         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2754 }
2755
2756 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2757 {
2758         struct gaudi_device *gaudi = hdev->asic_specific;
2759         struct hl_hw_queue *q;
2760         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2761
2762         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2763                 return;
2764
2765         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2766                 dma_id = gaudi_dma_assignment[i];
2767                 /*
2768                  * For queues after the CPU Q we need to add 1 to get the
2769                  * correct queue index. In addition, the CPU EQ and NIC IRQs
2770                  * must be added in order to get the correct MSI register.
2771                  */
2772                 if (dma_id > 1) {
2773                         cpu_skip = 1;
2774                         nic_skip = NIC_NUMBER_OF_ENGINES;
2775                 } else {
2776                         cpu_skip = 0;
2777                         nic_skip = 0;
2778                 }
2779
2780                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2781                         q_idx = 4 * dma_id + j + cpu_skip;
2782                         q = &hdev->kernel_queues[q_idx];
2783                         q->cq_id = cq_id++;
2784                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2785                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2786                                                 q->bus_address);
2787                 }
2788
2789                 gaudi_init_dma_core(hdev, dma_id);
2790
2791                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2792         }
2793
2794         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2795 }
2796
2797 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2798                                         int qman_id, u64 qman_base_addr)
2799 {
2800         struct cpu_dyn_regs *dyn_regs =
2801                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2802         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2803         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2804         u32 dma_qm_err_cfg, irq_handler_offset;
2805         u32 q_off, dma_qm_offset;
2806
2807         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2808
2809         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2810                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2811         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2812                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2813         so_base_en_lo = lower_32_bits(CFG_BASE +
2814                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2815         so_base_en_hi = upper_32_bits(CFG_BASE +
2816                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2817         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2818                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2819         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2820                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2821         so_base_ws_lo = lower_32_bits(CFG_BASE +
2822                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2823         so_base_ws_hi = upper_32_bits(CFG_BASE +
2824                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2825
2826         q_off = dma_qm_offset + qman_id * 4;
2827
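             /*
              * Streams 0-3 are the upper CPs, each with its own PQ; qman_id 4
              * is the lower CP (no PQ), so the global error and arbitration
              * configuration is done on that pass instead.
              */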
2828         if (qman_id < 4) {
2829                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2830                                         lower_32_bits(qman_base_addr));
2831                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2832                                         upper_32_bits(qman_base_addr));
2833
2834                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2835                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2836                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2837
2838                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2839                                                         QMAN_CPDMA_SIZE_OFFSET);
2840                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2841                                                         QMAN_CPDMA_SRC_OFFSET);
2842                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2843                                                         QMAN_CPDMA_DST_OFFSET);
2844         } else {
2845                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2846                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2847                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2848
2849                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2850                                                         QMAN_LDMA_SIZE_OFFSET);
2851                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2852                                                         QMAN_LDMA_SRC_OFFSET);
2853                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2854                                                         QMAN_LDMA_DST_OFFSET);
2855
2856                 /* Configure RAZWI IRQ */
2857                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2858                 if (hdev->stop_on_err)
2859                         dma_qm_err_cfg |=
2860                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2861
2862                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2863
2864                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2865                         lower_32_bits(CFG_BASE + irq_handler_offset));
2866                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2867                         upper_32_bits(CFG_BASE + irq_handler_offset));
2868
2869                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2870                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2871                                                                         dma_id);
2872
2873                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2874                                 QM_ARB_ERR_MSG_EN_MASK);
2875
2876                 /* Set timeout to maximum */
2877                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2878
2879                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2880                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2881                                 QMAN_INTERNAL_MAKE_TRUSTED);
2882         }
2883
2884         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2885         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2886         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2887         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2888
2889         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2890         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2891                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2892                                 mtr_base_ws_lo);
2893                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2894                                 mtr_base_ws_hi);
2895                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2896                                 so_base_ws_lo);
2897                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2898                                 so_base_ws_hi);
2899         }
2900 }
2901
2902 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2903 {
2904         struct gaudi_device *gaudi = hdev->asic_specific;
2905         struct gaudi_internal_qman_info *q;
2906         u64 qman_base_addr;
2907         int i, j, dma_id, internal_q_index;
2908
2909         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2910                 return;
2911
2912         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2913                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2914
2915                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2916                          /*
2917                           * Add the CPU queue in order to get the correct queue
2918                           * number, as all internal queues are placed after it
2919                           */
2920                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2921
2922                         q = &gaudi->internal_qmans[internal_q_index];
2923                         qman_base_addr = (u64) q->pq_dma_addr;
2924                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2925                                                 qman_base_addr);
2926                 }
2927
2928                 /* Initializing lower CP for HBM DMA QMAN */
2929                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2930
2931                 gaudi_init_dma_core(hdev, dma_id);
2932
2933                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2934         }
2935
2936         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2937 }
2938
2939 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2940                                         int qman_id, u64 qman_base_addr)
2941 {
2942         struct cpu_dyn_regs *dyn_regs =
2943                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2944         u32 mtr_base_lo, mtr_base_hi;
2945         u32 so_base_lo, so_base_hi;
2946         u32 irq_handler_offset;
2947         u32 q_off, mme_id;
2948         u32 mme_qm_err_cfg;
2949
2950         mtr_base_lo = lower_32_bits(CFG_BASE +
2951                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2952         mtr_base_hi = upper_32_bits(CFG_BASE +
2953                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2954         so_base_lo = lower_32_bits(CFG_BASE +
2955                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2956         so_base_hi = upper_32_bits(CFG_BASE +
2957                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2958
2959         q_off = mme_offset + qman_id * 4;
2960
2961         if (qman_id < 4) {
2962                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2963                                         lower_32_bits(qman_base_addr));
2964                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2965                                         upper_32_bits(qman_base_addr));
2966
2967                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2968                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2969                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2970
2971                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2972                                                         QMAN_CPDMA_SIZE_OFFSET);
2973                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2974                                                         QMAN_CPDMA_SRC_OFFSET);
2975                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2976                                                         QMAN_CPDMA_DST_OFFSET);
2977         } else {
2978                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2979                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2980                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2981
2982                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2983                                                         QMAN_LDMA_SIZE_OFFSET);
2984                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2985                                                         QMAN_LDMA_SRC_OFFSET);
2986                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2987                                                         QMAN_LDMA_DST_OFFSET);
2988
2989                 /* Configure RAZWI IRQ */
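                     /*
                      * Only the MME0 and MME2 QMANs are used by the driver, so
                      * fold the register offset into a 0/1 MME index here.
                      */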
2990                 mme_id = mme_offset /
2991                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2992
2993                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2994                 if (hdev->stop_on_err)
2995                         mme_qm_err_cfg |=
2996                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2997
2998                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2999
3000                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3001                         lower_32_bits(CFG_BASE + irq_handler_offset));
3002                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3003                         upper_32_bits(CFG_BASE + irq_handler_offset));
3004
3005                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3006                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3007                                                                         mme_id);
3008
3009                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3010                                 QM_ARB_ERR_MSG_EN_MASK);
3011
3012                 /* Set timeout to maximum */
3013                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
3014
3015                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3016                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3017                                 QMAN_INTERNAL_MAKE_TRUSTED);
3018         }
3019
3020         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3021         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3022         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3023         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3024 }
3025
3026 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3027 {
3028         struct gaudi_device *gaudi = hdev->asic_specific;
3029         struct gaudi_internal_qman_info *q;
3030         u64 qman_base_addr;
3031         u32 mme_offset;
3032         int i, internal_q_index;
3033
3034         if (gaudi->hw_cap_initialized & HW_CAP_MME)
3035                 return;
3036
3037         /*
3038          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3039          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3040          */
3041
3042         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3043
3044         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3045                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3046                 q = &gaudi->internal_qmans[internal_q_index];
3047                 qman_base_addr = (u64) q->pq_dma_addr;
3048                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3049                                         qman_base_addr);
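                     /*
                      * After the 4 streams of the N_W MME (MME2), switch the
                      * offset to the S_W MME (MME0) for the next 4 streams.
                      */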
3050                 if (i == 3)
3051                         mme_offset = 0;
3052         }
3053
3054         /* Initializing lower CP for MME QMANs */
3055         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3056         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3057         gaudi_init_mme_qman(hdev, 0, 4, 0);
3058
3059         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3060         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3061
3062         gaudi->hw_cap_initialized |= HW_CAP_MME;
3063 }
3064
3065 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3066                                 int qman_id, u64 qman_base_addr)
3067 {
3068         struct cpu_dyn_regs *dyn_regs =
3069                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3070         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3071         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3072         u32 tpc_qm_err_cfg, irq_handler_offset;
3073         u32 q_off, tpc_id;
3074
3075         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3076                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3077         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3078                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3079         so_base_en_lo = lower_32_bits(CFG_BASE +
3080                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3081         so_base_en_hi = upper_32_bits(CFG_BASE +
3082                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3083         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3084                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3085         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3086                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3087         so_base_ws_lo = lower_32_bits(CFG_BASE +
3088                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3089         so_base_ws_hi = upper_32_bits(CFG_BASE +
3090                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3091
3092         q_off = tpc_offset + qman_id * 4;
3093
3094         tpc_id = tpc_offset /
3095                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3096
3097         if (qman_id < 4) {
3098                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3099                                         lower_32_bits(qman_base_addr));
3100                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3101                                         upper_32_bits(qman_base_addr));
3102
3103                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3104                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3105                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3106
3107                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3108                                                         QMAN_CPDMA_SIZE_OFFSET);
3109                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3110                                                         QMAN_CPDMA_SRC_OFFSET);
3111                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3112                                                         QMAN_CPDMA_DST_OFFSET);
3113         } else {
3114                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3115                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3116                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3117
3118                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3119                                                         QMAN_LDMA_SIZE_OFFSET);
3120                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3121                                                         QMAN_LDMA_SRC_OFFSET);
3122                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3123                                                         QMAN_LDMA_DST_OFFSET);
3124
3125                 /* Configure RAZWI IRQ */
3126                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3127                 if (hdev->stop_on_err)
3128                         tpc_qm_err_cfg |=
3129                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3130
3131                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3132
3133                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3134                         lower_32_bits(CFG_BASE + irq_handler_offset));
3135                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3136                         upper_32_bits(CFG_BASE + irq_handler_offset));
3137
3138                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3139                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3140                                                                         tpc_id);
3141
3142                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3143                                 QM_ARB_ERR_MSG_EN_MASK);
3144
3145                 /* Set timeout to maximum */
3146                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3147
3148                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3149                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3150                                 QMAN_INTERNAL_MAKE_TRUSTED);
3151         }
3152
3153         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3154         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3155         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3156         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3157
3158         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3159         if (tpc_id == 6) {
3160                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3161                                 mtr_base_ws_lo);
3162                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3163                                 mtr_base_ws_hi);
3164                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3165                                 so_base_ws_lo);
3166                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3167                                 so_base_ws_hi);
3168         }
3169 }
3170
3171 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3172 {
3173         struct gaudi_device *gaudi = hdev->asic_specific;
3174         struct gaudi_internal_qman_info *q;
3175         u64 qman_base_addr;
3176         u32 so_base_hi, tpc_offset = 0;
3177         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3178                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3179         int i, tpc_id, internal_q_index;
3180
3181         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3182                 return;
3183
3184         so_base_hi = upper_32_bits(CFG_BASE +
3185                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3186
3187         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3188                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3189                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3190                                                 tpc_id * QMAN_STREAMS + i;
3191                         q = &gaudi->internal_qmans[internal_q_index];
3192                         qman_base_addr = (u64) q->pq_dma_addr;
3193                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3194                                                 qman_base_addr);
3195
3196                         if (i == 3) {
3197                                 /* Initializing lower CP for TPC QMAN */
3198                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3199
3200                                 /* Enable the QMAN and TPC channel */
3201                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3202                                                 QMAN_TPC_ENABLE);
3203                         }
3204                 }
3205
3206                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3207                                 so_base_hi);
3208
3209                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3210
3211                 gaudi->hw_cap_initialized |=
3212                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3213         }
3214 }
3215
3216 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3217                                 int qman_id, u64 qman_base_addr, int nic_id)
3218 {
3219         struct cpu_dyn_regs *dyn_regs =
3220                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3221         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3222         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3223         u32 nic_qm_err_cfg, irq_handler_offset;
3224         u32 q_off;
3225
3226         mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3227                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3228         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3229                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3230         so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3231                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3232         so_base_en_hi = upper_32_bits(CFG_BASE +
3233                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3234         mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3235                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3236         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3237                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3238         so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3239                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3240         so_base_ws_hi = upper_32_bits(CFG_BASE +
3241                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3242
3243         q_off = nic_offset + qman_id * 4;
3244
3245         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3246         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3247
3248         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3249         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3250         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3251
3252         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3253                                                         QMAN_LDMA_SIZE_OFFSET);
3254         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3255                                                         QMAN_LDMA_SRC_OFFSET);
3256         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3257                                                         QMAN_LDMA_DST_OFFSET);
3258
3259         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3260         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3261         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3262         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3263
3264         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3265         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3266         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3267         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3268         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3269
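             /* The following configuration is needed only once per QMAN */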
3270         if (qman_id == 0) {
3271                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3272                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3273                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3274
3275                 /* Configure RAZWI IRQ */
3276                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3277                 if (hdev->stop_on_err)
3278                         nic_qm_err_cfg |=
3279                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3280
3281                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3282
3283                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3284                         lower_32_bits(CFG_BASE + irq_handler_offset));
3285                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3286                         upper_32_bits(CFG_BASE + irq_handler_offset));
3287
3288                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3289                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3290                                                                         nic_id);
3291
3292                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3293                                 QM_ARB_ERR_MSG_EN_MASK);
3294
3295                 /* Set timeout to maximum */
3296                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3297
3298                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3299                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3300                                 QMAN_INTERNAL_MAKE_TRUSTED);
3301         }
3302 }
3303
3304 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3305 {
3306         struct gaudi_device *gaudi = hdev->asic_specific;
3307         struct gaudi_internal_qman_info *q;
3308         u64 qman_base_addr;
3309         u32 nic_offset = 0;
3310         u32 nic_delta_between_qmans =
3311                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3312         u32 nic_delta_between_nics =
3313                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3314         int i, nic_id, internal_q_index;
3315
3316         if (!hdev->nic_ports_mask)
3317                 return;
3318
3319         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3320                 return;
3321
3322         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3323
3324         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3325                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3326                         nic_offset += nic_delta_between_qmans;
3327                         if (nic_id & 1) {
3328                                 nic_offset -= (nic_delta_between_qmans * 2);
3329                                 nic_offset += nic_delta_between_nics;
3330                         }
3331                         continue;
3332                 }
3333
3334                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3335                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3336                                                 nic_id * QMAN_STREAMS + i;
3337                         q = &gaudi->internal_qmans[internal_q_index];
3338                         qman_base_addr = (u64) q->pq_dma_addr;
3339                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3340                                                 qman_base_addr, nic_id);
3341                 }
3342
3343                 /* Enable the QMAN */
3344                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3345
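                     /*
                      * Each NIC block exposes two QMANs (QM0/QM1): advance to
                      * the next QMAN, and after the odd port jump past both
                      * QMANs to the next NIC block.
                      */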
3346                 nic_offset += nic_delta_between_qmans;
3347                 if (nic_id & 1) {
3348                         nic_offset -= (nic_delta_between_qmans * 2);
3349                         nic_offset += nic_delta_between_nics;
3350                 }
3351
3352                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3353         }
3354 }
3355
3356 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3357 {
3358         struct gaudi_device *gaudi = hdev->asic_specific;
3359
3360         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3361                 return;
3362
3363         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3364         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3365         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3366 }
3367
3368 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3369 {
3370         struct gaudi_device *gaudi = hdev->asic_specific;
3371
3372         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3373                 return;
3374
3375         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3376         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3377         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3378         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3379         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3380 }
3381
3382 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3383 {
3384         struct gaudi_device *gaudi = hdev->asic_specific;
3385
3386         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3387                 return;
3388
3389         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3390         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3391 }
3392
3393 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3394 {
3395         struct gaudi_device *gaudi = hdev->asic_specific;
3396         u32 tpc_offset = 0;
3397         int tpc_id;
3398
3399         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3400                 return;
3401
3402         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3403                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3404                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3405         }
3406 }
3407
3408 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3409 {
3410         struct gaudi_device *gaudi = hdev->asic_specific;
3411         u32 nic_mask, nic_offset = 0;
3412         u32 nic_delta_between_qmans =
3413                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3414         u32 nic_delta_between_nics =
3415                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3416         int nic_id;
3417
3418         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3419                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3420
3421                 if (gaudi->hw_cap_initialized & nic_mask)
3422                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3423
3424                 nic_offset += nic_delta_between_qmans;
3425                 if (nic_id & 1) {
3426                         nic_offset -= (nic_delta_between_qmans * 2);
3427                         nic_offset += nic_delta_between_nics;
3428                 }
3429         }
3430 }
3431
3432 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3433 {
3434         struct gaudi_device *gaudi = hdev->asic_specific;
3435
3436         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3437                 return;
3438
3439         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
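             /*
              * 0xF stops only the 4 upper CPs; the HBM DMA/MME/TPC QMANs below
              * use 0x1F in order to stop the lower CP as well.
              */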
3440         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3441         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3442         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3443 }
3444
3445 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3446 {
3447         struct gaudi_device *gaudi = hdev->asic_specific;
3448
3449         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3450                 return;
3451
3452         /* Stop CPs of HBM DMA QMANs */
3453
3454         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3455         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3456         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3457         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3458         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3459 }
3460
3461 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3462 {
3463         struct gaudi_device *gaudi = hdev->asic_specific;
3464
3465         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3466                 return;
3467
3468         /* Stop CPs of MME QMANs */
3469         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3470         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3471 }
3472
3473 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3474 {
3475         struct gaudi_device *gaudi = hdev->asic_specific;
3476
3477         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3478                 return;
3479
3480         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3481         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3482         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3483         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3484         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3485         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3486         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3487         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3488 }
3489
3490 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3491 {
3492         struct gaudi_device *gaudi = hdev->asic_specific;
3493
3494         /* Stop upper CPs of QMANs */
3495
3496         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3497                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3498                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3499                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3500                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3501
3502         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3503                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3504                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3505                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3506                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3507
3508         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3509                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3510                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3511                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3512                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3513
3514         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3515                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3516                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3517                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3518                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3519
3520         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3521                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3522                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3523                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3524                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3525
3526         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3527                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3528                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3529                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3530                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3531
3532         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3533                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3534                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3535                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3536                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3537
3538         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3539                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3540                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3541                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3542                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3543
3544         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3545                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3546                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3547                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3548                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3549
3550         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3551                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3552                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3553                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3554                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3555 }
3556
3557 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3558 {
3559         struct gaudi_device *gaudi = hdev->asic_specific;
3560
3561         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3562                 return;
3563
3564         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3565         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3566         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3567 }
3568
3569 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3570 {
3571         struct gaudi_device *gaudi = hdev->asic_specific;
3572
3573         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3574                 return;
3575
3576         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3577         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3578         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3579         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3580         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3581 }
3582
3583 static void gaudi_mme_stall(struct hl_device *hdev)
3584 {
3585         struct gaudi_device *gaudi = hdev->asic_specific;
3586
3587         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3588                 return;
3589
3590         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3591         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3592         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3593         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3594         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3595         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3596         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3597         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3598         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3599         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3600         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3601         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3602         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3603         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3604         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3605         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3606         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3607 }
3608
3609 static void gaudi_tpc_stall(struct hl_device *hdev)
3610 {
3611         struct gaudi_device *gaudi = hdev->asic_specific;
3612
3613         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3614                 return;
3615
3616         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3617         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3618         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3619         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3620         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3621         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3622         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3623         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3624 }
3625
3626 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3627 {
3628         u32 qman_offset;
3629         int i;
3630
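             /*
              * When FW security is enabled, these registers are assumed to be
              * owned and configured by the FW, so the driver must not touch
              * them.
              */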
3631         if (hdev->asic_prop.fw_security_enabled)
3632                 return;
3633
3634         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3635                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3636                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3637
3638                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3639         }
3640
3641         WREG32(mmMME0_QM_CGM_CFG, 0);
3642         WREG32(mmMME0_QM_CGM_CFG1, 0);
3643         WREG32(mmMME2_QM_CGM_CFG, 0);
3644         WREG32(mmMME2_QM_CGM_CFG1, 0);
3645
3646         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3647                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3648                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3649
3650                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3651         }
3652 }
3653
3654 static void gaudi_enable_timestamp(struct hl_device *hdev)
3655 {
3656         /* Disable the timestamp counter */
3657         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3658
3659         /* Zero the lower/upper parts of the 64-bit counter */
3660         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3661         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3662
3663         /* Enable the counter */
3664         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3665 }
3666
3667 static void gaudi_disable_timestamp(struct hl_device *hdev)
3668 {
3669         /* Disable the timestamp counter */
3670         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3671 }
3672
3673 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3674 {
3675         u32 wait_timeout_ms;
3676
3677         if (hdev->pldm)
3678                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3679         else
3680                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3681
3682         if (fw_reset)
3683                 goto skip_engines;
3684
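             /*
              * Quiesce in stages: first stop the QMANs from dispatching new
              * work, then stall the engines themselves, and finally disable
              * the QMANs, with a wait between each stage.
              */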
3685         gaudi_stop_nic_qmans(hdev);
3686         gaudi_stop_mme_qmans(hdev);
3687         gaudi_stop_tpc_qmans(hdev);
3688         gaudi_stop_hbm_dma_qmans(hdev);
3689         gaudi_stop_pci_dma_qmans(hdev);
3690
3691         msleep(wait_timeout_ms);
3692
3693         gaudi_pci_dma_stall(hdev);
3694         gaudi_hbm_dma_stall(hdev);
3695         gaudi_tpc_stall(hdev);
3696         gaudi_mme_stall(hdev);
3697
3698         msleep(wait_timeout_ms);
3699
3700         gaudi_disable_nic_qmans(hdev);
3701         gaudi_disable_mme_qmans(hdev);
3702         gaudi_disable_tpc_qmans(hdev);
3703         gaudi_disable_hbm_dma_qmans(hdev);
3704         gaudi_disable_pci_dma_qmans(hdev);
3705
3706         gaudi_disable_timestamp(hdev);
3707
3708 skip_engines:
3709         gaudi_disable_msi(hdev);
3710 }
3711
3712 static int gaudi_mmu_init(struct hl_device *hdev)
3713 {
3714         struct asic_fixed_properties *prop = &hdev->asic_prop;
3715         struct gaudi_device *gaudi = hdev->asic_specific;
3716         u64 hop0_addr;
3717         int rc, i;
3718
3719         if (!hdev->mmu_enable)
3720                 return 0;
3721
3722         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3723                 return 0;
3724
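             /*
              * Each ASID gets its own hop-0 table, laid out consecutively in
              * the MMU page-table area.
              */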
3725         for (i = 0 ; i < prop->max_asid ; i++) {
3726                 hop0_addr = prop->mmu_pgt_addr +
3727                                 (i * prop->mmu_hop_table_size);
3728
3729                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3730                 if (rc) {
3731                         dev_err(hdev->dev,
3732                                 "failed to set hop0 addr for asid %d\n", i);
3733                         goto err;
3734                 }
3735         }
3736
3737         /* init MMU cache management page */
3738         WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3739         WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3740
3741         /* mem cache invalidation */
3742         WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3743
3744         hl_mmu_invalidate_cache(hdev, true, 0);
3745
3746         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3747         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3748
3749         WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3750
3751         /*
3752          * The H/W expects the first PI after init to be 1. After wraparound
3753          * we'll write 0.
3754          */
3755         gaudi->mmu_cache_inv_pi = 1;
3756
3757         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3758
3759         return 0;
3760
3761 err:
3762         return rc;
3763 }
3764
3765 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3766 {
3767         void __iomem *dst;
3768
3769         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3770
3771         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3772 }
3773
3774 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3775 {
3776         void __iomem *dst;
3777
3778         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3779
3780         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3781 }
3782
3783 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3784 {
3785         struct dynamic_fw_load_mgr *dynamic_loader;
3786         struct cpu_dyn_regs *dyn_regs;
3787
3788         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3789
3790         /*
3791          * Set initial values for a few specific dynamic registers. Before
3792          * reading the first descriptor from the FW, these values have to
3793          * be hard-coded. In later stages of the protocol they are updated
3794          * automatically by reading the FW descriptor, so the data there
3795          * will always be up-to-date.
3796          */
3797         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3798         dyn_regs->kmd_msg_to_cpu =
3799                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3800         dyn_regs->cpu_cmd_status_to_host =
3801                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3802
3803         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3804 }
3805
3806 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3807 {
3808         struct static_fw_load_mgr *static_loader;
3809
3810         static_loader = &hdev->fw_loader.static_loader;
3811
3812         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3813         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3814         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3815         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3816         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3817         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3818         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3819         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3820         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3821         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3822         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3823         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3824         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3825                         GAUDI_PLDM_RESET_WAIT_MSEC :
3826                         GAUDI_CPU_RESET_WAIT_MSEC;
3827 }
3828
3829 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3830 {
3831         struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3832
3833         pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3834         pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3835         pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3836         pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3837         pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3838         pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3839 }
3840
3841 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3842 {
3843         struct asic_fixed_properties *prop = &hdev->asic_prop;
3844         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3845
3846         /* fill common fields */
3847         fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3848         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3849         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3850         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3851         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3852         fw_loader->skip_bmc = !hdev->bmc_enable;
3853         fw_loader->sram_bar_id = SRAM_BAR_ID;
3854         fw_loader->dram_bar_id = HBM_BAR_ID;
3855
3856         if (prop->dynamic_fw_load)
3857                 gaudi_init_dynamic_firmware_loader(hdev);
3858         else
3859                 gaudi_init_static_firmware_loader(hdev);
3860 }
3861
3862 static int gaudi_init_cpu(struct hl_device *hdev)
3863 {
3864         struct gaudi_device *gaudi = hdev->asic_specific;
3865         int rc;
3866
3867         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3868                 return 0;
3869
3870         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3871                 return 0;
3872
3873         /*
3874          * The device CPU works with 40-bit addresses.
3875          * This register sets the extension to 50 bits.
3876          */
3877         if (!hdev->asic_prop.fw_security_enabled)
3878                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3879
3880         rc = hl_fw_init_cpu(hdev);
3881
3882         if (rc)
3883                 return rc;
3884
3885         gaudi->hw_cap_initialized |= HW_CAP_CPU;
3886
3887         return 0;
3888 }
3889
3890 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3891 {
3892         struct cpu_dyn_regs *dyn_regs =
3893                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3894         struct asic_fixed_properties *prop = &hdev->asic_prop;
3895         struct gaudi_device *gaudi = hdev->asic_specific;
3896         u32 status, irq_handler_offset;
3897         struct hl_eq *eq;
3898         struct hl_hw_queue *cpu_pq =
3899                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3900         int err;
3901
3902         if (!hdev->cpu_queues_enable)
3903                 return 0;
3904
3905         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3906                 return 0;
3907
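        /*
         * Hand the PQ/EQ/CQ base addresses and sizes to the device CPU, mark
         * readiness in CPU_IF_QUEUE_INIT, kick the PI-update interrupt and
         * then poll until the F/W reports PQ_INIT_STATUS_READY_FOR_HOST.
         */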
3908         eq = &hdev->event_queue;
3909
3910         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3911         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3912
3913         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3914         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3915
3916         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3917                         lower_32_bits(hdev->cpu_accessible_dma_address));
3918         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3919                         upper_32_bits(hdev->cpu_accessible_dma_address));
3920
3921         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3922         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3923         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3924
3925         /* Used for EQ CI */
3926         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3927
3928         WREG32(mmCPU_IF_PF_PQ_PI, 0);
3929
3930         if (gaudi->multi_msi_mode)
3931                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3932         else
3933                 WREG32(mmCPU_IF_QUEUE_INIT,
3934                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3935
3936         irq_handler_offset = prop->gic_interrupts_enable ?
3937                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3938                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3939
3940         WREG32(irq_handler_offset,
3941                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3942
3943         err = hl_poll_timeout(
3944                 hdev,
3945                 mmCPU_IF_QUEUE_INIT,
3946                 status,
3947                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3948                 1000,
3949                 cpu_timeout);
3950
3951         if (err) {
3952                 dev_err(hdev->dev,
3953                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3954                 return -EIO;
3955         }
3956
3957         /* update FW application security bits */
3958         if (prop->fw_cpu_boot_dev_sts0_valid)
3959                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3960         if (prop->fw_cpu_boot_dev_sts1_valid)
3961                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3962
3963         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3964         return 0;
3965 }
3966
3967 static void gaudi_pre_hw_init(struct hl_device *hdev)
3968 {
3969         /* Perform read from the device to make sure device is up */
3970         RREG32(mmHW_STATE);
3971
3972         if (!hdev->asic_prop.fw_security_enabled) {
3973                 /* Set the access through PCI bars (Linux driver only) as
3974                  * secured
3975                  */
3976                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3977                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3978                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3979
3980                 /* Perform read to flush the waiting writes to ensure
3981                  * configuration was set in the device
3982                  */
3983                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3984         }
3985
3986         /*
3987          * Let's mark in the H/W that we have reached this point. We check
3988          * this value in the reset_before_init function to understand whether
3989          * we need to reset the chip before doing H/W init. This register is
3990          * cleared by the H/W upon H/W reset
3991          */
3992         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3993 }
3994
3995 static int gaudi_hw_init(struct hl_device *hdev)
3996 {
3997         struct gaudi_device *gaudi = hdev->asic_specific;
3998         int rc;
3999
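        /*
         * H/W bring-up order: pre-init marking, HBM BAR setup, device CPU
         * (F/W load), scramblers, golden registers, MMU, security, QMANs,
         * timestamp counter, MSI and finally the CPU queues. Later steps
         * depend on earlier ones (e.g. the CPU queues require MSI).
         */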
4000         gaudi_pre_hw_init(hdev);
4001
4002         /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
4003          * Record that here, so any later attempt to move the BAR to a
4004          * different address will be flagged as an error.
4005          */
4006         if (hdev->asic_prop.iatu_done_by_fw)
4007                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4008
4009         /*
4010          * Before pushing u-boot/linux to the device, the HBM BAR must be
4011          * set to the base address of DRAM.
4012          */
4013         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4014                 dev_err(hdev->dev,
4015                         "failed to map HBM bar to DRAM base address\n");
4016                 return -EIO;
4017         }
4018
4019         rc = gaudi_init_cpu(hdev);
4020         if (rc) {
4021                 dev_err(hdev->dev, "failed to initialize CPU\n");
4022                 return rc;
4023         }
4024
4025         /* In case the clock gating was enabled in preboot we need to disable
4026          * it here before touching the MME/TPC registers.
4027          */
4028         gaudi_disable_clock_gating(hdev);
4029
4030         /* SRAM scrambler must be initialized after CPU is running from HBM */
4031         gaudi_init_scrambler_sram(hdev);
4032
4033         /* This is here just in case we are working without CPU */
4034         gaudi_init_scrambler_hbm(hdev);
4035
4036         gaudi_init_golden_registers(hdev);
4037
4038         rc = gaudi_mmu_init(hdev);
4039         if (rc)
4040                 return rc;
4041
4042         gaudi_init_security(hdev);
4043
4044         gaudi_init_pci_dma_qmans(hdev);
4045
4046         gaudi_init_hbm_dma_qmans(hdev);
4047
4048         gaudi_init_mme_qmans(hdev);
4049
4050         gaudi_init_tpc_qmans(hdev);
4051
4052         gaudi_init_nic_qmans(hdev);
4053
4054         gaudi_enable_timestamp(hdev);
4055
4056         /* MSI must be enabled before CPU queues and NIC are initialized */
4057         rc = gaudi_enable_msi(hdev);
4058         if (rc)
4059                 goto disable_queues;
4060
4061         /* must be called after MSI was enabled */
4062         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4063         if (rc) {
4064                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4065                         rc);
4066                 goto disable_msi;
4067         }
4068
4069         /* Perform read from the device to flush all configuration */
4070         RREG32(mmHW_STATE);
4071
4072         return 0;
4073
4074 disable_msi:
4075         gaudi_disable_msi(hdev);
4076 disable_queues:
4077         gaudi_disable_mme_qmans(hdev);
4078         gaudi_disable_pci_dma_qmans(hdev);
4079
4080         return rc;
4081 }
4082
4083 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4084 {
4085         struct cpu_dyn_regs *dyn_regs =
4086                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4087         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4088         struct gaudi_device *gaudi = hdev->asic_specific;
4089         bool driver_performs_reset;
4090
4091         if (!hard_reset) {
4092                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4093                 return;
4094         }
4095
4096         if (hdev->pldm) {
4097                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4098                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4099         } else {
4100                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4101                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4102         }
4103
4104         if (fw_reset) {
4105                 dev_dbg(hdev->dev,
4106                         "Firmware performs HARD reset, going to wait %dms\n",
4107                         reset_timeout_ms);
4108
4109                 goto skip_reset;
4110         }
4111
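        /*
         * The driver toggles the reset registers itself only when F/W security
         * is disabled and the F/W doesn't perform the hard reset on its own;
         * otherwise the reset request is delegated to the F/W.
         */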
4112         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4113                                         !hdev->asic_prop.hard_reset_done_by_fw);
4114
4115         /* Set device to handle FLR by H/W as we will put the device CPU to
4116          * halt mode
4117          */
4118         if (driver_performs_reset)
4119                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4120                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4121
4122         /* If linux is loaded in the device CPU we need to communicate with it
4123          * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4124          * registers in case of old F/Ws
4125          */
4126         if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4127                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4128                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4129                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4130
4131                 WREG32(irq_handler_offset,
4132                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4133
4134                 /* This is a hail-mary attempt to revive the card in the small chance that the
4135                  * f/w has experienced a watchdog event, which caused it to return back to preboot.
4136                  * In that case, triggering reset through GIC won't help. We need to trigger the
4137                  * reset as if Linux wasn't loaded.
4138                  *
4139                  * We do it only if the reset cause was HB, because that would be the indication
4140                  * of such an event.
4141                  *
4142                  * In case watchdog hasn't expired but we still got HB, then this won't do any
4143                  * damage.
4144                  */
4145                 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4146                         if (hdev->asic_prop.hard_reset_done_by_fw)
4147                                 hl_fw_ask_hard_reset_without_linux(hdev);
4148                         else
4149                                 hl_fw_ask_halt_machine_without_linux(hdev);
4150                 }
4151         } else {
4152                 if (hdev->asic_prop.hard_reset_done_by_fw)
4153                         hl_fw_ask_hard_reset_without_linux(hdev);
4154                 else
4155                         hl_fw_ask_halt_machine_without_linux(hdev);
4156         }
4157
4158         if (driver_performs_reset) {
4159
4160                 /* Configure the reset registers. Must be done as early as
4161                  * possible in case we fail during H/W initialization
4162                  */
4163                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4164                                                 (CFG_RST_H_DMA_MASK |
4165                                                 CFG_RST_H_MME_MASK |
4166                                                 CFG_RST_H_SM_MASK |
4167                                                 CFG_RST_H_TPC_7_MASK));
4168
4169                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4170
4171                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4172                                                 (CFG_RST_H_HBM_MASK |
4173                                                 CFG_RST_H_TPC_7_MASK |
4174                                                 CFG_RST_H_NIC_MASK |
4175                                                 CFG_RST_H_SM_MASK |
4176                                                 CFG_RST_H_DMA_MASK |
4177                                                 CFG_RST_H_MME_MASK |
4178                                                 CFG_RST_H_CPU_MASK |
4179                                                 CFG_RST_H_MMU_MASK));
4180
4181                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4182                                                 (CFG_RST_L_IF_MASK |
4183                                                 CFG_RST_L_PSOC_MASK |
4184                                                 CFG_RST_L_TPC_MASK));
4185
4186                 msleep(cpu_timeout_ms);
4187
4188                 /* Tell ASIC not to re-initialize PCIe */
4189                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4190
4191                 /* Restart BTL/BLR upon hard-reset */
4192                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4193
4194                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4195                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4196
4197                 dev_dbg(hdev->dev,
4198                         "Issued HARD reset command, going to wait %dms\n",
4199                         reset_timeout_ms);
4200         } else {
4201                 dev_dbg(hdev->dev,
4202                         "Firmware performs HARD reset, going to wait %dms\n",
4203                         reset_timeout_ms);
4204         }
4205
4206 skip_reset:
4207         /*
4208          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4209          * itself is in reset. Need to wait until the reset is deasserted
4210          */
4211         msleep(reset_timeout_ms);
4212
4213         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4214         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4215                 dev_err(hdev->dev,
4216                         "Timeout while waiting for device to reset 0x%x\n",
4217                         status);
4218
4219         if (gaudi) {
4220                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4221                                                 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4222                                                 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4223                                                 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4224                                                 HW_CAP_HBM_SCRAMBLER);
4225
4226                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4227
4228                 hdev->device_cpu_is_halted = false;
4229         }
4230 }
4231
4232 static int gaudi_suspend(struct hl_device *hdev)
4233 {
4234         int rc;
4235
4236         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4237         if (rc)
4238                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4239
4240         return rc;
4241 }
4242
4243 static int gaudi_resume(struct hl_device *hdev)
4244 {
4245         return gaudi_init_iatu(hdev);
4246 }
4247
4248 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4249                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4250 {
4251         int rc;
4252
4253         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4254                         VM_DONTCOPY | VM_NORESERVE;
4255
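        /*
         * dma_addr carries the HOST_PHYS_BASE offset that was added at
         * allocation time to form the device-side address; strip it to get
         * back the CPU-visible DMA address before mapping to user-space.
         */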
4256         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4257                                 (dma_addr - HOST_PHYS_BASE), size);
4258         if (rc)
4259                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4260
4261         return rc;
4262 }
4263
4264 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4265 {
4266         struct cpu_dyn_regs *dyn_regs =
4267                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4268         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4269         struct gaudi_device *gaudi = hdev->asic_specific;
4270         bool invalid_queue = false;
4271         int dma_id;
4272
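        /*
         * Map the logical queue ID to the matching doorbell register: for the
         * DMA/NIC queues this is the owning QMAN's PQ_PI register for the
         * queue's index (0-3) within that QMAN, with the PI registers laid
         * out 4 bytes apart.
         */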
4273         switch (hw_queue_id) {
4274         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4275                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4276                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4277                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4278                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4279                 break;
4280
4281         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4282                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4283                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4284                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4285                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4286                 break;
4287
4288         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4289                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4290                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4291                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4292                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4293                 break;
4294
4295         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4296                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4297                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4298                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4299                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4300                 break;
4301
4302         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4303                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4304                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4305                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4306                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4307                 break;
4308
4309         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4310                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4311                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4312                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4313                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4314                 break;
4315
4316         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4317                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4318                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4319                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4320                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4321                 break;
4322
4323         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4324                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4325                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4326                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4327                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4328                 break;
4329
4330         case GAUDI_QUEUE_ID_CPU_PQ:
4331                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4332                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4333                 else
4334                         invalid_queue = true;
4335                 break;
4336
4337         case GAUDI_QUEUE_ID_MME_0_0:
4338                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4339                 break;
4340
4341         case GAUDI_QUEUE_ID_MME_0_1:
4342                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4343                 break;
4344
4345         case GAUDI_QUEUE_ID_MME_0_2:
4346                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4347                 break;
4348
4349         case GAUDI_QUEUE_ID_MME_0_3:
4350                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4351                 break;
4352
4353         case GAUDI_QUEUE_ID_MME_1_0:
4354                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4355                 break;
4356
4357         case GAUDI_QUEUE_ID_MME_1_1:
4358                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4359                 break;
4360
4361         case GAUDI_QUEUE_ID_MME_1_2:
4362                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4363                 break;
4364
4365         case GAUDI_QUEUE_ID_MME_1_3:
4366                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4367                 break;
4368
4369         case GAUDI_QUEUE_ID_TPC_0_0:
4370                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4371                 break;
4372
4373         case GAUDI_QUEUE_ID_TPC_0_1:
4374                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4375                 break;
4376
4377         case GAUDI_QUEUE_ID_TPC_0_2:
4378                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4379                 break;
4380
4381         case GAUDI_QUEUE_ID_TPC_0_3:
4382                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4383                 break;
4384
4385         case GAUDI_QUEUE_ID_TPC_1_0:
4386                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4387                 break;
4388
4389         case GAUDI_QUEUE_ID_TPC_1_1:
4390                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4391                 break;
4392
4393         case GAUDI_QUEUE_ID_TPC_1_2:
4394                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4395                 break;
4396
4397         case GAUDI_QUEUE_ID_TPC_1_3:
4398                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4399                 break;
4400
4401         case GAUDI_QUEUE_ID_TPC_2_0:
4402                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4403                 break;
4404
4405         case GAUDI_QUEUE_ID_TPC_2_1:
4406                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4407                 break;
4408
4409         case GAUDI_QUEUE_ID_TPC_2_2:
4410                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4411                 break;
4412
4413         case GAUDI_QUEUE_ID_TPC_2_3:
4414                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4415                 break;
4416
4417         case GAUDI_QUEUE_ID_TPC_3_0:
4418                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4419                 break;
4420
4421         case GAUDI_QUEUE_ID_TPC_3_1:
4422                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4423                 break;
4424
4425         case GAUDI_QUEUE_ID_TPC_3_2:
4426                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4427                 break;
4428
4429         case GAUDI_QUEUE_ID_TPC_3_3:
4430                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4431                 break;
4432
4433         case GAUDI_QUEUE_ID_TPC_4_0:
4434                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4435                 break;
4436
4437         case GAUDI_QUEUE_ID_TPC_4_1:
4438                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4439                 break;
4440
4441         case GAUDI_QUEUE_ID_TPC_4_2:
4442                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4443                 break;
4444
4445         case GAUDI_QUEUE_ID_TPC_4_3:
4446                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4447                 break;
4448
4449         case GAUDI_QUEUE_ID_TPC_5_0:
4450                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4451                 break;
4452
4453         case GAUDI_QUEUE_ID_TPC_5_1:
4454                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4455                 break;
4456
4457         case GAUDI_QUEUE_ID_TPC_5_2:
4458                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4459                 break;
4460
4461         case GAUDI_QUEUE_ID_TPC_5_3:
4462                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4463                 break;
4464
4465         case GAUDI_QUEUE_ID_TPC_6_0:
4466                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4467                 break;
4468
4469         case GAUDI_QUEUE_ID_TPC_6_1:
4470                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4471                 break;
4472
4473         case GAUDI_QUEUE_ID_TPC_6_2:
4474                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4475                 break;
4476
4477         case GAUDI_QUEUE_ID_TPC_6_3:
4478                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4479                 break;
4480
4481         case GAUDI_QUEUE_ID_TPC_7_0:
4482                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4483                 break;
4484
4485         case GAUDI_QUEUE_ID_TPC_7_1:
4486                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4487                 break;
4488
4489         case GAUDI_QUEUE_ID_TPC_7_2:
4490                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4491                 break;
4492
4493         case GAUDI_QUEUE_ID_TPC_7_3:
4494                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4495                 break;
4496
4497         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4498                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4499                         invalid_queue = true;
4500
4501                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4502                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4503                 break;
4504
4505         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4506                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4507                         invalid_queue = true;
4508
4509                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4510                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4511                 break;
4512
4513         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4514                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4515                         invalid_queue = true;
4516
4517                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4518                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4519                 break;
4520
4521         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4522                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4523                         invalid_queue = true;
4524
4525                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4526                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4527                 break;
4528
4529         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4530                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4531                         invalid_queue = true;
4532
4533                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4534                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4535                 break;
4536
4537         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4538                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4539                         invalid_queue = true;
4540
4541                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4542                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4543                 break;
4544
4545         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4546                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4547                         invalid_queue = true;
4548
4549                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4550                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4551                 break;
4552
4553         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4554                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4555                         invalid_queue = true;
4556
4557                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4558                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4559                 break;
4560
4561         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4562                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4563                         invalid_queue = true;
4564
4565                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4566                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4567                 break;
4568
4569         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4570                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4571                         invalid_queue = true;
4572
4573                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4574                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4575                 break;
4576
4577         default:
4578                 invalid_queue = true;
4579         }
4580
4581         if (invalid_queue) {
4582                 /* Should never get here */
4583                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4584                         hw_queue_id);
4585                 return;
4586         }
4587
4588         db_value = pi;
4589
4590         /* ring the doorbell */
4591         WREG32(db_reg_offset, db_value);
4592
4593         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4594                 /* make sure device CPU will read latest data from host */
4595                 mb();
4596
4597                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4598                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4599                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4600
4601                 WREG32(irq_handler_offset,
4602                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4603         }
4604 }
4605
4606 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4607                                 struct hl_bd *bd)
4608 {
4609         __le64 *pbd = (__le64 *) bd;
4610
4611         /* The QMANs are in host memory so a simple copy suffices */
4612         pqe[0] = pbd[0];
4613         pqe[1] = pbd[1];
4614 }
4615
4616 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4617                                         dma_addr_t *dma_handle, gfp_t flags)
4618 {
4619         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4620                                                 dma_handle, flags);
4621
4622         /* Shift to the device's base physical address of host memory */
4623         if (kernel_addr)
4624                 *dma_handle += HOST_PHYS_BASE;
4625
4626         return kernel_addr;
4627 }
4628
4629 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4630                 void *cpu_addr, dma_addr_t dma_handle)
4631 {
4632         /* Cancel the device's base physical address of host memory */
4633         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4634
4635         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4636 }
4637
4638 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4639 {
4640         struct asic_fixed_properties *prop = &hdev->asic_prop;
4641         u64 cur_addr = prop->dram_user_base_address;
4642         u32 chunk_size, busy;
4643         int rc, dma_id;
4644
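        /*
         * Scrub DRAM in chunks of up to 2GB: program each DMA core with a
         * MEM_SET transfer of the scrub value to its own chunk, then poll all
         * the cores for idle before programming the next batch.
         */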
4645         while (cur_addr < prop->dram_end_address) {
4646                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4647                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4648
4649                         chunk_size =
4650                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4651
4652                         dev_dbg(hdev->dev,
4653                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4654                                 cur_addr, cur_addr + chunk_size);
4655
4656                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4657                                         lower_32_bits(val));
4658                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4659                                         upper_32_bits(val));
4660                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4661                                                 lower_32_bits(cur_addr));
4662                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4663                                                 upper_32_bits(cur_addr));
4664                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4665                                         chunk_size);
4666                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4667                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4668                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4669
4670                         cur_addr += chunk_size;
4671
4672                         if (cur_addr == prop->dram_end_address)
4673                                 break;
4674                 }
4675
4676                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4677                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4678
4679                         rc = hl_poll_timeout(
4680                                 hdev,
4681                                 mmDMA0_CORE_STS0 + dma_offset,
4682                                 busy,
4683                                 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4684                                 1000,
4685                                 HBM_SCRUBBING_TIMEOUT_US);
4686
4687                         if (rc) {
4688                                 dev_err(hdev->dev,
4689                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4690                                         dma_id);
4691                                 return -EIO;
4692                         }
4693                 }
4694         }
4695
4696         return 0;
4697 }
4698
4699 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4700 {
4701         struct asic_fixed_properties *prop = &hdev->asic_prop;
4702         u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
4703                         min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
4704         u64 addr, size, val = hdev->memory_scrub_val;
4705         ktime_t timeout;
4706         int rc = 0;
4707
4708         if (!hdev->memory_scrub)
4709                 return 0;
4710
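        /* Wait for the device to become idle before scrubbing SRAM/HBM */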
4711         timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4712         while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4713                 if (ktime_compare(ktime_get(), timeout) > 0) {
4714                         dev_err(hdev->dev, "waiting for idle timeout\n");
4715                         return -ETIMEDOUT;
4716                 }
4717                 usleep_range((1000 >> 2) + 1, 1000);
4718         }
4719
4720         /* Scrub SRAM */
4721         addr = prop->sram_user_base_address;
4722         size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4723
4724         dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4725                         addr, addr + size, val);
4726         rc = gaudi_memset_device_memory(hdev, addr, size, val);
4727         if (rc) {
4728                 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4729                 return rc;
4730         }
4731
4732         /* Scrub HBM using all DMA channels in parallel */
4733         rc = gaudi_scrub_device_dram(hdev, val);
4734         if (rc) {
4735                 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4736                 return rc;
4737         }
4738
4739         return 0;
4740 }
4741
4742 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4743                                 u32 queue_id, dma_addr_t *dma_handle,
4744                                 u16 *queue_len)
4745 {
4746         struct gaudi_device *gaudi = hdev->asic_specific;
4747         struct gaudi_internal_qman_info *q;
4748
4749         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4750                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4751                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4752                 return NULL;
4753         }
4754
4755         q = &gaudi->internal_qmans[queue_id];
4756         *dma_handle = q->pq_dma_addr;
4757         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4758
4759         return q->pq_kernel_addr;
4760 }
4761
4762 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4763                                 u16 len, u32 timeout, u64 *result)
4764 {
4765         struct gaudi_device *gaudi = hdev->asic_specific;
4766
4767         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4768                 if (result)
4769                         *result = 0;
4770                 return 0;
4771         }
4772
4773         if (!timeout)
4774                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4775
4776         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4777                                                 timeout, result);
4778 }
4779
4780 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4781 {
4782         struct packet_msg_prot *fence_pkt;
4783         dma_addr_t pkt_dma_addr;
4784         u32 fence_val, tmp, timeout_usec;
4785         dma_addr_t fence_dma_addr;
4786         u32 *fence_ptr;
4787         int rc;
4788
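        /*
         * Queue test scheme: allocate a scratch dword in host memory, submit a
         * MSG_PROT packet that writes a known fence value to it and poll the
         * scratch location until the value shows up or the timeout expires.
         */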
4789         if (hdev->pldm)
4790                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4791         else
4792                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4793
4794         fence_val = GAUDI_QMAN0_FENCE_VAL;
4795
4796         fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4797         if (!fence_ptr) {
4798                 dev_err(hdev->dev,
4799                         "Failed to allocate memory for H/W queue %d testing\n",
4800                         hw_queue_id);
4801                 return -ENOMEM;
4802         }
4803
4804         *fence_ptr = 0;
4805
4806         fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4807                                                 &pkt_dma_addr);
4808         if (!fence_pkt) {
4809                 dev_err(hdev->dev,
4810                         "Failed to allocate packet for H/W queue %d testing\n",
4811                         hw_queue_id);
4812                 rc = -ENOMEM;
4813                 goto free_fence_ptr;
4814         }
4815
4816         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4817         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4818         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4819
4820         fence_pkt->ctl = cpu_to_le32(tmp);
4821         fence_pkt->value = cpu_to_le32(fence_val);
4822         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4823
4824         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4825                                         sizeof(struct packet_msg_prot),
4826                                         pkt_dma_addr);
4827         if (rc) {
4828                 dev_err(hdev->dev,
4829                         "Failed to send fence packet to H/W queue %d\n",
4830                         hw_queue_id);
4831                 goto free_pkt;
4832         }
4833
4834         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4835                                         1000, timeout_usec, true);
4836
4837         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4838
4839         if (rc == -ETIMEDOUT) {
4840                 dev_err(hdev->dev,
4841                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4842                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4843                 rc = -EIO;
4844         }
4845
4846 free_pkt:
4847         hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4848 free_fence_ptr:
4849         hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4850         return rc;
4851 }
4852
4853 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4854 {
4855         struct gaudi_device *gaudi = hdev->asic_specific;
4856
4857         /*
4858          * Check the capability here, as send_cpu_message() won't update the
4859          * result value if the capability isn't set.
4860          */
4861         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4862                 return 0;
4863
4864         return hl_fw_test_cpu_queue(hdev);
4865 }
4866
4867 static int gaudi_test_queues(struct hl_device *hdev)
4868 {
4869         int i, rc, ret_val = 0;
4870
4871         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4872                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4873                         rc = gaudi_test_queue(hdev, i);
4874                         if (rc)
4875                                 ret_val = -EINVAL;
4876                 }
4877         }
4878
4879         rc = gaudi_test_cpu_queue(hdev);
4880         if (rc)
4881                 ret_val = -EINVAL;
4882
4883         return ret_val;
4884 }
4885
4886 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4887                 gfp_t mem_flags, dma_addr_t *dma_handle)
4888 {
4889         void *kernel_addr;
4890
4891         if (size > GAUDI_DMA_POOL_BLK_SIZE)
4892                 return NULL;
4893
4894         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4895
4896         /* Shift to the device's base physical address of host memory */
4897         if (kernel_addr)
4898                 *dma_handle += HOST_PHYS_BASE;
4899
4900         return kernel_addr;
4901 }
4902
4903 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4904                         dma_addr_t dma_addr)
4905 {
4906         /* Cancel the device's base physical address of host memory */
4907         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4908
4909         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4910 }
4911
4912 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4913                                         size_t size, dma_addr_t *dma_handle)
4914 {
4915         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4916 }
4917
4918 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4919                                                 size_t size, void *vaddr)
4920 {
4921         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4922 }
4923
4924 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4925 {
4926         struct scatterlist *sg, *sg_next_iter;
4927         u32 count, dma_desc_cnt;
4928         u64 len, len_next;
4929         dma_addr_t addr, addr_next;
4930
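        /*
         * Count how many LIN_DMA packets this SG table will need. Physically
         * contiguous entries are merged as long as the combined length stays
         * within DMA_MAX_TRANSFER_SIZE, so each descriptor covers the largest
         * possible chunk.
         */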
4931         dma_desc_cnt = 0;
4932
4933         for_each_sgtable_dma_sg(sgt, sg, count) {
4934                 len = sg_dma_len(sg);
4935                 addr = sg_dma_address(sg);
4936
4937                 if (len == 0)
4938                         break;
4939
4940                 while ((count + 1) < sgt->nents) {
4941                         sg_next_iter = sg_next(sg);
4942                         len_next = sg_dma_len(sg_next_iter);
4943                         addr_next = sg_dma_address(sg_next_iter);
4944
4945                         if (len_next == 0)
4946                                 break;
4947
4948                         if ((addr + len == addr_next) &&
4949                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4950                                 len += len_next;
4951                                 count++;
4952                                 sg = sg_next_iter;
4953                         } else {
4954                                 break;
4955                         }
4956                 }
4957
4958                 dma_desc_cnt++;
4959         }
4960
4961         return dma_desc_cnt * sizeof(struct packet_lin_dma);
4962 }
4963
4964 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4965                                 struct hl_cs_parser *parser,
4966                                 struct packet_lin_dma *user_dma_pkt,
4967                                 u64 addr, enum dma_data_direction dir)
4968 {
4969         struct hl_userptr *userptr;
4970         int rc;
4971
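        /*
         * Reuse an existing pinning of this user range if it is already on the
         * job's userptr list; otherwise pin the pages, DMA-map them and record
         * the mapping so it can be torn down on failure.
         */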
4972         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4973                         parser->job_userptr_list, &userptr))
4974                 goto already_pinned;
4975
4976         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4977         if (!userptr)
4978                 return -ENOMEM;
4979
4980         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4981                                 userptr);
4982         if (rc)
4983                 goto free_userptr;
4984
4985         list_add_tail(&userptr->job_node, parser->job_userptr_list);
4986
4987         rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
4988         if (rc) {
4989                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4990                 goto unpin_memory;
4991         }
4992
4993         userptr->dma_mapped = true;
4994         userptr->dir = dir;
4995
4996 already_pinned:
4997         parser->patched_cb_size +=
4998                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4999
5000         return 0;
5001
5002 unpin_memory:
5003         list_del(&userptr->job_node);
5004         hl_unpin_host_memory(hdev, userptr);
5005 free_userptr:
5006         kfree(userptr);
5007         return rc;
5008 }
5009
5010 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5011                                 struct hl_cs_parser *parser,
5012                                 struct packet_lin_dma *user_dma_pkt,
5013                                 bool src_in_host)
5014 {
5015         enum dma_data_direction dir;
5016         bool skip_host_mem_pin = false, user_memset;
5017         u64 addr;
5018         int rc = 0;
5019
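        /*
         * A host-side memset has no real source buffer to pin, so host memory
         * pinning is skipped in that case; otherwise the referenced host range
         * is pinned and DMA-mapped below.
         */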
5020         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5021                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5022                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5023
5024         if (src_in_host) {
5025                 if (user_memset)
5026                         skip_host_mem_pin = true;
5027
5028                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5029                 dir = DMA_TO_DEVICE;
5030                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5031         } else {
5032                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5033                 dir = DMA_FROM_DEVICE;
5034                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5035                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5036                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5037         }
5038
5039         if (skip_host_mem_pin)
5040                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5041         else
5042                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5043                                                 addr, dir);
5044
5045         return rc;
5046 }
5047
5048 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5049                                 struct hl_cs_parser *parser,
5050                                 struct packet_lin_dma *user_dma_pkt)
5051 {
5052         bool src_in_host = false;
5053         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5054                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5055                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5056
5057         dev_dbg(hdev->dev, "DMA packet details:\n");
5058         dev_dbg(hdev->dev, "source == 0x%llx\n",
5059                                 le64_to_cpu(user_dma_pkt->src_addr));
5060         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5061         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5062
5063         /*
5064          * Special handling for DMA with size 0. Bypass all validations
5065          * because no transactions will be done except for WR_COMP, which
5066          * is not a security issue
5067          */
5068         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5069                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5070                 return 0;
5071         }
5072
5073         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5074                 src_in_host = true;
5075
5076         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5077                                                 src_in_host);
5078 }
5079
5080 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5081                                         struct hl_cs_parser *parser,
5082                                         struct packet_load_and_exe *user_pkt)
5083 {
5084         u32 cfg;
5085
5086         cfg = le32_to_cpu(user_pkt->cfg);
5087
5088         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5089                 dev_err(hdev->dev,
5090                         "User not allowed to use Load and Execute\n");
5091                 return -EPERM;
5092         }
5093
5094         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5095
5096         return 0;
5097 }
5098
5099 static int gaudi_validate_cb(struct hl_device *hdev,
5100                         struct hl_cs_parser *parser, bool is_mmu)
5101 {
5102         u32 cb_parsed_length = 0;
5103         int rc = 0;
5104
5105         parser->patched_cb_size = 0;
5106
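        /*
         * Walk the user CB packet by packet: validate every packet ID, reject
         * privileged packets (MSG_PROT, CP_DMA, STOP, WREG_BULK) and
         * accumulate the size that the patched CB will need.
         */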
5107         /* user_cb_size is more than 0 so the loop will always be executed */
5108         while (cb_parsed_length < parser->user_cb_size) {
5109                 enum packet_id pkt_id;
5110                 u16 pkt_size;
5111                 struct gaudi_packet *user_pkt;
5112
5113                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5114
5115                 pkt_id = (enum packet_id) (
5116                                 (le64_to_cpu(user_pkt->header) &
5117                                 PACKET_HEADER_PACKET_ID_MASK) >>
5118                                         PACKET_HEADER_PACKET_ID_SHIFT);
5119
5120                 if (!validate_packet_id(pkt_id)) {
5121                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5122                         rc = -EINVAL;
5123                         break;
5124                 }
5125
5126                 pkt_size = gaudi_packet_sizes[pkt_id];
5127                 cb_parsed_length += pkt_size;
5128                 if (cb_parsed_length > parser->user_cb_size) {
5129                         dev_err(hdev->dev,
5130                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5131                         rc = -EINVAL;
5132                         break;
5133                 }
5134
5135                 switch (pkt_id) {
5136                 case PACKET_MSG_PROT:
5137                         dev_err(hdev->dev,
5138                                 "User not allowed to use MSG_PROT\n");
5139                         rc = -EPERM;
5140                         break;
5141
5142                 case PACKET_CP_DMA:
5143                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5144                         rc = -EPERM;
5145                         break;
5146
5147                 case PACKET_STOP:
5148                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5149                         rc = -EPERM;
5150                         break;
5151
5152                 case PACKET_WREG_BULK:
5153                         dev_err(hdev->dev,
5154                                 "User not allowed to use WREG_BULK\n");
5155                         rc = -EPERM;
5156                         break;
5157
5158                 case PACKET_LOAD_AND_EXE:
5159                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5160                                 (struct packet_load_and_exe *) user_pkt);
5161                         break;
5162
5163                 case PACKET_LIN_DMA:
5164                         parser->contains_dma_pkt = true;
5165                         if (is_mmu)
5166                                 parser->patched_cb_size += pkt_size;
5167                         else
5168                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5169                                         (struct packet_lin_dma *) user_pkt);
5170                         break;
5171
5172                 case PACKET_WREG_32:
5173                 case PACKET_MSG_LONG:
5174                 case PACKET_MSG_SHORT:
5175                 case PACKET_REPEAT:
5176                 case PACKET_FENCE:
5177                 case PACKET_NOP:
5178                 case PACKET_ARB_POINT:
5179                         parser->patched_cb_size += pkt_size;
5180                         break;
5181
5182                 default:
5183                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5184                                 pkt_id);
5185                         rc = -EINVAL;
5186                         break;
5187                 }
5188
5189                 if (rc)
5190                         break;
5191         }
5192
5193         /*
5194          * The new CB should have space at the end for:
5195          * 1. Optional NOP padding for cacheline alignment
5196          * 2. A MSG_PROT packet that will act as a completion packet
5197          * 3. A MSG_PROT packet that will generate the MSI interrupt
5198          */
5199         if (parser->completion)
5200                 parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5201                         parser->patched_cb_size);
5202
5203         return rc;
5204 }
5205
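/*
 * Patch a single user LIN_DMA packet that involves host memory: find the
 * pinned userptr covering the host address and emit one LIN_DMA packet
 * per (merged) scatter-gather entry. A host-to-device memset is copied
 * through unchanged. WR_COMP is cleared on all emitted packets and then
 * restored on the last one, so completion behaves as the user requested.
 */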
5206 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5207                                 struct hl_cs_parser *parser,
5208                                 struct packet_lin_dma *user_dma_pkt,
5209                                 struct packet_lin_dma *new_dma_pkt,
5210                                 u32 *new_dma_pkt_size)
5211 {
5212         struct hl_userptr *userptr;
5213         struct scatterlist *sg, *sg_next_iter;
5214         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5215         u64 len, len_next;
5216         dma_addr_t dma_addr, dma_addr_next;
5217         u64 device_memory_addr, addr;
5218         enum dma_data_direction dir;
5219         struct sg_table *sgt;
5220         bool src_in_host = false;
5221         bool skip_host_mem_pin = false;
5222         bool user_memset;
5223
5224         ctl = le32_to_cpu(user_dma_pkt->ctl);
5225
5226         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5227                 src_in_host = true;
5228
5229         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5230                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5231
5232         if (src_in_host) {
5233                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5234                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5235                 dir = DMA_TO_DEVICE;
5236                 if (user_memset)
5237                         skip_host_mem_pin = true;
5238         } else {
5239                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5240                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5241                 dir = DMA_FROM_DEVICE;
5242         }
5243
5244         if ((!skip_host_mem_pin) &&
5245                 (!hl_userptr_is_pinned(hdev, addr,
5246                                         le32_to_cpu(user_dma_pkt->tsize),
5247                                         parser->job_userptr_list, &userptr))) {
5248                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5249                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5250                 return -EFAULT;
5251         }
5252
5253         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5254                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5255                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5256                 return 0;
5257         }
5258
5259         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5260
5261         sgt = userptr->sgt;
5262         dma_desc_cnt = 0;
5263
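        /*
         * Walk the DMA-mapped scatter-gather list and emit one LIN_DMA
         * packet per entry, merging physically contiguous entries as long
         * as the combined length does not exceed DMA_MAX_TRANSFER_SIZE.
         */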
5264         for_each_sgtable_dma_sg(sgt, sg, count) {
5265                 len = sg_dma_len(sg);
5266                 dma_addr = sg_dma_address(sg);
5267
5268                 if (len == 0)
5269                         break;
5270
5271                 while ((count + 1) < sgt->nents) {
5272                         sg_next_iter = sg_next(sg);
5273                         len_next = sg_dma_len(sg_next_iter);
5274                         dma_addr_next = sg_dma_address(sg_next_iter);
5275
5276                         if (len_next == 0)
5277                                 break;
5278
5279                         if ((dma_addr + len == dma_addr_next) &&
5280                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5281                                 len += len_next;
5282                                 count++;
5283                                 sg = sg_next_iter;
5284                         } else {
5285                                 break;
5286                         }
5287                 }
5288
5289                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5290                 if (likely(dma_desc_cnt))
5291                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5292                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5293                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5294                 new_dma_pkt->tsize = cpu_to_le32(len);
5295
5296                 if (dir == DMA_TO_DEVICE) {
5297                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5298                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5299                 } else {
5300                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5301                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5302                 }
5303
5304                 if (!user_memset)
5305                         device_memory_addr += len;
5306                 dma_desc_cnt++;
5307                 new_dma_pkt++;
5308         }
5309
5310         if (!dma_desc_cnt) {
5311                 dev_err(hdev->dev,
5312                         "No SG entries found when patching DMA packet\n");
5313                 return -EFAULT;
5314         }
5315
5316         /* Fix the last dma packet - wrcomp must be as user set it */
5317         new_dma_pkt--;
5318         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5319
5320         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5321
5322         return 0;
5323 }
5324
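/*
 * Second pass over the user CB (no-MMU flow): copy every allowed packet
 * into the kernel-allocated patched CB, replacing each LIN_DMA packet
 * with the packets produced by gaudi_patch_dma_packet(). Packet types
 * that were rejected during validation are rejected here as well.
 */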
5325 static int gaudi_patch_cb(struct hl_device *hdev,
5326                                 struct hl_cs_parser *parser)
5327 {
5328         u32 cb_parsed_length = 0;
5329         u32 cb_patched_cur_length = 0;
5330         int rc = 0;
5331
5332         /* user_cb_size is more than 0 so the loop will always be executed */
5333         while (cb_parsed_length < parser->user_cb_size) {
5334                 enum packet_id pkt_id;
5335                 u16 pkt_size;
5336                 u32 new_pkt_size = 0;
5337                 struct gaudi_packet *user_pkt, *kernel_pkt;
5338
5339                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5340                 kernel_pkt = parser->patched_cb->kernel_address +
5341                                         cb_patched_cur_length;
5342
5343                 pkt_id = (enum packet_id) (
5344                                 (le64_to_cpu(user_pkt->header) &
5345                                 PACKET_HEADER_PACKET_ID_MASK) >>
5346                                         PACKET_HEADER_PACKET_ID_SHIFT);
5347
5348                 if (!validate_packet_id(pkt_id)) {
5349                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5350                         rc = -EINVAL;
5351                         break;
5352                 }
5353
5354                 pkt_size = gaudi_packet_sizes[pkt_id];
5355                 cb_parsed_length += pkt_size;
5356                 if (cb_parsed_length > parser->user_cb_size) {
5357                         dev_err(hdev->dev,
5358                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5359                         rc = -EINVAL;
5360                         break;
5361                 }
5362
5363                 switch (pkt_id) {
5364                 case PACKET_LIN_DMA:
5365                         rc = gaudi_patch_dma_packet(hdev, parser,
5366                                         (struct packet_lin_dma *) user_pkt,
5367                                         (struct packet_lin_dma *) kernel_pkt,
5368                                         &new_pkt_size);
5369                         cb_patched_cur_length += new_pkt_size;
5370                         break;
5371
5372                 case PACKET_MSG_PROT:
5373                         dev_err(hdev->dev,
5374                                 "User not allowed to use MSG_PROT\n");
5375                         rc = -EPERM;
5376                         break;
5377
5378                 case PACKET_CP_DMA:
5379                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5380                         rc = -EPERM;
5381                         break;
5382
5383                 case PACKET_STOP:
5384                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5385                         rc = -EPERM;
5386                         break;
5387
5388                 case PACKET_WREG_32:
5389                 case PACKET_WREG_BULK:
5390                 case PACKET_MSG_LONG:
5391                 case PACKET_MSG_SHORT:
5392                 case PACKET_REPEAT:
5393                 case PACKET_FENCE:
5394                 case PACKET_NOP:
5395                 case PACKET_ARB_POINT:
5396                 case PACKET_LOAD_AND_EXE:
5397                         memcpy(kernel_pkt, user_pkt, pkt_size);
5398                         cb_patched_cur_length += pkt_size;
5399                         break;
5400
5401                 default:
5402                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5403                                 pkt_id);
5404                         rc = -EINVAL;
5405                         break;
5406                 }
5407
5408                 if (rc)
5409                         break;
5410         }
5411
5412         return rc;
5413 }
5414
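/*
 * MMU flow: the user CB is copied as-is into a kernel-allocated patched
 * CB, enlarged only by the room needed for the end-of-CB packets, and is
 * then validated. LIN_DMA packets are not patched here because host
 * addresses are translated by the MMU.
 */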
5415 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5416                 struct hl_cs_parser *parser)
5417 {
5418         u64 handle;
5419         u32 patched_cb_size;
5420         struct hl_cb *user_cb;
5421         int rc;
5422
5423         /*
5424          * The new CB should have space at the end for:
5425          * 1. Optional NOP padding for cacheline alignment
5426          * 2. A MSG_PROT packet that will act as a completion packet
5427          * 3. A MSG_PROT packet that will generate the MSI interrupt
5428          */
5429         if (parser->completion)
5430                 parser->patched_cb_size = parser->user_cb_size +
5431                                 gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5432         else
5433                 parser->patched_cb_size = parser->user_cb_size;
5434
5435         rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5436                                 parser->patched_cb_size, false, false,
5437                                 &handle);
5438
5439         if (rc) {
5440                 dev_err(hdev->dev,
5441                         "Failed to allocate patched CB for DMA CS %d\n",
5442                         rc);
5443                 return rc;
5444         }
5445
5446         parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5447         /* hl_cb_get should never fail */
5448         if (!parser->patched_cb) {
5449                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5450                 rc = -EFAULT;
5451                 goto out;
5452         }
5453
5454         /*
5455          * We are protected from overflow because the check
5456          * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5457          * in the common code. That check is done only if is_kernel_allocated_cb is true.
5458          *
5459          * There is no option to reach here without going through that check because:
5460          * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5461          *    an external queue.
5462          * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5463          */
5464         memcpy(parser->patched_cb->kernel_address,
5465                 parser->user_cb->kernel_address,
5466                 parser->user_cb_size);
5467
5468         patched_cb_size = parser->patched_cb_size;
5469
5470         /* Validate patched CB instead of user CB */
5471         user_cb = parser->user_cb;
5472         parser->user_cb = parser->patched_cb;
5473         rc = gaudi_validate_cb(hdev, parser, true);
5474         parser->user_cb = user_cb;
5475
5476         if (rc) {
5477                 hl_cb_put(parser->patched_cb);
5478                 goto out;
5479         }
5480
5481         if (patched_cb_size != parser->patched_cb_size) {
5482                 dev_err(hdev->dev, "user CB size mismatch\n");
5483                 hl_cb_put(parser->patched_cb);
5484                 rc = -EINVAL;
5485                 goto out;
5486         }
5487
5488 out:
5489         /*
5490          * Always call cb destroy here because we still hold one reference
5491          * to it from the earlier cb_get call. After the job is completed,
5492          * cb_put will release it, but here we want to remove it from the
5493          * idr
5494          */
5495         hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5496
5497         return rc;
5498 }
5499
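/*
 * No-MMU flow: validate the user CB, allocate a patched CB large enough
 * for the expanded DMA packets and patch the user CB into it. On failure
 * the job's userptr list is released.
 */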
5500 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5501                 struct hl_cs_parser *parser)
5502 {
5503         u64 handle;
5504         int rc;
5505
5506         rc = gaudi_validate_cb(hdev, parser, false);
5507
5508         if (rc)
5509                 goto free_userptr;
5510
5511         rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5512                                 parser->patched_cb_size, false, false,
5513                                 &handle);
5514         if (rc) {
5515                 dev_err(hdev->dev,
5516                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5517                 goto free_userptr;
5518         }
5519
5520         parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5521         /* hl_cb_get should never fail here */
5522         if (!parser->patched_cb) {
5523                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5524                 rc = -EFAULT;
5525                 goto out;
5526         }
5527
5528         rc = gaudi_patch_cb(hdev, parser);
5529
5530         if (rc)
5531                 hl_cb_put(parser->patched_cb);
5532
5533 out:
5534         /*
5535          * Always call cb destroy here because we still hold one reference
5536          * to it from the earlier cb_get call. After the job is completed,
5537          * cb_put will release it, but here we want to remove it from the
5538          * idr
5539          */
5540         hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5541
5542 free_userptr:
5543         if (rc)
5544                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5545         return rc;
5546 }
5547
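/*
 * Jobs for internal queues are not patched. Only verify that the target
 * NIC queue (if any) is enabled and that the CB address range falls
 * entirely inside SRAM, DRAM or the host address range covered by the
 * PMMU.
 */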
5548 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5549                                         struct hl_cs_parser *parser)
5550 {
5551         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5552         struct gaudi_device *gaudi = hdev->asic_specific;
5553         u32 nic_queue_offset, nic_mask_q_id;
5554
5555         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5556                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5557                 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5558                 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5559
5560                 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5561                         dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5562                         return -EINVAL;
5563                 }
5564         }
5565
5566         /* For internal queue jobs just check if CB address is valid */
5567         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5568                                         parser->user_cb_size,
5569                                         asic_prop->sram_user_base_address,
5570                                         asic_prop->sram_end_address))
5571                 return 0;
5572
5573         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5574                                         parser->user_cb_size,
5575                                         asic_prop->dram_user_base_address,
5576                                         asic_prop->dram_end_address))
5577                 return 0;
5578
5579         /* PMMU and HPMMU addresses are equal, check only one of them */
5580         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5581                                         parser->user_cb_size,
5582                                         asic_prop->pmmu.start_addr,
5583                                         asic_prop->pmmu.end_addr))
5584                 return 0;
5585
5586         dev_err(hdev->dev,
5587                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5588                 parser->user_cb, parser->user_cb_size);
5589
5590         return -EFAULT;
5591 }
5592
5593 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5594 {
5595         struct gaudi_device *gaudi = hdev->asic_specific;
5596
5597         if (parser->queue_type == QUEUE_TYPE_INT)
5598                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5599
5600         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5601                 return gaudi_parse_cb_mmu(hdev, parser);
5602         else
5603                 return gaudi_parse_cb_no_mmu(hdev, parser);
5604 }
5605
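/*
 * Append the end-of-CB packets: NOP padding from the end of the original
 * CB up to the two trailing MSG_PROT packets, a MSG_PROT that writes the
 * completion value to the CQ, and a MSG_PROT that triggers the MSI
 * (either one of the multi-MSI vectors or the single MSI register,
 * depending on the mode).
 */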
5606 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5607                                 u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5608                                 u32 msi_vec, bool eb)
5609 {
5610         struct gaudi_device *gaudi = hdev->asic_specific;
5611         struct packet_msg_prot *cq_pkt;
5612         struct packet_nop *cq_padding;
5613         u64 msi_addr;
5614         u32 tmp;
5615
5616         cq_padding = kernel_address + original_len;
5617         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5618
5619         while ((void *)cq_padding < (void *)cq_pkt) {
5620                 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5621                 cq_padding++;
5622         }
5623
5624         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5625         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5626
5627         if (eb)
5628                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5629
5630         cq_pkt->ctl = cpu_to_le32(tmp);
5631         cq_pkt->value = cpu_to_le32(cq_val);
5632         cq_pkt->addr = cpu_to_le64(cq_addr);
5633
5634         cq_pkt++;
5635
5636         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5637         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5638         cq_pkt->ctl = cpu_to_le32(tmp);
5639         cq_pkt->value = cpu_to_le32(1);
5640
5641         if (gaudi->multi_msi_mode)
5642                 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5643         else
5644                 msi_addr = mmPCIE_CORE_MSI_REQ;
5645
5646         cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5647 }
5648
5649 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5650 {
5651         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5652 }
5653
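/*
 * Fill a device memory region with a value by building a single memset
 * LIN_DMA packet in a kernel CB and sending it as a driver job on the
 * DMA0 queue. DMA0 errors are checked before and after the transfer.
 */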
5654 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5655                                         u32 size, u64 val)
5656 {
5657         struct packet_lin_dma *lin_dma_pkt;
5658         struct hl_cs_job *job;
5659         u32 cb_size, ctl, err_cause;
5660         struct hl_cb *cb;
5661         int rc;
5662
5663         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5664         if (!cb)
5665                 return -EFAULT;
5666
5667         lin_dma_pkt = cb->kernel_address;
5668         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5669         cb_size = sizeof(*lin_dma_pkt);
5670
5671         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5672         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5673         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5674         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5675         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5676
5677         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5678         lin_dma_pkt->src_addr = cpu_to_le64(val);
5679         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5680         lin_dma_pkt->tsize = cpu_to_le32(size);
5681
5682         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5683         if (!job) {
5684                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5685                 rc = -ENOMEM;
5686                 goto release_cb;
5687         }
5688
5689         /* Verify DMA is OK */
5690         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5691         if (err_cause && !hdev->init_done) {
5692                 dev_dbg(hdev->dev,
5693                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5694                         err_cause);
5695                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5696         }
5697
5698         job->id = 0;
5699         job->user_cb = cb;
5700         atomic_inc(&job->user_cb->cs_cnt);
5701         job->user_cb_size = cb_size;
5702         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5703         job->patched_cb = job->user_cb;
5704         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5705
5706         hl_debugfs_add_job(hdev, job);
5707
5708         rc = gaudi_send_job_on_qman0(hdev, job);
5709         hl_debugfs_remove_job(hdev, job);
5710         kfree(job);
5711         atomic_dec(&cb->cs_cnt);
5712
5713         /* Verify DMA is OK */
5714         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5715         if (err_cause) {
5716                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5717                 rc = -EIO;
5718                 if (!hdev->init_done) {
5719                         dev_dbg(hdev->dev,
5720                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5721                                 err_cause);
5722                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5723                 }
5724         }
5725
5726 release_cb:
5727         hl_cb_put(cb);
5728         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5729
5730         return rc;
5731 }
5732
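/*
 * Write the same value to a contiguous range of configuration registers
 * by building one MSG_LONG packet per register in a kernel CB and
 * sending it as a driver job on the DMA0 queue. The CB size is limited
 * to 2MB.
 */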
5733 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5734                                         u32 num_regs, u32 val)
5735 {
5736         struct packet_msg_long *pkt;
5737         struct hl_cs_job *job;
5738         u32 cb_size, ctl;
5739         struct hl_cb *cb;
5740         int i, rc;
5741
5742         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5743
5744         if (cb_size > SZ_2M) {
5745                 dev_err(hdev->dev, "CB size must not exceed %uMB\n", SZ_2M / SZ_1M);
5746                 return -ENOMEM;
5747         }
5748
5749         cb = hl_cb_kernel_create(hdev, cb_size, false);
5750         if (!cb)
5751                 return -EFAULT;
5752
5753         pkt = cb->kernel_address;
5754
5755         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5756         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5757         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5758         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5759         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5760
5761         for (i = 0; i < num_regs ; i++, pkt++) {
5762                 pkt->ctl = cpu_to_le32(ctl);
5763                 pkt->value = cpu_to_le32(val);
5764                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5765         }
5766
5767         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5768         if (!job) {
5769                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5770                 rc = -ENOMEM;
5771                 goto release_cb;
5772         }
5773
5774         job->id = 0;
5775         job->user_cb = cb;
5776         atomic_inc(&job->user_cb->cs_cnt);
5777         job->user_cb_size = cb_size;
5778         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5779         job->patched_cb = job->user_cb;
5780         job->job_cb_size = cb_size;
5781
5782         hl_debugfs_add_job(hdev, job);
5783
5784         rc = gaudi_send_job_on_qman0(hdev, job);
5785         hl_debugfs_remove_job(hdev, job);
5786         kfree(job);
5787         atomic_dec(&cb->cs_cnt);
5788
5789 release_cb:
5790         hl_cb_put(cb);
5791         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5792
5793         return rc;
5794 }
5795
5796 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5797 {
5798         u64 base_addr;
5799         u32 num_regs;
5800         int rc;
5801
5802         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5803         num_regs = NUM_OF_SOB_IN_BLOCK;
5804         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5805         if (rc) {
5806                 dev_err(hdev->dev, "failed resetting SM registers\n");
5807                 return -ENOMEM;
5808         }
5809
5810         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5811         num_regs = NUM_OF_SOB_IN_BLOCK;
5812         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5813         if (rc) {
5814                 dev_err(hdev->dev, "failed resetting SM registers\n");
5815                 return -ENOMEM;
5816         }
5817
5818         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5819         num_regs = NUM_OF_SOB_IN_BLOCK;
5820         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5821         if (rc) {
5822                 dev_err(hdev->dev, "failed resetting SM registers\n");
5823                 return -ENOMEM;
5824         }
5825
5826         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5827         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5828         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5829         if (rc) {
5830                 dev_err(hdev->dev, "failed resetting SM registers\n");
5831                 return -ENOMEM;
5832         }
5833
5834         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5835         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5836         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5837         if (rc) {
5838                 dev_err(hdev->dev, "failed resetting SM registers\n");
5839                 return -ENOMEM;
5840         }
5841
5842         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5843         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5844         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5845         if (rc) {
5846                 dev_err(hdev->dev, "failed resetting SM registers\n");
5847                 return -ENOMEM;
5848         }
5849
5850         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5851                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5852         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5853         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5854         if (rc) {
5855                 dev_err(hdev->dev, "failed resetting SM registers\n");
5856                 return -ENOMEM;
5857         }
5858
5859         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5860                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5861         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5862         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5863         if (rc) {
5864                 dev_err(hdev->dev, "failed resetting SM registers\n");
5865                 return -ENOMEM;
5866         }
5867
5868         return 0;
5869 }
5870
5871 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5872 {
5873         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5874                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5875         int i;
5876
5877         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5878                 u64 sob_addr = CFG_BASE +
5879                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5880                                 (i * sob_delta);
5881                 u32 dma_offset = i * DMA_CORE_OFFSET;
5882
5883                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5884                                 lower_32_bits(sob_addr));
5885                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5886                                 upper_32_bits(sob_addr));
5887                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5888
5889                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5890                  * modified by the user for SRAM reduction
5891                  */
5892                 if (i > 1)
5893                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5894                                                                 0x00000001);
5895         }
5896 }
5897
5898 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5899 {
5900         u32 qman_offset;
5901         int i;
5902
5903         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5904                 qman_offset = i * DMA_QMAN_OFFSET;
5905                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5906         }
5907
5908         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5909                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5910                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5911         }
5912
5913         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5914                 qman_offset = i * TPC_QMAN_OFFSET;
5915                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5916         }
5917
5918         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5919                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5920                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5921                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5922         }
5923 }
5924
5925 static int gaudi_restore_user_registers(struct hl_device *hdev)
5926 {
5927         int rc;
5928
5929         rc = gaudi_restore_sm_registers(hdev);
5930         if (rc)
5931                 return rc;
5932
5933         gaudi_restore_dma_registers(hdev);
5934         gaudi_restore_qm_registers(hdev);
5935
5936         return 0;
5937 }
5938
5939 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5940 {
5941         return 0;
5942 }
5943
5944 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5945 {
5946         u32 size = hdev->asic_prop.mmu_pgt_size +
5947                         hdev->asic_prop.mmu_cache_mng_size;
5948         struct gaudi_device *gaudi = hdev->asic_specific;
5949         u64 addr = hdev->asic_prop.mmu_pgt_addr;
5950
5951         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5952                 return 0;
5953
5954         return gaudi_memset_device_memory(hdev, addr, size, 0);
5955 }
5956
5957 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5958 {
5959
5960 }
5961
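/*
 * Program a DMA core directly (bypassing its QMAN) to copy size_to_dma
 * bytes from the device address 'addr' to the host DMA address, then
 * poll until the engine is no longer busy and check its error cause
 * register.
 */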
5962 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5963                                         u32 size_to_dma, dma_addr_t dma_addr)
5964 {
5965         u32 err_cause, val;
5966         u64 dma_offset;
5967         int rc;
5968
5969         dma_offset = dma_id * DMA_CORE_OFFSET;
5970
5971         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5972         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5973         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5974         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5975         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5976         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5977                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5978
5979         rc = hl_poll_timeout(
5980                 hdev,
5981                 mmDMA0_CORE_STS0 + dma_offset,
5982                 val,
5983                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5984                 0,
5985                 1000000);
5986
5987         if (rc) {
5988                 dev_err(hdev->dev,
5989                         "DMA %d timed out while reading 0x%llx\n",
5990                         dma_id, addr);
5991                 return -EIO;
5992         }
5993
5994         /* Verify DMA is OK */
5995         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5996         if (err_cause) {
5997                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5998                 dev_dbg(hdev->dev,
5999                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6000                         err_cause);
6001                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6002
6003                 return -EIO;
6004         }
6005
6006         return 0;
6007 }
6008
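/*
 * Debugfs helper: read 'size' bytes from device address 'addr' into
 * 'blob_addr' by transferring through an idle PCI DMA engine in chunks
 * of up to 2MB, using a temporary coherent bounce buffer. The engine's
 * CP is stopped and its PROT register is modified for the duration of
 * the transfer.
 */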
6009 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6010                                 void *blob_addr)
6011 {
6012         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6013         u32 qm_glbl_sts0, qm_cgm_sts;
6014         u64 dma_offset, qm_offset;
6015         dma_addr_t dma_addr;
6016         void *kernel_addr;
6017         bool is_eng_idle;
6018         int rc = 0, dma_id;
6019
6020         kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
6021
6022         if (!kernel_addr)
6023                 return -ENOMEM;
6024
6025         hdev->asic_funcs->hw_queues_lock(hdev);
6026
6027         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6028         dma_offset = dma_id * DMA_CORE_OFFSET;
6029         qm_offset = dma_id * DMA_QMAN_OFFSET;
6030         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6031         qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6032         qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6033         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6034                       IS_DMA_IDLE(dma_core_sts0);
6035
6036         if (!is_eng_idle) {
6037                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6038                 dma_offset = dma_id * DMA_CORE_OFFSET;
6039                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6040                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6041                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6042                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6043                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6044                               IS_DMA_IDLE(dma_core_sts0);
6045
6046                 if (!is_eng_idle) {
6047                         dev_err_ratelimited(hdev->dev,
6048                                 "Can't read via DMA because it is BUSY\n");
6049                         rc = -EAGAIN;
6050                         goto out;
6051                 }
6052         }
6053
6054         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6055         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6056                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6057
6058         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6059          * using the compute ctx ASID, if one exists. If not, use the
6060          * kernel ctx ASID
6061          */
6062         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6063
6064         /* Verify DMA is OK */
6065         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6066         if (err_cause) {
6067                 dev_dbg(hdev->dev,
6068                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6069                         err_cause);
6070                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6071         }
6072
6073         pos = 0;
6074         size_left = size;
6075         size_to_dma = SZ_2M;
6076
6077         while (size_left > 0) {
6078
6079                 if (size_left < SZ_2M)
6080                         size_to_dma = size_left;
6081
6082                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6083                                                 dma_addr);
6084                 if (rc)
6085                         break;
6086
6087                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6088
6089                 if (size_left <= SZ_2M)
6090                         break;
6091
6092                 pos += SZ_2M;
6093                 addr += SZ_2M;
6094                 size_left -= SZ_2M;
6095         }
6096
6097         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6098          * using the compute ctx ASID, if one exists. If not, use the
6099          * kernel ctx ASID
6100          */
6101         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6102                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6103
6104         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6105
6106 out:
6107         hdev->asic_funcs->hw_queues_unlock(hdev);
6108
6109         hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6110
6111         return rc;
6112 }
6113
6114 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6115 {
6116         struct gaudi_device *gaudi = hdev->asic_specific;
6117
6118         if (hdev->reset_info.hard_reset_pending)
6119                 return U64_MAX;
6120
6121         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6122                         (addr - gaudi->hbm_bar_cur_addr));
6123 }
6124
6125 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6126 {
6127         struct gaudi_device *gaudi = hdev->asic_specific;
6128
6129         if (hdev->reset_info.hard_reset_pending)
6130                 return;
6131
6132         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6133                         (addr - gaudi->hbm_bar_cur_addr));
6134 }
6135
6136 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6137 {
6138         /* mask to zero the MMBP and ASID bits */
6139         WREG32_AND(reg, ~0x7FF);
6140         WREG32_OR(reg, asid);
6141 }
6142
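/*
 * Program the given ASID into the non-secure properties and AxUSER
 * registers of all DMA, TPC, MME and enabled NIC engines (and the PSOC
 * trace unit), so transactions issued by these engines are translated
 * by the MMU under this ASID.
 */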
6143 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6144 {
6145         struct gaudi_device *gaudi = hdev->asic_specific;
6146
6147         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6148                 return;
6149
6150         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6151                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6152                 return;
6153         }
6154
6155         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6156         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6157         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6158         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6159         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6160
6161         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6162         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6163         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6164         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6165         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6166
6167         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6168         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6169         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6170         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6171         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6172
6173         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6174         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6175         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6176         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6177         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6178
6179         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6180         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6181         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6182         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6183         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6184
6185         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6186         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6187         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6188         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6189         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6190
6191         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6192         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6193         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6194         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6195         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6196
6197         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6198         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6199         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6200         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6201         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6202
6203         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6204         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6205         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6206         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6207         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6208         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6209         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6210         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6211
6212         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6213         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6214         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6215         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6216         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6217         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6218         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6219
6220         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6221         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6222         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6223         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6224         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6225         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6226         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6227
6228         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6229         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6230         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6231         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6232         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6233         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6234         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6235
6236         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6237         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6238         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6239         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6240         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6241         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6242         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6243
6244         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6245         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6246         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6247         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6248         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6249         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6250         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6251
6252         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6253         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6254         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6255         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6256         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6257         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6258         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6259
6260         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6261         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6262         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6263         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6264         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6265         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6266         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6267
6268         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6269         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6270         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6271         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6272         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6273         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6274         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6275
6276         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6277         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6278         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6279         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6280         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6281         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6282         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6283         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6284         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6285         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6286
6287         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6288         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6289         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6290         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6291         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6292         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6293         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6294         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6295         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6296         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6297         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6298         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6299
6300         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6301                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6302                                 asid);
6303                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6304                                 asid);
6305                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6306                                 asid);
6307                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6308                                 asid);
6309                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6310                                 asid);
6311         }
6312
6313         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6314                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6315                                 asid);
6316                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6317                                 asid);
6318                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6319                                 asid);
6320                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6321                                 asid);
6322                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6323                                 asid);
6324         }
6325
6326         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6327                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6328                                 asid);
6329                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6330                                 asid);
6331                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6332                                 asid);
6333                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6334                                 asid);
6335                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6336                                 asid);
6337         }
6338
6339         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6340                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6341                                 asid);
6342                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6343                                 asid);
6344                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6345                                 asid);
6346                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6347                                 asid);
6348                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6349                                 asid);
6350         }
6351
6352         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6353                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6354                                 asid);
6355                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6356                                 asid);
6357                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6358                                 asid);
6359                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6360                                 asid);
6361                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6362                                 asid);
6363         }
6364
6365         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6366                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6367                                 asid);
6368                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6369                                 asid);
6370                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6371                                 asid);
6372                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6373                                 asid);
6374                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6375                                 asid);
6376         }
6377
6378         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6379                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6380                                 asid);
6381                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6382                                 asid);
6383                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6384                                 asid);
6385                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6386                                 asid);
6387                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6388                                 asid);
6389         }
6390
6391         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6392                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6393                                 asid);
6394                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6395                                 asid);
6396                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6397                                 asid);
6398                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6399                                 asid);
6400                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6401                                 asid);
6402         }
6403
6404         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6405                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6406                                 asid);
6407                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6408                                 asid);
6409                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6410                                 asid);
6411                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6412                                 asid);
6413                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6414                                 asid);
6415         }
6416
6417         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6418                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6419                                 asid);
6420                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6421                                 asid);
6422                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6423                                 asid);
6424                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6425                                 asid);
6426                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6427                                 asid);
6428         }
6429
6430         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6431         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6432 }
6433
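/**
 * gaudi_send_job_on_qman0 - send a driver job on QMAN0 (PCI DMA queue)
 *
 * @hdev: pointer to the habanalabs device structure
 * @job: the job whose patched CB should be executed
 *
 * The device must be idle. The DMA core protection is temporarily opened,
 * the patched CB is sent with no completion entry, and completion is detected
 * by polling a fence value written by a MSG_PROT packet placed at the end of
 * the patched CB.
 *
 * Return: 0 on success, negative errno on failure or timeout.
 */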
6434 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6435                 struct hl_cs_job *job)
6436 {
6437         struct packet_msg_prot *fence_pkt;
6438         u32 *fence_ptr;
6439         dma_addr_t fence_dma_addr;
6440         struct hl_cb *cb;
6441         u32 tmp, timeout, dma_offset;
6442         int rc;
6443
6444         if (hdev->pldm)
6445                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6446         else
6447                 timeout = HL_DEVICE_TIMEOUT_USEC;
6448
6449         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6450                 dev_err_ratelimited(hdev->dev,
6451                         "Can't send driver job on QMAN0 because the device is not idle\n");
6452                 return -EBUSY;
6453         }
6454
6455         fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6456         if (!fence_ptr) {
6457                 dev_err(hdev->dev,
6458                         "Failed to allocate fence memory for QMAN0\n");
6459                 return -ENOMEM;
6460         }
6461
6462         cb = job->patched_cb;
6463
6464         fence_pkt = cb->kernel_address +
6465                         job->job_cb_size - sizeof(struct packet_msg_prot);
6466
6467         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6468         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6469         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6470
6471         fence_pkt->ctl = cpu_to_le32(tmp);
6472         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6473         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6474
6475         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6476
6477         WREG32(mmDMA0_CORE_PROT + dma_offset,
6478                         BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6479
6480         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6481                                         job->job_cb_size, cb->bus_address);
6482         if (rc) {
6483                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6484                 goto free_fence_ptr;
6485         }
6486
6487         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6488                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6489                                 timeout, true);
6490
6491         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6492
6493         if (rc == -ETIMEDOUT) {
6494                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6495                 goto free_fence_ptr;
6496         }
6497
6498 free_fence_ptr:
6499         WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6500
6501         hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6502         return rc;
6503 }
6504
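/**
 * gaudi_get_event_desc - get a printable description of an event
 *
 * @event_type: the event index reported by the FW
 * @desc: output buffer for the event name
 * @size: size of the output buffer
 *
 * Copies the event name from gaudi_irq_map_table, or "N/A" if the event type
 * is out of range or not marked valid in the table.
 */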
6505 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6506 {
6507         if (event_type >= GAUDI_EVENT_SIZE)
6508                 goto event_not_supported;
6509
6510         if (!gaudi_irq_map_table[event_type].valid)
6511                 goto event_not_supported;
6512
6513         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6514
6515         return;
6516
6517 event_not_supported:
6518         snprintf(desc, size, "N/A");
6519 }
6520
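/**
 * gaudi_get_razwi_initiator_dma_name - resolve which DMA engine caused a RAZWI
 *
 * @hdev: pointer to the habanalabs device structure
 * @x_y: X/Y coordinates captured in the RAZWI initiator ID register
 * @is_write: true for a write RAZWI, false for a read RAZWI
 * @engine_id_1: output for the first candidate engine id
 * @engine_id_2: output for the second candidate engine id
 *
 * Each DMA_IF coordinate is shared by two DMA cores, so the per-core error
 * cause registers are read to decide which core triggered the RAZWI. If they
 * are inconclusive, both candidate engines are reported.
 */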
6521 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6522                                                         bool is_write, s32 *engine_id_1,
6523                                                         s32 *engine_id_2)
6524 {
6525         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6526
6527         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6528                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6529
6530         switch (x_y) {
6531         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6532         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6533                 dma_id[0] = 0;
6534                 dma_id[1] = 2;
6535                 break;
6536         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6537         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6538                 dma_id[0] = 1;
6539                 dma_id[1] = 3;
6540                 break;
6541         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6542         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6543                 dma_id[0] = 4;
6544                 dma_id[1] = 6;
6545                 break;
6546         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6547         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6548                 dma_id[0] = 5;
6549                 dma_id[1] = 7;
6550                 break;
6551         default:
6552                 goto unknown_initiator;
6553         }
6554
6555         for (i = 0 ; i < 2 ; i++) {
6556                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6557                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6558         }
6559
6560         switch (x_y) {
6561         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6562         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6563                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6564                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6565                         return "DMA0";
6566                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6567                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6568                         return "DMA2";
6569                 } else {
6570                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6571                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6572                         return "DMA0 or DMA2";
6573                 }
6574         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6575         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6576                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6577                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6578                         return "DMA1";
6579                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6580                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6581                         return "DMA3";
6582                 } else {
6583                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6584                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6585                         return "DMA1 or DMA3";
6586                 }
6587         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6588         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6589                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6590                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6591                         return "DMA4";
6592                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6593                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6594                         return "DMA6";
6595                 } else {
6596                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6597                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6598                         return "DMA4 or DMA6";
6599                 }
6600         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6601         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6602                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6603                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6604                         return "DMA5";
6605                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6606                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6607                         return "DMA7";
6608                 } else {
6609                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6610                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6611                         return "DMA5 or DMA7";
6612                 }
6613         }
6614
6615 unknown_initiator:
6616         return "unknown initiator";
6617 }
6618
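/**
 * gaudi_get_razwi_initiator_name - get the name of the RAZWI initiator
 *
 * @hdev: pointer to the habanalabs device structure
 * @is_write: true to decode the write RAZWI ID register, false for the read one
 * @engine_id_1: output for the initiator engine id
 * @engine_id_2: output for a second candidate engine id (DMA initiators only)
 *
 * Decodes the X/Y coordinates and AXI ID captured in the MMU_UP RAZWI ID
 * register into an engine name. PCI, CPU and PSOC initiators have no engine id.
 */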
6619 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6620                                                         u32 *engine_id_1, u32 *engine_id_2)
6621 {
6622         u32 val, x_y, axi_id;
6623
6624         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6625                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
6626         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6627                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6628         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6629                         RAZWI_INITIATOR_AXI_ID_SHIFT);
6630
6631         switch (x_y) {
6632         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6633                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6634                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6635                         return "TPC0";
6636                 }
6637                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6638                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6639                         return "NIC0";
6640                 }
6641                 break;
6642         case RAZWI_INITIATOR_ID_X_Y_TPC1:
6643                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6644                 return "TPC1";
6645         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6646         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6647                 *engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6648                 return "MME0";
6649         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6650         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6651                 *engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6652                 return "MME1";
6653         case RAZWI_INITIATOR_ID_X_Y_TPC2:
6654                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6655                 return "TPC2";
6656         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6657                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6658                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6659                         return "TPC3";
6660                 }
6661                 /* PCI, CPU or PSOC does not have an engine id */
6662                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6663                         return "PCI";
6664                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6665                         return "CPU";
6666                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6667                         return "PSOC";
6668                 break;
6669         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6670         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6671         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6672         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6673         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6674         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6675         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6676         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6677                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6678                                 engine_id_1, engine_id_2);
6679         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6680                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6681                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6682                         return "TPC4";
6683                 }
6684                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6685                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6686                         return "NIC1";
6687                 }
6688                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6689                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6690                         return "NIC2";
6691                 }
6692                 break;
6693         case RAZWI_INITIATOR_ID_X_Y_TPC5:
6694                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6695                 return "TPC5";
6696         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6697         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6698                 *engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6699                 return "MME2";
6700         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6701         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6702                 *engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6703                 return "MME3";
6704         case RAZWI_INITIATOR_ID_X_Y_TPC6:
6705                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6706                 return "TPC6";
6707         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6708                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6709                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6710                         return "TPC7";
6711                 }
6712                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6713                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6714                         return "NIC4";
6715                 }
6716                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6717                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6718                         return "NIC5";
6719                 }
6720                 break;
6721         default:
6722                 break;
6723         }
6724
6725         dev_err(hdev->dev,
6726                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6727                 val,
6728                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6729                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6730                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6731                         RAZWI_INITIATOR_AXI_ID_MASK);
6732
6733         return "unknown initiator";
6734 }
6735
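/**
 * gaudi_print_and_get_razwi_info - report a RAZWI event and its initiator
 *
 * @hdev: pointer to the habanalabs device structure
 * @engine_id_1: output for the initiator engine id
 * @engine_id_2: output for a second candidate engine id
 *
 * Checks the write and read RAZWI valid bits, prints the initiator of each
 * valid access and clears the valid bits so the next RAZWI can be captured.
 */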
6736 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_id_1,
6737                                                 u32 *engine_id_2)
6738 {
6739
6740         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6741                 dev_err_ratelimited(hdev->dev,
6742                         "RAZWI event caused by illegal write of %s\n",
6743                         gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6744                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6745         }
6746
6747         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6748                 dev_err_ratelimited(hdev->dev,
6749                         "RAZWI event caused by illegal read of %s\n",
6750                         gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6751                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6752         }
6753 }
6754
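/**
 * gaudi_print_and_get_mmu_error_info - report MMU page fault / access errors
 *
 * @hdev: pointer to the habanalabs device structure
 * @addr: output for the faulting virtual address
 * @type: output for the error type (page fault or MMU access error)
 *
 * Reads the MMU error capture registers, rebuilds the faulting VA from the
 * captured bits 49:32 and the low 32 bits register, then clears the capture
 * registers so the next error can be recorded. Does nothing if the MMU is
 * not enabled.
 */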
6755 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u8 *type)
6756 {
6757         struct gaudi_device *gaudi = hdev->asic_specific;
6758         u32 val;
6759
6760         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6761                 return;
6762
6763         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6764         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6765                 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6766                 *addr <<= 32;
6767                 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6768
6769                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6770                 *type = HL_RAZWI_PAGE_FAULT;
6771
6772                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6773         }
6774
6775         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6776         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6777                 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6778                 *addr <<= 32;
6779                 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6780
6781                 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6782                 *type = HL_RAZWI_MMU_ACCESS_ERROR;
6783
6784                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6785         }
6786 }
6787
6788 /*
6789  *  +-------------------+------------------------------------------------------+
6790  *  | Configuration Reg |                     Description                      |
6791  *  |      Address      |                                                      |
6792  *  +-------------------+------------------------------------------------------+
6793  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6794  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6795  *  |                   |0xF34 memory wrappers 63:32                           |
6796  *  |                   |0xF38 memory wrappers 95:64                           |
6797  *  |                   |0xF3C memory wrappers 127:96                          |
6798  *  +-------------------+------------------------------------------------------+
6799  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6800  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6801  *  |                   |0xF44 memory wrappers 63:32                           |
6802  *  |                   |0xF48 memory wrappers 95:64                           |
6803  *  |                   |0xF4C memory wrappers 127:96                          |
6804  *  +-------------------+------------------------------------------------------+
6805  */
6806 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6807                 struct ecc_info_extract_params *params, u64 *ecc_address,
6808                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6809 {
6810         u32 i, num_mem_regs, reg, err_bit;
6811         u64 err_addr, err_word = 0;
6812
6813         num_mem_regs = params->num_memories / 32 +
6814                         ((params->num_memories % 32) ? 1 : 0);
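        /* e.g. 90 memories -> 90 / 32 + 1 = 3 ECC indication registers */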
6815
6816         if (params->block_address >= CFG_BASE)
6817                 params->block_address -= CFG_BASE;
6818
6819         if (params->derr)
6820                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6821         else
6822                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6823
6824         /* Set invalid wrapper index */
6825         *memory_wrapper_idx = 0xFF;
6826
6827         /* Iterate through memory wrappers, a single bit must be set */
6828         for (i = 0 ; i < num_mem_regs ; i++) {
6829                 err_addr += i * 4;
6830                 err_word = RREG32(err_addr);
6831                 if (err_word) {
6832                         err_bit = __ffs(err_word);
6833                         *memory_wrapper_idx = err_bit + (32 * i);
6834                         break;
6835                 }
6836         }
6837
6838         if (*memory_wrapper_idx == 0xFF) {
6839                 dev_err(hdev->dev, "ECC error information cannot be found\n");
6840                 return -EINVAL;
6841         }
6842
6843         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6844                         *memory_wrapper_idx);
6845
6846         *ecc_address =
6847                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6848         *ecc_syndrom =
6849                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6850
6851         /* Clear error indication */
6852         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6853         if (params->derr)
6854                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6855         else
6856                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6857
6858         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6859
6860         return 0;
6861 }
6862
6863 /**
6864  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6865  *
6866  * @idx: the current pi/ci value
6867  * @q_len: the queue length (power of 2)
6868  *
6869  * Return: the cyclically decremented index
6870  */
6871 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6872 {
6873         u32 mask = q_len - 1;
6874
6875         /*
6876          * modular decrement is equivalent to adding (q_len - 1);
6877          * later we take the LSBs to make sure the value stays in the
6878          * range [0, q_len - 1]
6879          */
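        /*
         * Example (illustrative): with q_len = 8 the mask is 7, so idx = 0
         * wraps back to (0 + 8 - 1) & 7 = 7 and idx = 5 becomes
         * (5 + 8 - 1) & 7 = 4.
         */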
6880         return (idx + q_len - 1) & mask;
6881 }
6882
6883 /**
6884  * gaudi_handle_sw_config_stream_data - print SW config stream data
6885  *
6886  * @hdev: pointer to the habanalabs device structure
6887  * @stream: the QMAN's stream
6888  * @qman_base: base address of QMAN registers block
6889  * @event_mask: mask of the last events occurred
6890  */
6891 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6892                                                 u64 qman_base, u64 event_mask)
6893 {
6894         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6895         u32 cq_ptr_lo_off, size;
6896
6897         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6898
6899         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6900                                                 stream * cq_ptr_lo_off;
6901         cq_ptr_hi = cq_ptr_lo +
6902                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6903         cq_tsize = cq_ptr_lo +
6904                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6905
6906         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6907         size = RREG32(cq_tsize);
6908         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6909                                                         stream, cq_ptr, size);
6910
6911         if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6912                 hdev->last_error.undef_opcode.cq_addr = cq_ptr;
6913                 hdev->last_error.undef_opcode.cq_size = size;
6914                 hdev->last_error.undef_opcode.stream_id = stream;
6915         }
6916 }
6917
6918 /**
6919  * gaudi_handle_last_pqes_on_err - print last PQEs on error
6920  *
6921  * @hdev: pointer to the habanalabs device structure
6922  * @qid_base: first QID of the QMAN (out of 4 streams)
6923  * @stream: the QMAN's stream
6924  * @qman_base: base address of QMAN registers block
6925  * @event_mask: mask of the last events occurred
6926  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6927  */
6928 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6929                                                 u32 stream, u64 qman_base,
6930                                                 u64 event_mask,
6931                                                 bool pr_sw_conf)
6932 {
6933         u32 ci, qm_ci_stream_off, queue_len;
6934         struct hl_hw_queue *q;
6935         u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6936         int i;
6937
6938         q = &hdev->kernel_queues[qid_base + stream];
6939
6940         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6941         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6942                                                 stream * qm_ci_stream_off;
6943
6944         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6945                                         q->int_queue_len : HL_QUEUE_LENGTH;
6946
6947         hdev->asic_funcs->hw_queues_lock(hdev);
6948
6949         if (pr_sw_conf)
6950                 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6951
6952         ci = RREG32(pq_ci);
6953
6954         /* we should start printing from ci - 1 */
6955         ci = gaudi_queue_idx_dec(ci, queue_len);
6956         memset(addr, 0, sizeof(addr));
6957
6958         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6959                 struct hl_bd *bd;
6960                 u32 len;
6961
6962                 bd = q->kernel_address;
6963                 bd += ci;
6964
6965                 len = le32_to_cpu(bd->len);
6966                 /* len 0 means an uninitialized entry - break */
6967                 if (!len)
6968                         break;
6969
6970                 addr[i] = le64_to_cpu(bd->ptr);
6971
6972                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6973                                                         stream, ci, addr[i], len);
6974
6975                 /* get previous ci, wrap if needed */
6976                 ci = gaudi_queue_idx_dec(ci, queue_len);
6977         }
6978
6979         if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6980                 struct undefined_opcode_info *undef_opcode = &hdev->last_error.undef_opcode;
6981                 u32 arr_idx = undef_opcode->cb_addr_streams_len;
6982
6983                 if (arr_idx == 0) {
6984                         undef_opcode->timestamp = ktime_get();
6985                         undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6986                 }
6987
6988                 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6989                 undef_opcode->cb_addr_streams_len++;
6990         }
6991
6992         hdev->asic_funcs->hw_queues_unlock(hdev);
6993 }
6994
6995 /**
6996  * handle_qman_data_on_err - extract QMAN data on error
6997  *
6998  * @hdev: pointer to the habanalabs device structure
6999  * @qid_base: first QID of the QMAN (out of 4 streams)
7000  * @stream: the QMAN's stream
7001  * @qman_base: base address of QMAN registers block
7002  * @event_mask: mask of the last events occurred
7003  *
7004  * This function attempts to extract as much data as possible on a QMAN error.
7005  * On the upper CP, print the SW config stream data and the last 8 PQEs.
7006  * On the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7007  */
7008 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7009                                    u32 stream, u64 qman_base, u64 event_mask)
7010 {
7011         u32 i;
7012
7013         if (stream != QMAN_STREAMS) {
7014                 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
7015                         qman_base, event_mask, true);
7016                 return;
7017         }
7018
7019         /* handle Lower-CP */
7020         gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
7021
7022         for (i = 0; i < QMAN_STREAMS; i++)
7023                 gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
7024                         qman_base, event_mask, false);
7025 }
7026
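/**
 * gaudi_handle_qman_err_generic - print all pending error causes of a QMAN
 *
 * @hdev: pointer to the habanalabs device structure
 * @qm_name: printable name of the QMAN
 * @qman_base: base address of QMAN registers block
 * @qid_base: first QID of the QMAN (out of 4 streams)
 * @event_mask: mask of the last events occurred
 *
 * Scans the GLBL_STS1 register of every stream and of the lower CP, prints the
 * decoded error causes and the ARB error cause. If stop_on_err is set, the per
 * stream data is dumped instead of clearing the status bits.
 */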
7027 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7028                                           const char *qm_name,
7029                                           u64 qman_base,
7030                                           u32 qid_base,
7031                                           u64 *event_mask)
7032 {
7033         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7034         u64 glbl_sts_addr, arb_err_addr;
7035         char reg_desc[32];
7036
7037         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7038         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7039
7040         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7041         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7042                 glbl_sts_clr_val = 0;
7043                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7044
7045                 if (!glbl_sts_val)
7046                         continue;
7047
7048                 if (i == QMAN_STREAMS)
7049                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7050                 else
7051                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7052
7053                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7054                         if (glbl_sts_val & BIT(j)) {
7055                                 dev_err_ratelimited(hdev->dev,
7056                                                 "%s %s. err cause: %s\n",
7057                                                 qm_name, reg_desc,
7058                                                 gaudi_qman_error_cause[j]);
7059                                 glbl_sts_clr_val |= BIT(j);
7060                         }
7061                 }
7062                 /* check for undefined opcode */
7063                 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
7064                                 hdev->last_error.undef_opcode.write_enable) {
7065                         memset(&hdev->last_error.undef_opcode, 0,
7066                                                 sizeof(hdev->last_error.undef_opcode));
7067
7068                         hdev->last_error.undef_opcode.write_enable = false;
7069                         *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
7070                 }
7071
7072                 /* Write 1 to clear errors */
7073                 if (!hdev->stop_on_err)
7074                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7075                 else
7076                         handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
7077         }
7078
7079         arb_err_val = RREG32(arb_err_addr);
7080
7081         if (!arb_err_val)
7082                 return;
7083
7084         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7085                 if (arb_err_val & BIT(j)) {
7086                         dev_err_ratelimited(hdev->dev,
7087                                         "%s ARB_ERR. err cause: %s\n",
7088                                         qm_name,
7089                                         gaudi_qman_arb_error_cause[j]);
7090                 }
7091         }
7092 }
7093
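/**
 * gaudi_print_sm_sei_info - print a sync manager SEI error
 *
 * @hdev: pointer to the habanalabs device structure
 * @event_type: the DMA_IF SEI event that was received
 * @sei_data: the SEI payload delivered with the FW event
 *
 * Maps the event to the sync manager name and prints the cause (SOB group
 * overflow, unaligned 4B LBW access or AXI response error) together with the
 * logged value.
 */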
7094 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7095                 struct hl_eq_sm_sei_data *sei_data)
7096 {
7097         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7098
7099         /* Flip the bits as the enum is ordered in the opposite way */
7100         index = (index ^ 0x3) & 0x3;
7101
7102         switch (sei_data->sei_cause) {
7103         case SM_SEI_SO_OVERFLOW:
7104                 dev_err_ratelimited(hdev->dev,
7105                         "%s SEI Error: SOB Group %u overflow/underflow",
7106                         gaudi_sync_manager_names[index],
7107                         le32_to_cpu(sei_data->sei_log));
7108                 break;
7109         case SM_SEI_LBW_4B_UNALIGNED:
7110                 dev_err_ratelimited(hdev->dev,
7111                         "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7112                         gaudi_sync_manager_names[index],
7113                         le32_to_cpu(sei_data->sei_log));
7114                 break;
7115         case SM_SEI_AXI_RESPONSE_ERR:
7116                 dev_err_ratelimited(hdev->dev,
7117                         "%s SEI Error: AXI ID %u response error",
7118                         gaudi_sync_manager_names[index],
7119                         le32_to_cpu(sei_data->sei_log));
7120                 break;
7121         default:
7122                 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7123                                 le32_to_cpu(sei_data->sei_log));
7124                 break;
7125         }
7126 }
7127
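/**
 * gaudi_handle_ecc_event - report an ECC error event
 *
 * @hdev: pointer to the habanalabs device structure
 * @event_type: the ECC event that was received
 * @ecc_data: ECC information supplied with the FW event
 *
 * For most blocks, or whenever FW security is enabled, the ECC address,
 * syndrome and memory wrapper index are taken from the FW data. Only for TPC
 * and MME ECC events are they extracted directly from the block's registers.
 */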
7128 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7129                 struct hl_eq_ecc_data *ecc_data)
7130 {
7131         struct ecc_info_extract_params params;
7132         u64 ecc_address = 0, ecc_syndrom = 0;
7133         u8 index, memory_wrapper_idx = 0;
7134         bool extract_info_from_fw;
7135         int rc;
7136
7137         if (hdev->asic_prop.fw_security_enabled) {
7138                 extract_info_from_fw = true;
7139                 goto extract_ecc_info;
7140         }
7141
7142         switch (event_type) {
7143         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7144         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7145                 extract_info_from_fw = true;
7146                 break;
7147         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7148                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7149                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7150                 params.num_memories = 90;
7151                 params.derr = false;
7152                 extract_info_from_fw = false;
7153                 break;
7154         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7155                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7156                 params.block_address =
7157                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7158                 params.num_memories = 90;
7159                 params.derr = true;
7160                 extract_info_from_fw = false;
7161                 break;
7162         case GAUDI_EVENT_MME0_ACC_SERR:
7163         case GAUDI_EVENT_MME1_ACC_SERR:
7164         case GAUDI_EVENT_MME2_ACC_SERR:
7165         case GAUDI_EVENT_MME3_ACC_SERR:
7166                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7167                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7168                 params.num_memories = 128;
7169                 params.derr = false;
7170                 extract_info_from_fw = false;
7171                 break;
7172         case GAUDI_EVENT_MME0_ACC_DERR:
7173         case GAUDI_EVENT_MME1_ACC_DERR:
7174         case GAUDI_EVENT_MME2_ACC_DERR:
7175         case GAUDI_EVENT_MME3_ACC_DERR:
7176                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7177                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7178                 params.num_memories = 128;
7179                 params.derr = true;
7180                 extract_info_from_fw = false;
7181                 break;
7182         case GAUDI_EVENT_MME0_SBAB_SERR:
7183         case GAUDI_EVENT_MME1_SBAB_SERR:
7184         case GAUDI_EVENT_MME2_SBAB_SERR:
7185         case GAUDI_EVENT_MME3_SBAB_SERR:
7186                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7187                 params.block_address =
7188                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7189                 params.num_memories = 33;
7190                 params.derr = false;
7191                 extract_info_from_fw = false;
7192                 break;
7193         case GAUDI_EVENT_MME0_SBAB_DERR:
7194         case GAUDI_EVENT_MME1_SBAB_DERR:
7195         case GAUDI_EVENT_MME2_SBAB_DERR:
7196         case GAUDI_EVENT_MME3_SBAB_DERR:
7197                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7198                 params.block_address =
7199                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7200                 params.num_memories = 33;
7201                 params.derr = true;
7202                 extract_info_from_fw = false;
7203                 break;
7204         default:
7205                 return;
7206         }
7207
7208 extract_ecc_info:
7209         if (extract_info_from_fw) {
7210                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7211                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7212                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7213         } else {
7214                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7215                                 &ecc_syndrom, &memory_wrapper_idx);
7216                 if (rc)
7217                         return;
7218         }
7219
7220         dev_err(hdev->dev,
7221                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7222                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7223 }
7224
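/**
 * gaudi_handle_qman_err - handle a QMAN error event
 *
 * @hdev: pointer to the habanalabs device structure
 * @event_type: the QMAN event that was received
 * @event_mask: mask of the last events occurred
 *
 * Maps the event to the QMAN's register base, QID base and printable name and
 * forwards them to gaudi_handle_qman_err_generic(). A TPC QM event also sets
 * the TPC assert bit in the event mask.
 */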
7225 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7226 {
7227         u64 qman_base;
7228         char desc[32];
7229         u32 qid_base;
7230         u8 index;
7231
7232         switch (event_type) {
7233         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7234                 /* On a TPC QM event, notify on a TPC assertion. While there isn't
7235                  * a specific event for an assertion yet, the FW generates a QM event.
7236                  * The SW upper layer will inspect an internally mapped area to
7237                  * determine whether the event is a TPC assertion or a TPC QM error.
7238                  */
7239                 *event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7240                 index = event_type - GAUDI_EVENT_TPC0_QM;
7241                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7242                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7243                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7244                 break;
7245         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7246                 if (event_type == GAUDI_EVENT_MME0_QM) {
7247                         index = 0;
7248                         qid_base = GAUDI_QUEUE_ID_MME_0_0;
7249                 } else { /* event_type == GAUDI_EVENT_MME2_QM */
7250                         index = 2;
7251                         qid_base = GAUDI_QUEUE_ID_MME_1_0;
7252                 }
7253                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7254                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7255                 break;
7256         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7257                 index = event_type - GAUDI_EVENT_DMA0_QM;
7258                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7259                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7260                 if (index > 1)
7261                         qid_base++;
7262                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7263                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7264                 break;
7265         case GAUDI_EVENT_NIC0_QM0:
7266                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7267                 qman_base = mmNIC0_QM0_BASE;
7268                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7269                 break;
7270         case GAUDI_EVENT_NIC0_QM1:
7271                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7272                 qman_base = mmNIC0_QM1_BASE;
7273                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7274                 break;
7275         case GAUDI_EVENT_NIC1_QM0:
7276                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7277                 qman_base = mmNIC1_QM0_BASE;
7278                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7279                 break;
7280         case GAUDI_EVENT_NIC1_QM1:
7281                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7282                 qman_base = mmNIC1_QM1_BASE;
7283                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7284                 break;
7285         case GAUDI_EVENT_NIC2_QM0:
7286                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7287                 qman_base = mmNIC2_QM0_BASE;
7288                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7289                 break;
7290         case GAUDI_EVENT_NIC2_QM1:
7291                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7292                 qman_base = mmNIC2_QM1_BASE;
7293                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7294                 break;
7295         case GAUDI_EVENT_NIC3_QM0:
7296                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7297                 qman_base = mmNIC3_QM0_BASE;
7298                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7299                 break;
7300         case GAUDI_EVENT_NIC3_QM1:
7301                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7302                 qman_base = mmNIC3_QM1_BASE;
7303                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7304                 break;
7305         case GAUDI_EVENT_NIC4_QM0:
7306                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7307                 qman_base = mmNIC4_QM0_BASE;
7308                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7309                 break;
7310         case GAUDI_EVENT_NIC4_QM1:
7311                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7312                 qman_base = mmNIC4_QM1_BASE;
7313                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7314                 break;
7315         default:
7316                 return;
7317         }
7318
7319         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7320 }
7321
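/**
 * gaudi_print_irq_info - print information about a received H/W interrupt
 *
 * @hdev: pointer to the habanalabs device structure
 * @event_type: the event that was received
 * @razwi: true if RAZWI/MMU error information should be collected as well
 *
 * Prints the event name and, when requested, the RAZWI initiator and the MMU
 * error capture data. The parameters of the first RAZWI are saved in
 * hdev->last_error.
 */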
7322 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7323                                         bool razwi)
7324 {
7325         u32 engine_id_1, engine_id_2;
7326         char desc[64] = "";
7327         u64 razwi_addr = 0;
7328         u8 razwi_type;
7329         int rc;
7330
7331         /*
7332          * Init the engine ids as not valid by default; they get a valid value only if
7333          * the razwi was initiated by an engine that has an engine id.
7334          * Init the razwi type to its default; it is changed only if the razwi was caused
7335          * by a page fault or an MMU access error.
7336          */
7337         engine_id_1 = U16_MAX;
7338         engine_id_2 = U16_MAX;
7339         razwi_type = U8_MAX;
7340
7341         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7342         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7343                 event_type, desc);
7344
7345         if (razwi) {
7346                 gaudi_print_and_get_razwi_info(hdev, &engine_id_1, &engine_id_2);
7347                 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
7348
7349                 /* In case it's the first razwi, save its parameters */
7350                 rc = atomic_cmpxchg(&hdev->last_error.razwi.write_enable, 1, 0);
7351                 if (rc) {
7352                         hdev->last_error.razwi.timestamp = ktime_get();
7353                         hdev->last_error.razwi.addr = razwi_addr;
7354                         hdev->last_error.razwi.engine_id_1 = engine_id_1;
7355                         hdev->last_error.razwi.engine_id_2 = engine_id_2;
7356                         /*
7357                          * If the first engine id holds a non-valid value, the razwi
7358                          * initiator does not have an engine id
7359                          */
7360                         hdev->last_error.razwi.non_engine_initiator = (engine_id_1 == U16_MAX);
7361                         hdev->last_error.razwi.type = razwi_type;
7362
7363                 }
7364         }
7365 }
7366
7367 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7368                                         struct cpucp_pkt_sync_err *sync_err)
7369 {
7370         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7371
7372         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7373                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7374 }
7375
7376 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7377                                         struct hl_eq_fw_alive *fw_alive)
7378 {
7379         dev_err(hdev->dev,
7380                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7381                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7382                 "Minor" : "Critical", fw_alive->process_id,
7383                 fw_alive->thread_id, fw_alive->uptime_seconds);
7384 }
7385
7386 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7387                                                 void *data)
7388 {
7389         char desc[64] = "", *type;
7390         struct eq_nic_sei_event *eq_nic_sei = data;
7391         u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7392
7393         switch (eq_nic_sei->axi_error_cause) {
7394         case RXB:
7395                 type = "RXB";
7396                 break;
7397         case RXE:
7398                 type = "RXE";
7399                 break;
7400         case TXS:
7401                 type = "TXS";
7402                 break;
7403         case TXE:
7404                 type = "TXE";
7405                 break;
7406         case QPC_RESP:
7407                 type = "QPC_RESP";
7408                 break;
7409         case NON_AXI_ERR:
7410                 type = "NON_AXI_ERR";
7411                 break;
7412         case TMR:
7413                 type = "TMR";
7414                 break;
7415         default:
7416                 dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7417                         eq_nic_sei->axi_error_cause);
7418                 type = "N/A";
7419                 break;
7420         }
7421
7422         snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7423                         eq_nic_sei->id);
7424         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7425                 event_type, desc);
7426 }
7427
7428 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7429 {
7430         /* GAUDI doesn't support any reset except hard-reset */
7431         return -EPERM;
7432 }
7433
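/**
 * gaudi_hbm_read_interrupts - report HBM ECC/parity interrupt information
 *
 * @hdev: pointer to the habanalabs device structure
 * @device: index of the HBM device that raised the interrupt
 * @hbm_ecc_data: ECC information supplied by the FW, if available
 *
 * When the FW reports HBM ECC data, that data is printed. Otherwise, if FW
 * security allows register access, the HBM MC registers are read, printed and
 * cleared.
 *
 * Return: 0 on success, -EIO if an error indication was found in the registers.
 */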
7434 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7435                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7436 {
7437         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7438         int rc = 0;
7439
7440         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7441                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7442                 if (!hbm_ecc_data) {
7443                         dev_err(hdev->dev, "No FW ECC data");
7444                         return 0;
7445                 }
7446
7447                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7448                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7449                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7450                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7451                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7452                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7453                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7454                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7455                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7456                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7457                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7458                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7459                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7460                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7461
7462                 dev_err(hdev->dev,
7463                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7464                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7465                 dev_err(hdev->dev,
7466                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7467                         device, ch, hbm_ecc_data->first_addr, type,
7468                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7469                         hbm_ecc_data->dec_cnt);
7470                 return 0;
7471         }
7472
7473         if (hdev->asic_prop.fw_security_enabled) {
7474                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7475                 return 0;
7476         }
7477
7478         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7479         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7480                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7481                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7482                 if (val) {
7483                         rc = -EIO;
7484                         dev_err(hdev->dev,
7485                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7486                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7487                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7488                                 (val >> 4) & 0x1);
7489
7490                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7491                         dev_err(hdev->dev,
7492                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7493                                 device, ch * 2,
7494                                 RREG32(base + ch * 0x1000 + 0x064),
7495                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7496                                 (val2 & 0xFF0000) >> 16,
7497                                 (val2 & 0xFF000000) >> 24);
7498                 }
7499
7500                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7501                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7502                 if (val) {
7503                         rc = -EIO;
7504                         dev_err(hdev->dev,
7505                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7506                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7507                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7508                                 (val >> 4) & 0x1);
7509
7510                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7511                         dev_err(hdev->dev,
7512                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7513                                 device, ch * 2 + 1,
7514                                 RREG32(base + ch * 0x1000 + 0x074),
7515                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7516                                 (val2 & 0xFF0000) >> 16,
7517                                 (val2 & 0xFF000000) >> 24);
7518                 }
7519
7520                 /* Clear interrupts */
7521                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7522                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7523                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7524                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7525                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7526                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7527         }
7528
7529         val  = RREG32(base + 0x8F30);
7530         val2 = RREG32(base + 0x8F34);
7531         if (val | val2) {
7532                 rc = -EIO;
7533                 dev_err(hdev->dev,
7534                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7535                         device, val, val2);
7536         }
7537         val  = RREG32(base + 0x8F40);
7538         val2 = RREG32(base + 0x8F44);
7539         if (val | val2) {
7540                 rc = -EIO;
7541                 dev_err(hdev->dev,
7542                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7543                         device, val, val2);
7544         }
7545
7546         return rc;
7547 }
7548
7549 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7550 {
7551         switch (hbm_event_type) {
7552         case GAUDI_EVENT_HBM0_SPI_0:
7553         case GAUDI_EVENT_HBM0_SPI_1:
7554                 return 0;
7555         case GAUDI_EVENT_HBM1_SPI_0:
7556         case GAUDI_EVENT_HBM1_SPI_1:
7557                 return 1;
7558         case GAUDI_EVENT_HBM2_SPI_0:
7559         case GAUDI_EVENT_HBM2_SPI_1:
7560                 return 2;
7561         case GAUDI_EVENT_HBM3_SPI_0:
7562         case GAUDI_EVENT_HBM3_SPI_1:
7563                 return 3;
7564         default:
7565                 break;
7566         }
7567
7568         /* Should never happen */
7569         return 0;
7570 }
7571
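/**
 * gaudi_tpc_read_interrupts - print and clear a TPC's interrupt causes
 *
 * @hdev: pointer to the habanalabs device structure
 * @tpc_id: index of the TPC that raised the interrupt
 * @interrupt_name: printable name of the interrupt source
 *
 * Return: true if a soft reset is required (cause bit 15, a QM error).
 */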
7572 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7573                                         char *interrupt_name)
7574 {
7575         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7576         bool soft_reset_required = false;
7577
7578         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7579                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7580
7581         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7582                 if (tpc_interrupts_cause & BIT(i)) {
7583                         dev_err_ratelimited(hdev->dev,
7584                                         "TPC%d_%s interrupt cause: %s\n",
7585                                         tpc_id, interrupt_name,
7586                                         gaudi_tpc_interrupts_cause[i]);
7587                         /* If this is a QM error, we need to soft-reset */
7588                         if (i == 15)
7589                                 soft_reset_required = true;
7590                 }
7591
7592         /* Clear interrupts */
7593         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7594
7595         return soft_reset_required;
7596 }
7597
7598 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7599 {
7600         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7601 }
7602
7603 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7604 {
7605         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7606 }
7607
7608 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type)
7609 {
7610         ktime_t zero_time = ktime_set(0, 0);
7611
7612         mutex_lock(&hdev->clk_throttling.lock);
7613
7614         switch (event_type) {
7615         case GAUDI_EVENT_FIX_POWER_ENV_S:
7616                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7617                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7618                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7619                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7620                 dev_info_ratelimited(hdev->dev,
7621                         "Clock throttling due to power consumption\n");
7622                 break;
7623
7624         case GAUDI_EVENT_FIX_POWER_ENV_E:
7625                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7626                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7627                 dev_info_ratelimited(hdev->dev,
7628                         "Power envelope is safe, back to optimal clock\n");
7629                 break;
7630
7631         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7632                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7633                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7634                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7635                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7636                 dev_info_ratelimited(hdev->dev,
7637                         "Clock throttling due to overheating\n");
7638                 break;
7639
7640         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7641                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7642                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7643                 dev_info_ratelimited(hdev->dev,
7644                         "Thermal envelope is safe, back to optimal clock\n");
7645                 break;
7646
7647         default:
7648                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7649                         event_type);
7650                 break;
7651         }
7652
7653         mutex_unlock(&hdev->clk_throttling.lock);
7654 }
7655
7656 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7657 {
7658         struct gaudi_device *gaudi = hdev->asic_specific;
7659         u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7660         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7661         u32 fw_fatal_err_flag = 0, flags = 0;
7662         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7663                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7664         bool reset_required, reset_direct = false;
7665         u8 cause;
7666         int rc;
7667
7668         if (event_type >= GAUDI_EVENT_SIZE) {
7669                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
7670                                 event_type, GAUDI_EVENT_SIZE - 1);
7671                 return;
7672         }
7673
7674         gaudi->events_stat[event_type]++;
7675         gaudi->events_stat_aggregate[event_type]++;
7676
7677         switch (event_type) {
7678         case GAUDI_EVENT_PCIE_CORE_DERR:
7679         case GAUDI_EVENT_PCIE_IF_DERR:
7680         case GAUDI_EVENT_PCIE_PHY_DERR:
7681         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7682         case GAUDI_EVENT_MME0_ACC_DERR:
7683         case GAUDI_EVENT_MME0_SBAB_DERR:
7684         case GAUDI_EVENT_MME1_ACC_DERR:
7685         case GAUDI_EVENT_MME1_SBAB_DERR:
7686         case GAUDI_EVENT_MME2_ACC_DERR:
7687         case GAUDI_EVENT_MME2_SBAB_DERR:
7688         case GAUDI_EVENT_MME3_ACC_DERR:
7689         case GAUDI_EVENT_MME3_SBAB_DERR:
7690         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7691                 fallthrough;
7692         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7693         case GAUDI_EVENT_PSOC_MEM_DERR:
7694         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7695         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7696         case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7697         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7698         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7699         case GAUDI_EVENT_MMU_DERR:
7700         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7701                 gaudi_print_irq_info(hdev, event_type, true);
7702                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7703                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7704                 goto reset_device;
7705
7706         case GAUDI_EVENT_GIC500:
7707         case GAUDI_EVENT_AXI_ECC:
7708         case GAUDI_EVENT_L2_RAM_ECC:
7709         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7710                 gaudi_print_irq_info(hdev, event_type, false);
7711                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7712                 goto reset_device;
7713
7714         case GAUDI_EVENT_HBM0_SPI_0:
7715         case GAUDI_EVENT_HBM1_SPI_0:
7716         case GAUDI_EVENT_HBM2_SPI_0:
7717         case GAUDI_EVENT_HBM3_SPI_0:
7718                 gaudi_print_irq_info(hdev, event_type, false);
7719                 gaudi_hbm_read_interrupts(hdev,
7720                                 gaudi_hbm_event_to_dev(event_type),
7721                                 &eq_entry->hbm_ecc_data);
7722                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7723                 goto reset_device;
7724
7725         case GAUDI_EVENT_HBM0_SPI_1:
7726         case GAUDI_EVENT_HBM1_SPI_1:
7727         case GAUDI_EVENT_HBM2_SPI_1:
7728         case GAUDI_EVENT_HBM3_SPI_1:
7729                 gaudi_print_irq_info(hdev, event_type, false);
7730                 gaudi_hbm_read_interrupts(hdev,
7731                                 gaudi_hbm_event_to_dev(event_type),
7732                                 &eq_entry->hbm_ecc_data);
7733                 hl_fw_unmask_irq(hdev, event_type);
7734                 break;
7735
7736         case GAUDI_EVENT_TPC0_DEC:
7737         case GAUDI_EVENT_TPC1_DEC:
7738         case GAUDI_EVENT_TPC2_DEC:
7739         case GAUDI_EVENT_TPC3_DEC:
7740         case GAUDI_EVENT_TPC4_DEC:
7741         case GAUDI_EVENT_TPC5_DEC:
7742         case GAUDI_EVENT_TPC6_DEC:
7743         case GAUDI_EVENT_TPC7_DEC:
7744                 gaudi_print_irq_info(hdev, event_type, true);
7745                 reset_required = gaudi_tpc_read_interrupts(hdev,
7746                                         tpc_dec_event_to_tpc_id(event_type),
7747                                         "AXI_SLV_DEC_Error");
7748                 if (reset_required) {
7749                         dev_err(hdev->dev, "reset required due to %s\n",
7750                                 gaudi_irq_map_table[event_type].name);
7751
7752                         reset_direct = true;
7753                         goto reset_device;
7754                 } else {
7755                         hl_fw_unmask_irq(hdev, event_type);
7756                 }
7757                 break;
7758
7759         case GAUDI_EVENT_TPC0_KRN_ERR:
7760         case GAUDI_EVENT_TPC1_KRN_ERR:
7761         case GAUDI_EVENT_TPC2_KRN_ERR:
7762         case GAUDI_EVENT_TPC3_KRN_ERR:
7763         case GAUDI_EVENT_TPC4_KRN_ERR:
7764         case GAUDI_EVENT_TPC5_KRN_ERR:
7765         case GAUDI_EVENT_TPC6_KRN_ERR:
7766         case GAUDI_EVENT_TPC7_KRN_ERR:
7767                 gaudi_print_irq_info(hdev, event_type, true);
7768                 reset_required = gaudi_tpc_read_interrupts(hdev,
7769                                         tpc_krn_event_to_tpc_id(event_type),
7770                                         "KRN_ERR");
7771                 if (reset_required) {
7772                         dev_err(hdev->dev, "reset required due to %s\n",
7773                                 gaudi_irq_map_table[event_type].name);
7774
7775                         reset_direct = true;
7776                         goto reset_device;
7777                 } else {
7778                         hl_fw_unmask_irq(hdev, event_type);
7779                 }
7780                 break;
7781
7782         case GAUDI_EVENT_PCIE_CORE_SERR:
7783         case GAUDI_EVENT_PCIE_IF_SERR:
7784         case GAUDI_EVENT_PCIE_PHY_SERR:
7785         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7786         case GAUDI_EVENT_MME0_ACC_SERR:
7787         case GAUDI_EVENT_MME0_SBAB_SERR:
7788         case GAUDI_EVENT_MME1_ACC_SERR:
7789         case GAUDI_EVENT_MME1_SBAB_SERR:
7790         case GAUDI_EVENT_MME2_ACC_SERR:
7791         case GAUDI_EVENT_MME2_SBAB_SERR:
7792         case GAUDI_EVENT_MME3_ACC_SERR:
7793         case GAUDI_EVENT_MME3_SBAB_SERR:
7794         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7795         case GAUDI_EVENT_CPU_IF_ECC_SERR:
7796         case GAUDI_EVENT_PSOC_MEM_SERR:
7797         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7798         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7799         case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7800         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7801         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7802                 fallthrough;
7803         case GAUDI_EVENT_MMU_SERR:
7804                 gaudi_print_irq_info(hdev, event_type, true);
7805                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7806                 hl_fw_unmask_irq(hdev, event_type);
7807                 break;
7808
7809         case GAUDI_EVENT_PCIE_DEC:
7810         case GAUDI_EVENT_MME0_WBC_RSP:
7811         case GAUDI_EVENT_MME0_SBAB0_RSP:
7812         case GAUDI_EVENT_MME1_WBC_RSP:
7813         case GAUDI_EVENT_MME1_SBAB0_RSP:
7814         case GAUDI_EVENT_MME2_WBC_RSP:
7815         case GAUDI_EVENT_MME2_SBAB0_RSP:
7816         case GAUDI_EVENT_MME3_WBC_RSP:
7817         case GAUDI_EVENT_MME3_SBAB0_RSP:
7818         case GAUDI_EVENT_CPU_AXI_SPLITTER:
7819         case GAUDI_EVENT_PSOC_AXI_DEC:
7820         case GAUDI_EVENT_PSOC_PRSTN_FALL:
7821         case GAUDI_EVENT_MMU_PAGE_FAULT:
7822         case GAUDI_EVENT_MMU_WR_PERM:
7823         case GAUDI_EVENT_RAZWI_OR_ADC:
7824         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7825         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7826                 fallthrough;
7827         case GAUDI_EVENT_NIC0_QM0:
7828         case GAUDI_EVENT_NIC0_QM1:
7829         case GAUDI_EVENT_NIC1_QM0:
7830         case GAUDI_EVENT_NIC1_QM1:
7831         case GAUDI_EVENT_NIC2_QM0:
7832         case GAUDI_EVENT_NIC2_QM1:
7833         case GAUDI_EVENT_NIC3_QM0:
7834         case GAUDI_EVENT_NIC3_QM1:
7835         case GAUDI_EVENT_NIC4_QM0:
7836         case GAUDI_EVENT_NIC4_QM1:
7837         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7838         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7839                 gaudi_print_irq_info(hdev, event_type, true);
7840                 gaudi_handle_qman_err(hdev, event_type, &event_mask);
7841                 hl_fw_unmask_irq(hdev, event_type);
7842                 break;
7843
7844         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7845                 gaudi_print_irq_info(hdev, event_type, true);
7846                 goto reset_device;
7847
7848         case GAUDI_EVENT_TPC0_BMON_SPMU:
7849         case GAUDI_EVENT_TPC1_BMON_SPMU:
7850         case GAUDI_EVENT_TPC2_BMON_SPMU:
7851         case GAUDI_EVENT_TPC3_BMON_SPMU:
7852         case GAUDI_EVENT_TPC4_BMON_SPMU:
7853         case GAUDI_EVENT_TPC5_BMON_SPMU:
7854         case GAUDI_EVENT_TPC6_BMON_SPMU:
7855         case GAUDI_EVENT_TPC7_BMON_SPMU:
7856         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7857                 gaudi_print_irq_info(hdev, event_type, false);
7858                 hl_fw_unmask_irq(hdev, event_type);
7859                 break;
7860
7861         case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7862                 gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7863                 hl_fw_unmask_irq(hdev, event_type);
7864                 break;
7865
7866         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7867                 gaudi_print_irq_info(hdev, event_type, false);
7868                 gaudi_print_sm_sei_info(hdev, event_type,
7869                                         &eq_entry->sm_sei_data);
7870                 rc = hl_state_dump(hdev);
7871                 if (rc)
7872                         dev_err(hdev->dev,
7873                                 "Error during system state dump %d\n", rc);
7874                 hl_fw_unmask_irq(hdev, event_type);
7875                 break;
7876
7877         case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7878                 break;
7879
7880         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7881                 gaudi_print_clk_change_info(hdev, event_type);
7882                 hl_fw_unmask_irq(hdev, event_type);
7883                 break;
7884
7885         case GAUDI_EVENT_PSOC_GPIO_U16_0:
7886                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7887                 dev_err(hdev->dev,
7888                         "Received high temp H/W interrupt %d (cause %d)\n",
7889                         event_type, cause);
7890                 break;
7891
7892         case GAUDI_EVENT_DEV_RESET_REQ:
7893                 gaudi_print_irq_info(hdev, event_type, false);
7894                 goto reset_device;
7895
7896         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7897                 gaudi_print_irq_info(hdev, event_type, false);
7898                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7899                 goto reset_device;
7900
7901         case GAUDI_EVENT_FW_ALIVE_S:
7902                 gaudi_print_irq_info(hdev, event_type, false);
7903                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7904                 goto reset_device;
7905
7906         default:
7907                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7908                                 event_type);
7909                 break;
7910         }
7911
7912         if (event_mask)
7913                 hl_notifier_event_send_all(hdev, event_mask);
7914
7915         return;
7916
7917 reset_device:
7918         reset_required = true;
7919
7920         if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7921                 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7922
7923                 /* notify on device unavailable while the reset is triggered by FW */
7924                 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7925                                         HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7926         } else if (hdev->hard_reset_on_fw_events) {
7927                 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7928                 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7929         } else {
7930                 reset_required = false;
7931         }
7932
7933         /* Even if the reset is not executed, a notification on the
7934          * occurred event still needs to be sent here
7935          */
7936         hl_notifier_event_send_all(hdev, event_mask);
7937         if (reset_required)
7938                 hl_device_reset(hdev, flags);
7939         else
7940                 hl_fw_unmask_irq(hdev, event_type);
7941 }
7942
7943 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7944 {
7945         struct gaudi_device *gaudi = hdev->asic_specific;
7946
7947         if (aggregate) {
7948                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
7949                 return gaudi->events_stat_aggregate;
7950         }
7951
7952         *size = (u32) sizeof(gaudi->events_stat);
7953         return gaudi->events_stat;
7954 }
7955
7956 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7957 {
7958         struct gaudi_device *gaudi = hdev->asic_specific;
7959         u32 status, timeout_usec;
7960         int rc;
7961
7962         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7963                 hdev->reset_info.hard_reset_pending)
7964                 return 0;
7965
7966         if (hdev->pldm)
7967                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7968         else
7969                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7970
7971         /* L0 & L1 invalidation */
7972         WREG32(mmSTLB_INV_PS, 3);
7973         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7974         WREG32(mmSTLB_INV_PS, 2);
7975
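             /* Poll until mmSTLB_INV_PS reads zero, indicating the invalidation has completed */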
7976         rc = hl_poll_timeout(
7977                 hdev,
7978                 mmSTLB_INV_PS,
7979                 status,
7980                 !status,
7981                 1000,
7982                 timeout_usec);
7983
7984         WREG32(mmSTLB_INV_SET, 0);
7985
7986         return rc;
7987 }
7988
7989 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7990                                                 bool is_hard, u32 flags,
7991                                                 u32 asid, u64 va, u64 size)
7992 {
7993         /* Treat as invalidate all because there is no range invalidation
7994          * in Gaudi
7995          */
7996         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
7997 }
7998
7999 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
8000 {
8001         u32 status, timeout_usec;
8002         int rc;
8003
8004         if (hdev->pldm)
8005                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8006         else
8007                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8008
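             /* Program the hop0 address for this ASID and wait for the MMU busy bit (bit 31) to clear */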
8009         WREG32(MMU_ASID, asid);
8010         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8011         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8012         WREG32(MMU_BUSY, 0x80000000);
8013
8014         rc = hl_poll_timeout(
8015                 hdev,
8016                 MMU_BUSY,
8017                 status,
8018                 !(status & 0x80000000),
8019                 1000,
8020                 timeout_usec);
8021
8022         if (rc) {
8023                 dev_err(hdev->dev,
8024                         "Timeout during MMU hop0 config of asid %d\n", asid);
8025                 return rc;
8026         }
8027
8028         return 0;
8029 }
8030
8031 static int gaudi_send_heartbeat(struct hl_device *hdev)
8032 {
8033         struct gaudi_device *gaudi = hdev->asic_specific;
8034
8035         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8036                 return 0;
8037
8038         return hl_fw_send_heartbeat(hdev);
8039 }
8040
8041 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8042 {
8043         struct gaudi_device *gaudi = hdev->asic_specific;
8044         struct asic_fixed_properties *prop = &hdev->asic_prop;
8045         int rc;
8046
8047         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8048                 return 0;
8049
8050         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8051                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8052                                         mmCPU_BOOT_ERR1);
8053         if (rc)
8054                 return rc;
8055
8056         if (!strlen(prop->cpucp_info.card_name))
8057                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8058                                 CARD_NAME_MAX_LEN);
8059
8060         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8061
8062         set_default_power_values(hdev);
8063
8064         return 0;
8065 }
8066
8067 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8068                 struct engines_data *e)
8069 {
8070         struct gaudi_device *gaudi = hdev->asic_specific;
8071         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8072         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8073         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8074         unsigned long *mask = (unsigned long *)mask_arr;
8075         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8076         bool is_idle = true, is_eng_idle, is_slave;
8077         u64 offset;
8078         int i, dma_id, port;
8079
8080         if (e)
8081                 hl_engine_data_sprintf(e,
8082                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8083                         "---  -------  ------------  ----------  -------------\n");
8084
8085         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8086                 dma_id = gaudi_dma_assignment[i];
8087                 offset = dma_id * DMA_QMAN_OFFSET;
8088
8089                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8090                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8091                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8092                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8093                                 IS_DMA_IDLE(dma_core_sts0);
8094                 is_idle &= is_eng_idle;
8095
8096                 if (mask && !is_eng_idle)
8097                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8098                 if (e)
8099                         hl_engine_data_sprintf(e, fmt, dma_id,
8100                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8101                                 qm_cgm_sts, dma_core_sts0);
8102         }
8103
8104         if (e)
8105                 hl_engine_data_sprintf(e,
8106                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8107                         "---  -------  ------------  ----------  ----------\n");
8108
8109         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8110                 offset = i * TPC_QMAN_OFFSET;
8111                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8112                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8113                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8114                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8115                                 IS_TPC_IDLE(tpc_cfg_sts);
8116                 is_idle &= is_eng_idle;
8117
8118                 if (mask && !is_eng_idle)
8119                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8120                 if (e)
8121                         hl_engine_data_sprintf(e, fmt, i,
8122                                 is_eng_idle ? "Y" : "N",
8123                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8124         }
8125
8126         if (e)
8127                 hl_engine_data_sprintf(e,
8128                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8129                         "---  -------  ------------  ----------  -----------\n");
8130
8131         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8132                 offset = i * MME_QMAN_OFFSET;
8133                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8134                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8135
8136                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8137                 is_slave = i % 2;
8138                 if (!is_slave) {
8139                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8140                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8141                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8142                 }
8143
8144                 is_idle &= is_eng_idle;
8145
8146                 if (mask && !is_eng_idle)
8147                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8148                 if (e) {
8149                         if (!is_slave)
8150                                 hl_engine_data_sprintf(e, fmt, i,
8151                                         is_eng_idle ? "Y" : "N",
8152                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8153                         else
8154                                 hl_engine_data_sprintf(e, mme_slave_fmt, i,
8155                                         is_eng_idle ? "Y" : "N", "-",
8156                                         "-", mme_arch_sts);
8157                 }
8158         }
8159
8160         if (e)
8161                 hl_engine_data_sprintf(e,
8162                                 "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8163                                 "---  -------  ------------  ----------\n");
8164
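             /* NIC engines come in macros of two ports each; check both QMANs per macro */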
8165         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8166                 offset = i * NIC_MACRO_QMAN_OFFSET;
8167                 port = 2 * i;
8168                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8169                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8170                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8171                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8172                         is_idle &= is_eng_idle;
8173
8174                         if (mask && !is_eng_idle)
8175                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8176                         if (e)
8177                                 hl_engine_data_sprintf(e, nic_fmt, port,
8178                                                 is_eng_idle ? "Y" : "N",
8179                                                 qm_glbl_sts0, qm_cgm_sts);
8180                 }
8181
8182                 port = 2 * i + 1;
8183                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8184                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8185                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8186                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8187                         is_idle &= is_eng_idle;
8188
8189                         if (mask && !is_eng_idle)
8190                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8191                         if (e)
8192                                 hl_engine_data_sprintf(e, nic_fmt, port,
8193                                                 is_eng_idle ? "Y" : "N",
8194                                                 qm_glbl_sts0, qm_cgm_sts);
8195                 }
8196         }
8197
8198         if (e)
8199                 hl_engine_data_sprintf(e, "\n");
8200
8201         return is_idle;
8202 }
8203
8204 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8205         __acquires(&gaudi->hw_queues_lock)
8206 {
8207         struct gaudi_device *gaudi = hdev->asic_specific;
8208
8209         spin_lock(&gaudi->hw_queues_lock);
8210 }
8211
8212 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8213         __releases(&gaudi->hw_queues_lock)
8214 {
8215         struct gaudi_device *gaudi = hdev->asic_specific;
8216
8217         spin_unlock(&gaudi->hw_queues_lock);
8218 }
8219
8220 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8221 {
8222         return hdev->pdev->device;
8223 }
8224
8225 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8226                                 size_t max_size)
8227 {
8228         struct gaudi_device *gaudi = hdev->asic_specific;
8229
8230         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8231                 return 0;
8232
8233         return hl_fw_get_eeprom_data(hdev, data, max_size);
8234 }
8235
8236 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8237 {
8238         struct gaudi_device *gaudi = hdev->asic_specific;
8239
8240         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8241                 return 0;
8242
8243         return hl_fw_get_monitor_dump(hdev, data);
8244 }
8245
8246 /*
8247  * This function should be used only during initialization and/or after reset,
8248  * when there are no active users.
8249  */
8250 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8251 {
8252         u64 kernel_timeout;
8253         u32 status, offset;
8254         int rc;
8255
8256         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8257
8258         if (hdev->pldm)
8259                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8260         else
8261                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8262
8263         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8264                         lower_32_bits(tpc_kernel));
8265         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8266                         upper_32_bits(tpc_kernel));
8267
8268         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8269                         lower_32_bits(tpc_kernel));
8270         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8271                         upper_32_bits(tpc_kernel));
8272         /* set a valid LUT pointer, content is of no significance */
8273         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8274                         lower_32_bits(tpc_kernel));
8275         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8276                         upper_32_bits(tpc_kernel));
8277
8278         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8279                         lower_32_bits(CFG_BASE +
8280                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8281
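             /* Invalidate the TPC icache and prefetch 64KB from the new kernel base */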
8282         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8283                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8284                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8285         /* wait a bit for the engine to start executing */
8286         usleep_range(1000, 1500);
8287
8288         /* wait until engine has finished executing */
8289         rc = hl_poll_timeout(
8290                 hdev,
8291                 mmTPC0_CFG_STATUS + offset,
8292                 status,
8293                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8294                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8295                 1000,
8296                 kernel_timeout);
8297
8298         if (rc) {
8299                 dev_err(hdev->dev,
8300                         "Timeout while waiting for TPC%d icache prefetch\n",
8301                         tpc_id);
8302                 return -EIO;
8303         }
8304
8305         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8306                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8307
8308         /* wait a bit for the engine to start executing */
8309         usleep_range(1000, 1500);
8310
8311         /* wait until engine has finished executing */
8312         rc = hl_poll_timeout(
8313                 hdev,
8314                 mmTPC0_CFG_STATUS + offset,
8315                 status,
8316                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8317                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8318                 1000,
8319                 kernel_timeout);
8320
8321         if (rc) {
8322                 dev_err(hdev->dev,
8323                         "Timeout while waiting for TPC%d vector pipe\n",
8324                         tpc_id);
8325                 return -EIO;
8326         }
8327
8328         rc = hl_poll_timeout(
8329                 hdev,
8330                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8331                 status,
8332                 (status == 0),
8333                 1000,
8334                 kernel_timeout);
8335
8336         if (rc) {
8337                 dev_err(hdev->dev,
8338                         "Timeout while waiting for TPC%d kernel to execute\n",
8339                         tpc_id);
8340                 return -EIO;
8341         }
8342
8343         return 0;
8344 }
8345
8346 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8347                 struct hl_ctx *ctx)
8348 {
8349         struct gaudi_device *gaudi = hdev->asic_specific;
8350         int min_alloc_order, rc, collective_cb_size;
8351
8352         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8353                 return 0;
8354
8355         hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8356                                                         HOST_SPACE_INTERNAL_CB_SZ,
8357                                                         &hdev->internal_cb_pool_dma_addr,
8358                                                         GFP_KERNEL | __GFP_ZERO);
8359
8360         if (!hdev->internal_cb_pool_virt_addr)
8361                 return -ENOMEM;
8362
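             /* Pool granularity: one collective CB = 5 MSG_SHORT packets plus a FENCE packet */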
8363         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8364                         sizeof(struct packet_fence);
8365         min_alloc_order = ilog2(collective_cb_size);
8366
8367         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8368         if (!hdev->internal_cb_pool) {
8369                 dev_err(hdev->dev,
8370                         "Failed to create internal CB pool\n");
8371                 rc = -ENOMEM;
8372                 goto free_internal_cb_pool;
8373         }
8374
8375         rc = gen_pool_add(hdev->internal_cb_pool,
8376                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8377                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8378         if (rc) {
8379                 dev_err(hdev->dev,
8380                         "Failed to add memory to internal CB pool\n");
8381                 rc = -EFAULT;
8382                 goto destroy_internal_cb_pool;
8383         }
8384
8385         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8386                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8387                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8388
8389         if (!hdev->internal_cb_va_base) {
8390                 rc = -ENOMEM;
8391                 goto destroy_internal_cb_pool;
8392         }
8393
8394         mutex_lock(&ctx->mmu_lock);
8395         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8396                         hdev->internal_cb_pool_dma_addr,
8397                         HOST_SPACE_INTERNAL_CB_SZ);
8398
8399         hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8400         mutex_unlock(&ctx->mmu_lock);
8401
8402         if (rc)
8403                 goto unreserve_internal_cb_pool;
8404
8405         return 0;
8406
8407 unreserve_internal_cb_pool:
8408         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8409                         HOST_SPACE_INTERNAL_CB_SZ);
8410 destroy_internal_cb_pool:
8411         gen_pool_destroy(hdev->internal_cb_pool);
8412 free_internal_cb_pool:
8413         hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8414                                         hdev->internal_cb_pool_dma_addr);
8415
8416         return rc;
8417 }
8418
8419 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8420                 struct hl_ctx *ctx)
8421 {
8422         struct gaudi_device *gaudi = hdev->asic_specific;
8423
8424         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8425                 return;
8426
8427         mutex_lock(&ctx->mmu_lock);
8428         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8429                         HOST_SPACE_INTERNAL_CB_SZ);
8430         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8431                         HOST_SPACE_INTERNAL_CB_SZ);
8432         hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8433         mutex_unlock(&ctx->mmu_lock);
8434
8435         gen_pool_destroy(hdev->internal_cb_pool);
8436
8437         hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8438                                         hdev->internal_cb_pool_dma_addr);
8439 }
8440
8441 static int gaudi_ctx_init(struct hl_ctx *ctx)
8442 {
8443         int rc;
8444
8445         if (ctx->asid == HL_KERNEL_ASID_ID)
8446                 return 0;
8447
8448         rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8449         if (rc)
8450                 return rc;
8451
8452         rc = gaudi_restore_user_registers(ctx->hdev);
8453         if (rc)
8454                 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8455
8456         return rc;
8457 }
8458
8459 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8460 {
8461         if (ctx->asid == HL_KERNEL_ASID_ID)
8462                 return;
8463
8464         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8465 }
8466
8467 static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8468 {
8469         return 0;
8470 }
8471
8472 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8473 {
8474         return gaudi_cq_assignment[cq_idx];
8475 }
8476
8477 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8478 {
8479         return sizeof(struct packet_msg_short) +
8480                         sizeof(struct packet_msg_prot) * 2;
8481 }
8482
8483 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8484 {
8485         return sizeof(struct packet_msg_short) * 4 +
8486                         sizeof(struct packet_fence) +
8487                         sizeof(struct packet_msg_prot) * 2;
8488 }
8489
8490 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8491 {
8492         return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8493 }
8494
8495 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8496                                 u32 size, bool eb)
8497 {
8498         struct hl_cb *cb = (struct hl_cb *) data;
8499         struct packet_msg_short *pkt;
8500         u32 value, ctl, pkt_size = sizeof(*pkt);
8501
8502         pkt = cb->kernel_address + size;
8503         memset(pkt, 0, pkt_size);
8504
8505         /* Inc by 1, Mode ADD */
8506         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8507         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8508
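             /* Address the SOB at byte offset sob_id * 4 from the W_S SOB base */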
8509         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8510         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8511         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8512         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8513         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8514         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8515         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8516
8517         pkt->value = cpu_to_le32(value);
8518         pkt->ctl = cpu_to_le32(ctl);
8519
8520         return size + pkt_size;
8521 }
8522
8523 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8524                                         u16 addr)
8525 {
8526         u32 ctl, pkt_size = sizeof(*pkt);
8527
8528         memset(pkt, 0, pkt_size);
8529
8530         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8531         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8532         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8533         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8534         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8535         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8536
8537         pkt->value = cpu_to_le32(value);
8538         pkt->ctl = cpu_to_le32(ctl);
8539
8540         return pkt_size;
8541 }
8542
8543 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8544                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8545                 u16 sob_val, u16 mon_id)
8546 {
8547         u64 monitor_base;
8548         u32 ctl, value, pkt_size = sizeof(*pkt);
8549         u16 msg_addr_offset;
8550         u8 mask;
8551
8552         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8553                 dev_err(hdev->dev,
8554                         "sob_base %u (mask %#x) is not valid\n",
8555                         sob_base, sob_mask);
8556                 return 0;
8557         }
8558
8559         /*
8560          * monitor_base should be the content of the base0 address registers,
8561          * so it will be added to the msg short offsets
8562          */
8563         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8564
8565         msg_addr_offset =
8566                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8567                                 monitor_base;
8568
8569         memset(pkt, 0, pkt_size);
8570
8571         /* Monitor config packet: bind the monitor to a sync object */
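             /* SOBs are grouped eight per sync group, hence the division by 8 */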
8572         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8573         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8574         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8575                         0); /* GREATER OR EQUAL */
8576         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8577
8578         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8579         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8580         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8581         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8582         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8583         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8584         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8585
8586         pkt->value = cpu_to_le32(value);
8587         pkt->ctl = cpu_to_le32(ctl);
8588
8589         return pkt_size;
8590 }
8591
8592 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8593 {
8594         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8595
8596         memset(pkt, 0, pkt_size);
8597
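             /* Fence packet: target value 1 on fence counter ID 2, decrement by 1 once reached */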
8598         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8599         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8600         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8601
8602         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8603         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8604         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8605         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8606
8607         pkt->cfg = cpu_to_le32(cfg);
8608         pkt->ctl = cpu_to_le32(ctl);
8609
8610         return pkt_size;
8611 }
8612
8613 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8614 {
8615         u32 offset, nic_index;
8616
8617         switch (queue_id) {
8618         case GAUDI_QUEUE_ID_DMA_0_0:
8619                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8620                 break;
8621         case GAUDI_QUEUE_ID_DMA_0_1:
8622                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8623                 break;
8624         case GAUDI_QUEUE_ID_DMA_0_2:
8625                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8626                 break;
8627         case GAUDI_QUEUE_ID_DMA_0_3:
8628                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8629                 break;
8630         case GAUDI_QUEUE_ID_DMA_1_0:
8631                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8632                 break;
8633         case GAUDI_QUEUE_ID_DMA_1_1:
8634                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8635                 break;
8636         case GAUDI_QUEUE_ID_DMA_1_2:
8637                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8638                 break;
8639         case GAUDI_QUEUE_ID_DMA_1_3:
8640                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8641                 break;
8642         case GAUDI_QUEUE_ID_DMA_5_0:
8643                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8644                 break;
8645         case GAUDI_QUEUE_ID_DMA_5_1:
8646                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8647                 break;
8648         case GAUDI_QUEUE_ID_DMA_5_2:
8649                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8650                 break;
8651         case GAUDI_QUEUE_ID_DMA_5_3:
8652                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8653                 break;
8654         case GAUDI_QUEUE_ID_TPC_7_0:
8655                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8656                 break;
8657         case GAUDI_QUEUE_ID_TPC_7_1:
8658                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8659                 break;
8660         case GAUDI_QUEUE_ID_TPC_7_2:
8661                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8662                 break;
8663         case GAUDI_QUEUE_ID_TPC_7_3:
8664                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8665                 break;
8666         case GAUDI_QUEUE_ID_NIC_0_0:
8667         case GAUDI_QUEUE_ID_NIC_1_0:
8668         case GAUDI_QUEUE_ID_NIC_2_0:
8669         case GAUDI_QUEUE_ID_NIC_3_0:
8670         case GAUDI_QUEUE_ID_NIC_4_0:
8671         case GAUDI_QUEUE_ID_NIC_5_0:
8672         case GAUDI_QUEUE_ID_NIC_6_0:
8673         case GAUDI_QUEUE_ID_NIC_7_0:
8674         case GAUDI_QUEUE_ID_NIC_8_0:
8675         case GAUDI_QUEUE_ID_NIC_9_0:
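                     /* 4 queues per NIC port; ports pair into macros, each exposing QM0 and QM1 */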
8676                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8677                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8678                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8679                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8680                 break;
8681         case GAUDI_QUEUE_ID_NIC_0_1:
8682         case GAUDI_QUEUE_ID_NIC_1_1:
8683         case GAUDI_QUEUE_ID_NIC_2_1:
8684         case GAUDI_QUEUE_ID_NIC_3_1:
8685         case GAUDI_QUEUE_ID_NIC_4_1:
8686         case GAUDI_QUEUE_ID_NIC_5_1:
8687         case GAUDI_QUEUE_ID_NIC_6_1:
8688         case GAUDI_QUEUE_ID_NIC_7_1:
8689         case GAUDI_QUEUE_ID_NIC_8_1:
8690         case GAUDI_QUEUE_ID_NIC_9_1:
8691                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8692                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8693                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8694                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8695                 break;
8696         case GAUDI_QUEUE_ID_NIC_0_2:
8697         case GAUDI_QUEUE_ID_NIC_1_2:
8698         case GAUDI_QUEUE_ID_NIC_2_2:
8699         case GAUDI_QUEUE_ID_NIC_3_2:
8700         case GAUDI_QUEUE_ID_NIC_4_2:
8701         case GAUDI_QUEUE_ID_NIC_5_2:
8702         case GAUDI_QUEUE_ID_NIC_6_2:
8703         case GAUDI_QUEUE_ID_NIC_7_2:
8704         case GAUDI_QUEUE_ID_NIC_8_2:
8705         case GAUDI_QUEUE_ID_NIC_9_2:
8706                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8707                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8708                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8709                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8710                 break;
8711         case GAUDI_QUEUE_ID_NIC_0_3:
8712         case GAUDI_QUEUE_ID_NIC_1_3:
8713         case GAUDI_QUEUE_ID_NIC_2_3:
8714         case GAUDI_QUEUE_ID_NIC_3_3:
8715         case GAUDI_QUEUE_ID_NIC_4_3:
8716         case GAUDI_QUEUE_ID_NIC_5_3:
8717         case GAUDI_QUEUE_ID_NIC_6_3:
8718         case GAUDI_QUEUE_ID_NIC_7_3:
8719         case GAUDI_QUEUE_ID_NIC_8_3:
8720         case GAUDI_QUEUE_ID_NIC_9_3:
8721                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8722                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8723                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8724                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8725                 break;
8726         default:
8727                 return -EINVAL;
8728         }
8729
8730         *addr = CFG_BASE + offset;
8731
8732         return 0;
8733 }
8734
8735 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8736 {
8737         u64 monitor_base;
8738         u32 size = 0;
8739         u16 msg_addr_offset;
8740
8741         /*
8742          * monitor_base should be the content of the base0 address registers,
8743          * so it will be added to the msg short offsets
8744          */
8745         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8746
8747         /* First monitor config packet: low address of the sync */
8748         msg_addr_offset =
8749                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8750                                 monitor_base;
8751
8752         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8753                                         msg_addr_offset);
8754
8755         /* Second monitor config packet: high address of the sync */
8756         msg_addr_offset =
8757                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8758                                 monitor_base;
8759
8760         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8761                                         msg_addr_offset);
8762
8763         /*
8764          * Third monitor config packet: the payload, i.e. what to write when the
8765          * sync triggers
8766          */
8767         msg_addr_offset =
8768                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8769                                 monitor_base;
8770
8771         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8772
8773         return size;
8774 }
8775
8776 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8777                                 struct hl_gen_wait_properties *prop)
8778 {
8779         struct hl_cb *cb = (struct hl_cb *) prop->data;
8780         void *buf = cb->kernel_address;
8781         u64 fence_addr = 0;
8782         u32 size = prop->size;
8783
8784         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8785                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8786                                 prop->q_idx);
8787                 return 0;
8788         }
8789
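             /* Wait CB layout: 3 monitor setup packets, 1 monitor arm packet, then a fence packet */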
8790         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8791         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8792                         prop->sob_mask, prop->sob_val, prop->mon_id);
8793         size += gaudi_add_fence_pkt(buf + size);
8794
8795         return size;
8796 }
8797
8798 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8799 {
8800         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8801
8802         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8803                 hw_sob->sob_id);
8804
8805         WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8806                         hw_sob->sob_id * 4, 0);
8807
8808         kref_init(&hw_sob->kref);
8809 }
8810
8811 static u64 gaudi_get_device_time(struct hl_device *hdev)
8812 {
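             /* Compose the 64-bit timestamp from the PSOC counter upper and lower 32-bit registers */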
8813         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8814
8815         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8816 }
8817
8818 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8819                                 u32 *block_size, u32 *block_id)
8820 {
8821         return -EPERM;
8822 }
8823
8824 static int gaudi_block_mmap(struct hl_device *hdev,
8825                                 struct vm_area_struct *vma,
8826                                 u32 block_id, u32 block_size)
8827 {
8828         return -EPERM;
8829 }
8830
8831 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8832 {
8833         struct cpu_dyn_regs *dyn_regs =
8834                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8835         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8836                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8837                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
8838
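             /* Tell the FW, via the host interrupts IRQ register, that the driver is ready to receive events */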
8839         WREG32(irq_handler_offset,
8840                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8841 }
8842
8843 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8844 {
8845         return -EINVAL;
8846 }
8847
8848 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8849 {
8850         switch (pll_idx) {
8851         case HL_GAUDI_CPU_PLL: return CPU_PLL;
8852         case HL_GAUDI_PCI_PLL: return PCI_PLL;
8853         case HL_GAUDI_NIC_PLL: return NIC_PLL;
8854         case HL_GAUDI_DMA_PLL: return DMA_PLL;
8855         case HL_GAUDI_MESH_PLL: return MESH_PLL;
8856         case HL_GAUDI_MME_PLL: return MME_PLL;
8857         case HL_GAUDI_TPC_PLL: return TPC_PLL;
8858         case HL_GAUDI_IF_PLL: return IF_PLL;
8859         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8860         case HL_GAUDI_HBM_PLL: return HBM_PLL;
8861         default: return -EINVAL;
8862         }
8863 }
8864
8865 static int gaudi_add_sync_to_engine_map_entry(
8866         struct hl_sync_to_engine_map *map, u32 reg_value,
8867         enum hl_sync_engine_type engine_type, u32 engine_id)
8868 {
8869         struct hl_sync_to_engine_map_entry *entry;
8870
8871         /* The register value represents a partial address of the sync object
8872          * and is used as a unique identifier. For this, the CFG base bits
8873          * must be subtracted from the value.
8874          */
8875         if (reg_value == 0 || reg_value == 0xffffffff)
8876                 return 0;
8877         reg_value -= lower_32_bits(CFG_BASE);
8878
8879         /* create a new hash entry */
8880         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8881         if (!entry)
8882                 return -ENOMEM;
8883         entry->engine_type = engine_type;
8884         entry->engine_id = engine_id;
8885         entry->sync_id = reg_value;
8886         hash_add(map->tb, &entry->node, reg_value);
8887
8888         return 0;
8889 }
8890
8891 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8892                                 struct hl_sync_to_engine_map *map)
8893 {
8894         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8895         int i, j, rc;
8896         u32 reg_value;
8897
8898         /* Iterate over TPC engines */
8899         for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8900
8901                 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8902                                         sds->props[SP_NEXT_TPC] * i);
8903
8904                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8905                                                         ENGINE_TPC, i);
8906                 if (rc)
8907                         goto free_sync_to_engine_map;
8908         }
8909
8910         /* Iterate over MME engines */
8911         for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8912                 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8913
8914                         reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8915                                                 sds->props[SP_NEXT_MME] * i +
8916                                                 j * sizeof(u32));
8917
8918                         rc = gaudi_add_sync_to_engine_map_entry(
8919                                 map, reg_value, ENGINE_MME,
8920                                 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8921                         if (rc)
8922                                 goto free_sync_to_engine_map;
8923                 }
8924         }
8925
8926         /* Iterate over DMA engines */
8927         for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8928                 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8929                                         sds->props[SP_DMA_QUEUES_OFFSET] * i);
8930                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8931                                                         ENGINE_DMA, i);
8932                 if (rc)
8933                         goto free_sync_to_engine_map;
8934         }
8935
8936         return 0;
8937
8938 free_sync_to_engine_map:
8939         hl_state_dump_free_sync_to_engine_map(map);
8940
8941         return rc;
8942 }
8943
8944 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8945 {
8946         return FIELD_GET(
8947                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8948                 mon->status);
8949 }
8950
8951 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8952 {
8953         const size_t max_write = 10;
8954         u32 gid, mask, sob;
8955         int i, offset;
8956
8957         /* A sync object ID is derived for each cleared bit in the mask:
8958          * (8 * group_id + bit index of the cleared bit)
8959          */
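             /* e.g. group 2 with mask bit 0 cleared -> SOB 2 * 8 + 0 = 16 */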
8960         gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8961                         mon->arm_data);
8962         mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8963                         mon->arm_data);
8964
8965         for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8966                 max_write; mask >>= 1, i++) {
8967                 if (!(mask & 1)) {
8968                         sob = gid * MONITOR_MAX_SOBS + i;
8969
8970                         if (offset > 0)
8971                                 offset += snprintf(sobs + offset, max_write,
8972                                                         ", ");
8973
8974                         offset += snprintf(sobs + offset, max_write, "%u", sob);
8975                 }
8976         }
8977 }
8978
static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
                                struct hl_device *hdev,
                                struct hl_mon_state_dump *mon)
{
        const char *name;
        char scratch_buf1[BIN_REG_STRING_SIZE],
                scratch_buf2[BIN_REG_STRING_SIZE];
        char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

        name = hl_state_dump_get_monitor_name(hdev, mon);
        if (!name)
                name = "";

        gaudi_fill_sobs_from_mon(monitored_sobs, mon);

        return hl_snprintf_resize(
                buf, size, offset,
                "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
                mon->id, name,
                FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
                                mon->arm_data),
                hl_format_as_binary(
                        scratch_buf1, sizeof(scratch_buf1),
                        FIELD_GET(
                                SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
                                mon->arm_data)),
                FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
                                mon->arm_data),
                mon->wr_data,
                (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
                hl_format_as_binary(
                        scratch_buf2, sizeof(scratch_buf2),
                        FIELD_GET(
                                SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
                                mon->status)),
                monitored_sobs);
}


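/*
 * Dump the fence state of one engine's QMAN: read the CP status and fence
 * counter registers, then print an entry for every stream that currently has
 * a fence wait in progress.
 */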
static int gaudi_print_fences_single_engine(
        struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
        enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
        size_t *size, size_t *offset)
{
        struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
        int rc = -ENOMEM, i;
        u32 *statuses, *fences;

        statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
                        sizeof(*statuses), GFP_KERNEL);
        if (!statuses)
                goto out;

        fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
                                sds->props[SP_ENGINE_NUM_OF_QUEUES],
                         sizeof(*fences), GFP_KERNEL);
        if (!fences)
                goto free_status;

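        /* Snapshot the CP status and fence counter registers before formatting */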
        for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
                statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

        for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
                                sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
                fences[i] = RREG32(base_offset + i * sizeof(u32));

        /* The actual print */
        for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
                u32 fence_id;
                u64 fence_cnt, fence_rdata;
                const char *engine_name;

                if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
                        statuses[i]))
                        continue;

                fence_id =
                        FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
                fence_cnt = base_offset + CFG_BASE +
                        sizeof(u32) *
                        (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
                fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
                                sds->props[SP_FENCE0_RDATA_OFFSET];
                engine_name = hl_sync_engine_to_string(engine_type);

                rc = hl_snprintf_resize(
                        buf, size, offset,
                        "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
                        engine_name, engine_id,
                        i, fence_id,
                        fence_cnt, engine_name, engine_id, fence_id, i,
                        fence_rdata, engine_name, engine_id, fence_id, i,
                        fences[fence_id],
                        statuses[i]);
                if (rc)
                        goto free_fences;
        }

        rc = 0;

free_fences:
        kfree(fences);
free_status:
        kfree(statuses);
out:
        return rc;
}


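/* ASIC-specific callbacks used by the common state-dump code */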
static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
        .monitor_valid = gaudi_monitor_valid,
        .print_single_monitor = gaudi_print_single_monitor,
        .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
        .print_fences_single_engine = gaudi_print_fences_single_engine,
};

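/*
 * Register the Gaudi-specific state-dump data: ID-to-name lookup tables for
 * sync objects and monitors, the properties array and the callback table.
 */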
static void gaudi_state_dump_init(struct hl_device *hdev)
{
        struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
        int i;

        for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
                hash_add(sds->so_id_to_str_tb,
                        &gaudi_so_id_to_str[i].node,
                        gaudi_so_id_to_str[i].id);

        for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
                hash_add(sds->monitor_id_to_str_tb,
                        &gaudi_monitor_id_to_str[i].node,
                        gaudi_monitor_id_to_str[i].id);

        sds->props = gaudi_state_dump_specs_props;

        sds->sync_namager_names = gaudi_sync_manager_names;

        sds->funcs = gaudi_state_dump_funcs;
}

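/* Return the array of stream-master queue IDs for the common driver code */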
static u32 *gaudi_get_stream_master_qid_arr(void)
{
        return gaudi_stream_master;
}

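/* No-op on Gaudi: RAZWI indications are reported asynchronously via the event queue handler */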
static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}

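/* sysfs attribute: report the Infineon VRM controller version read from the CPU-CP info */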
static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
        struct hl_device *hdev = dev_get_drvdata(dev);
        struct cpucp_info *cpucp_info;

        cpucp_info = &hdev->asic_prop.cpucp_info;

        return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}

static DEVICE_ATTR_RO(infineon_ver);

static struct attribute *gaudi_vrm_dev_attrs[] = {
        &dev_attr_infineon_ver.attr,
        NULL,
};

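/* Fill the device's sysfs attribute groups: common clock attributes and the Gaudi VRM group */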
static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
                                        struct attribute_group *dev_vrm_attr_grp)
{
        hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
        dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}

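/* ASIC-specific callbacks dispatched by the common habanalabs driver code */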
static const struct hl_asic_funcs gaudi_funcs = {
        .early_init = gaudi_early_init,
        .early_fini = gaudi_early_fini,
        .late_init = gaudi_late_init,
        .late_fini = gaudi_late_fini,
        .sw_init = gaudi_sw_init,
        .sw_fini = gaudi_sw_fini,
        .hw_init = gaudi_hw_init,
        .hw_fini = gaudi_hw_fini,
        .halt_engines = gaudi_halt_engines,
        .suspend = gaudi_suspend,
        .resume = gaudi_resume,
        .mmap = gaudi_mmap,
        .ring_doorbell = gaudi_ring_doorbell,
        .pqe_write = gaudi_pqe_write,
        .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
        .asic_dma_free_coherent = gaudi_dma_free_coherent,
        .scrub_device_mem = gaudi_scrub_device_mem,
        .scrub_device_dram = gaudi_scrub_device_dram,
        .get_int_queue_base = gaudi_get_int_queue_base,
        .test_queues = gaudi_test_queues,
        .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
        .asic_dma_pool_free = gaudi_dma_pool_free,
        .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
        .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
        .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
        .cs_parser = gaudi_cs_parser,
        .asic_dma_map_sgtable = hl_dma_map_sgtable,
        .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
        .update_eq_ci = gaudi_update_eq_ci,
        .context_switch = gaudi_context_switch,
        .restore_phase_topology = gaudi_restore_phase_topology,
        .debugfs_read_dma = gaudi_debugfs_read_dma,
        .add_device_attr = gaudi_add_device_attr,
        .handle_eqe = gaudi_handle_eqe,
        .get_events_stat = gaudi_get_events_stat,
        .read_pte = gaudi_read_pte,
        .write_pte = gaudi_write_pte,
        .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
        .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
        .mmu_prefetch_cache_range = NULL,
        .send_heartbeat = gaudi_send_heartbeat,
        .debug_coresight = gaudi_debug_coresight,
        .is_device_idle = gaudi_is_device_idle,
        .compute_reset_late_init = gaudi_compute_reset_late_init,
        .hw_queues_lock = gaudi_hw_queues_lock,
        .hw_queues_unlock = gaudi_hw_queues_unlock,
        .get_pci_id = gaudi_get_pci_id,
        .get_eeprom_data = gaudi_get_eeprom_data,
        .get_monitor_dump = gaudi_get_monitor_dump,
        .send_cpu_message = gaudi_send_cpu_message,
        .pci_bars_map = gaudi_pci_bars_map,
        .init_iatu = gaudi_init_iatu,
        .rreg = hl_rreg,
        .wreg = hl_wreg,
        .halt_coresight = gaudi_halt_coresight,
        .ctx_init = gaudi_ctx_init,
        .ctx_fini = gaudi_ctx_fini,
        .pre_schedule_cs = gaudi_pre_schedule_cs,
        .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
        .load_firmware_to_device = gaudi_load_firmware_to_device,
        .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
        .get_signal_cb_size = gaudi_get_signal_cb_size,
        .get_wait_cb_size = gaudi_get_wait_cb_size,
        .gen_signal_cb = gaudi_gen_signal_cb,
        .gen_wait_cb = gaudi_gen_wait_cb,
        .reset_sob = gaudi_reset_sob,
        .reset_sob_group = gaudi_reset_sob_group,
        .get_device_time = gaudi_get_device_time,
        .pb_print_security_errors = NULL,
        .collective_wait_init_cs = gaudi_collective_wait_init_cs,
        .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
        .get_dec_base_addr = NULL,
        .scramble_addr = hl_mmu_scramble_addr,
        .descramble_addr = hl_mmu_descramble_addr,
        .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
        .get_hw_block_id = gaudi_get_hw_block_id,
        .hw_block_mmap = gaudi_block_mmap,
        .enable_events_from_fw = gaudi_enable_events_from_fw,
        .ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
        .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
        .init_firmware_preload_params = gaudi_init_firmware_preload_params,
        .init_firmware_loader = gaudi_init_firmware_loader,
        .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
        .state_dump_init = gaudi_state_dump_init,
        .get_sob_addr = gaudi_get_sob_addr,
        .set_pci_memory_regions = gaudi_set_pci_memory_regions,
        .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
        .check_if_razwi_happened = gaudi_check_if_razwi_happened,
        .mmu_get_real_page_size = hl_mmu_get_real_page_size,
        .access_dev_mem = hl_access_dev_mem,
        .set_dram_bar_base = gaudi_set_hbm_bar_base,
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
        hdev->asic_funcs = &gaudi_funcs;
}