1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse the CB, but WREG should be
43  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
44  *                      never secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
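
/*
 * A minimal sketch of the QMAN0 DMA flow described above (not taken from this
 * file; gaudi_is_idle(), gaudi_qman0_set_secured() and gaudi_do_dma_xfer() are
 * hypothetical helpers used only for illustration, the real entry point in
 * this file is gaudi_send_job_on_qman0()):
 *
 *	if (!gaudi_is_idle(hdev))		// hypothetical idle check
 *		return -EBUSY;			// compute jobs are running
 *	gaudi_qman0_set_secured(hdev, true);	// hypothetical helper
 *	rc = gaudi_do_dma_xfer(hdev, job);	// hypothetical helper
 *	gaudi_qman0_set_secured(hdev, false);	// back to not secured
 */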
61
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000         /* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN            20
86
87 #define GAUDI_CB_POOL_CB_CNT            512
88 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
97
98 #define GAUDI_ARB_WDT_TIMEOUT           0xEE6b27FF /* 8 seconds */
99
100 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
101
102 #define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")
103
104 #define MONITOR_SOB_STRING_SIZE         256
105
106 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
107         GAUDI_QUEUE_ID_DMA_0_0,
108         GAUDI_QUEUE_ID_DMA_0_1,
109         GAUDI_QUEUE_ID_DMA_0_2,
110         GAUDI_QUEUE_ID_DMA_0_3,
111         GAUDI_QUEUE_ID_DMA_1_0,
112         GAUDI_QUEUE_ID_DMA_1_1,
113         GAUDI_QUEUE_ID_DMA_1_2,
114         GAUDI_QUEUE_ID_DMA_1_3
115 };
116
117 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
118                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
119                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
120                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
121                 "gaudi cpu eq"
122 };
123
124 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
125         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
126         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
127         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
128         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
129         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
130         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
131         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
132         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
133 };
134
135 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
136         [0] = GAUDI_QUEUE_ID_DMA_0_0,
137         [1] = GAUDI_QUEUE_ID_DMA_0_1,
138         [2] = GAUDI_QUEUE_ID_DMA_0_2,
139         [3] = GAUDI_QUEUE_ID_DMA_0_3,
140         [4] = GAUDI_QUEUE_ID_DMA_1_0,
141         [5] = GAUDI_QUEUE_ID_DMA_1_1,
142         [6] = GAUDI_QUEUE_ID_DMA_1_2,
143         [7] = GAUDI_QUEUE_ID_DMA_1_3,
144 };
145
146 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
147         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
148         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
149         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
150         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
151         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
152         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
153         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
154         [PACKET_FENCE]          = sizeof(struct packet_fence),
155         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
156         [PACKET_NOP]            = sizeof(struct packet_nop),
157         [PACKET_STOP]           = sizeof(struct packet_stop),
158         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
159         [PACKET_WAIT]           = sizeof(struct packet_wait),
160         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
161 };
162
163 static inline bool validate_packet_id(enum packet_id id)
164 {
165         switch (id) {
166         case PACKET_WREG_32:
167         case PACKET_WREG_BULK:
168         case PACKET_MSG_LONG:
169         case PACKET_MSG_SHORT:
170         case PACKET_CP_DMA:
171         case PACKET_REPEAT:
172         case PACKET_MSG_PROT:
173         case PACKET_FENCE:
174         case PACKET_LIN_DMA:
175         case PACKET_NOP:
176         case PACKET_STOP:
177         case PACKET_ARB_POINT:
178         case PACKET_WAIT:
179         case PACKET_LOAD_AND_EXE:
180                 return true;
181         default:
182                 return false;
183         }
184 }
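
/*
 * A rough sketch of how a CB parser can walk packets using the two tables
 * above; get_packet_id() stands in for the real opcode extraction from the
 * packet header and is purely illustrative:
 *
 *	u32 parsed = 0;
 *
 *	while (parsed < cb_size) {
 *		enum packet_id id = get_packet_id(cb_base + parsed);
 *
 *		if (!validate_packet_id(id) || !gaudi_packet_sizes[id])
 *			return -EINVAL;	// unknown packet, reject the CB
 *
 *		parsed += gaudi_packet_sizes[id];
 *	}
 */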
185
186 static const char * const
187 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
188         "tpc_address_exceed_slm",
189         "tpc_div_by_0",
190         "tpc_spu_mac_overflow",
191         "tpc_spu_addsub_overflow",
192         "tpc_spu_abs_overflow",
193         "tpc_spu_fp_dst_nan_inf",
194         "tpc_spu_fp_dst_denorm",
195         "tpc_vpu_mac_overflow",
196         "tpc_vpu_addsub_overflow",
197         "tpc_vpu_abs_overflow",
198         "tpc_vpu_fp_dst_nan_inf",
199         "tpc_vpu_fp_dst_denorm",
200         "tpc_assertions",
201         "tpc_illegal_instruction",
202         "tpc_pc_wrap_around",
203         "tpc_qm_sw_err",
204         "tpc_hbw_rresp_err",
205         "tpc_hbw_bresp_err",
206         "tpc_lbw_rresp_err",
207         "tpc_lbw_bresp_err"
208 };
209
210 static const char * const
211 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
212         "PQ AXI HBW error",
213         "CQ AXI HBW error",
214         "CP AXI HBW error",
215         "CP error due to undefined OPCODE",
216         "CP encountered STOP OPCODE",
217         "CP AXI LBW error",
218         "CP WRREG32 or WRBULK returned error",
219         "N/A",
220         "FENCE 0 inc over max value and clipped",
221         "FENCE 1 inc over max value and clipped",
222         "FENCE 2 inc over max value and clipped",
223         "FENCE 3 inc over max value and clipped",
224         "FENCE 0 dec under min value and clipped",
225         "FENCE 1 dec under min value and clipped",
226         "FENCE 2 dec under min value and clipped",
227         "FENCE 3 dec under min value and clipped"
228 };
229
230 static const char * const
231 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
232         "Choice push while full error",
233         "Choice Q watchdog error",
234         "MSG AXI LBW returned with error"
235 };
236
237 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
238         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
239         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
240         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
241         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
242         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
243         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
244         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
245         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
246         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
247         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
248         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
249         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
250         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
251         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
252         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
253         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
254         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
255         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
256         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
257         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
258         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
259         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
349         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
350         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
351 };
352
353 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
354         { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
355         { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
356         { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
357         { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
358         { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
359         { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
360         { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
361         { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
362         { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
363         { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
364         { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
365         { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
366         { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
367         { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
368         { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
369         { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
370         { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
371         { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
372         { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
373         { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
374         { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
375         { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
376         { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
377         { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
378         { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
379         { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
380         { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
381 };
382
383 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
384         { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
385         { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
386         { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
387         { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
388         { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
389         { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
390         { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
391         { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
392         { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
393         { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
394         { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
395 };
396
397 static s64 gaudi_state_dump_specs_props[] = {
398         [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
399         [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
400         [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
401         [SP_MON_OBJ_WR_ADDR_LOW] =
402                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
403         [SP_MON_OBJ_WR_ADDR_HIGH] =
404                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
405         [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
406         [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
407         [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
408         [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
409         [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
410         [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
411         [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
412         [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
413         [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
414         [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
415         [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
416         [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
417         [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
418         [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
419         [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
420         [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
421         [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
422         [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
423         [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
424         [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
425         [SP_FENCE0_CNT_OFFSET] =
426                 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
427         [SP_FENCE0_RDATA_OFFSET] =
428                 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
429         [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
430         [SP_NUM_CORES] = 1,
431 };
432
433 static const int gaudi_queue_id_to_engine_id[] = {
434         [GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
435         [GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
436         [GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
437         [GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
438         [GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
439         [GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
440         [GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
441         [GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
442         [GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
443         [GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
444         [GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
445         [GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
446         [GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
447         [GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
448         [GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
449         [GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
450         [GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
451         [GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
452         [GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
453         [GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
454         [GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
455         [GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
456         [GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
457         [GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
458         [GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
459         [GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
460         [GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
461         [GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
462         [GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
463 };
464
465 /* The order here is opposite to the order of the indexing in the h/w.
466  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
467  */
468 static const char * const gaudi_sync_manager_names[] = {
469         "SYNC_MGR_E_N",
470         "SYNC_MGR_W_N",
471         "SYNC_MGR_E_S",
472         "SYNC_MGR_W_S",
473         NULL
474 };
475
476 struct ecc_info_extract_params {
477         u64 block_address;
478         u32 num_memories;
479         bool derr;
480 };
481
482 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
483                                                                 u64 phys_addr);
484 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
485                                         struct hl_cs_job *job);
486 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
487                                         u32 size, u64 val);
488 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
489                                         u32 num_regs, u32 val);
490 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
491                                 u32 tpc_id);
492 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
493 static int gaudi_cpucp_info_get(struct hl_device *hdev);
494 static void gaudi_disable_clock_gating(struct hl_device *hdev);
495 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
496 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
497                                 u32 size, bool eb);
498 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
499                                 struct hl_gen_wait_properties *prop);
500 static inline enum hl_collective_mode
501 get_collective_mode(struct hl_device *hdev, u32 queue_id)
502 {
503         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
504                 return HL_COLLECTIVE_MASTER;
505
506         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
507                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
508                 return HL_COLLECTIVE_SLAVE;
509
510         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
511                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
512                 return HL_COLLECTIVE_SLAVE;
513
514         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
515                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
516                 return HL_COLLECTIVE_SLAVE;
517
518         return HL_COLLECTIVE_NOT_SUPPORTED;
519 }
520
521 static inline void set_default_power_values(struct hl_device *hdev)
522 {
523         struct asic_fixed_properties *prop = &hdev->asic_prop;
524
525         if (hdev->card_type == cpucp_card_type_pmc) {
526                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
527
528                 if (prop->fw_security_enabled)
529                         prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
530                 else
531                         prop->dc_power_default = DC_POWER_DEFAULT_PMC;
532         } else {
533                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
534                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
535         }
536 }
537
538 static int gaudi_set_fixed_properties(struct hl_device *hdev)
539 {
540         struct asic_fixed_properties *prop = &hdev->asic_prop;
541         u32 num_sync_stream_queues = 0;
542         int i;
543
544         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
545         prop->hw_queues_props = kcalloc(prop->max_queues,
546                         sizeof(struct hw_queue_properties),
547                         GFP_KERNEL);
548
549         if (!prop->hw_queues_props)
550                 return -ENOMEM;
551
552         for (i = 0 ; i < prop->max_queues ; i++) {
553                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
554                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
555                         prop->hw_queues_props[i].driver_only = 0;
556                         prop->hw_queues_props[i].supports_sync_stream = 1;
557                         prop->hw_queues_props[i].cb_alloc_flags =
558                                 CB_ALLOC_KERNEL;
559                         num_sync_stream_queues++;
560                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
561                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
562                         prop->hw_queues_props[i].driver_only = 1;
563                         prop->hw_queues_props[i].supports_sync_stream = 0;
564                         prop->hw_queues_props[i].cb_alloc_flags =
565                                 CB_ALLOC_KERNEL;
566                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
567                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
568                         prop->hw_queues_props[i].driver_only = 0;
569                         prop->hw_queues_props[i].supports_sync_stream = 0;
570                         prop->hw_queues_props[i].cb_alloc_flags =
571                                 CB_ALLOC_USER;
572
573                 }
574                 prop->hw_queues_props[i].collective_mode =
575                                                 get_collective_mode(hdev, i);
576         }
577
578         prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
579         prop->cfg_base_address = CFG_BASE;
580         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
581         prop->host_base_address = HOST_PHYS_BASE;
582         prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
583         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
584         prop->completion_mode = HL_COMPLETION_MODE_JOB;
585         prop->collective_first_sob = 0;
586         prop->collective_first_mon = 0;
587
588         /* 2 SOBs per internal queue stream are reserved for collective */
589         prop->sync_stream_first_sob =
590                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
591                         * QMAN_STREAMS * HL_RSVD_SOBS;
592
593         /* 1 monitor per internal queue stream is reserved for collective
594          * 2 monitors per external queue stream are reserved for collective
595          */
596         prop->sync_stream_first_mon =
597                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
598                         (NUMBER_OF_EXT_HW_QUEUES * 2);
599
600         prop->dram_base_address = DRAM_PHYS_BASE;
601         prop->dram_size = GAUDI_HBM_SIZE_32GB;
602         prop->dram_end_address = prop->dram_base_address + prop->dram_size;
603         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
604
605         prop->sram_base_address = SRAM_BASE_ADDR;
606         prop->sram_size = SRAM_SIZE;
607         prop->sram_end_address = prop->sram_base_address + prop->sram_size;
608         prop->sram_user_base_address =
609                         prop->sram_base_address + SRAM_USER_BASE_OFFSET;
610
611         prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
612         prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
613
614         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
615         if (hdev->pldm)
616                 prop->mmu_pgt_size = 0x800000; /* 8MB */
617         else
618                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
619         prop->mmu_pte_size = HL_PTE_SIZE;
620         prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
621         prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
622         prop->dram_page_size = PAGE_SIZE_2MB;
623         prop->device_mem_alloc_default_page_size = prop->dram_page_size;
624         prop->dram_supports_virtual_memory = false;
625
626         prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
627         prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
628         prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
629         prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
630         prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
631         prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
632         prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
633         prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
634         prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
635         prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
636         prop->pmmu.start_addr = VA_HOST_SPACE_START;
637         prop->pmmu.end_addr =
638                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
639         prop->pmmu.page_size = PAGE_SIZE_4KB;
640         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
641         prop->pmmu.last_mask = LAST_MASK;
642         /* TODO: will be duplicated until implementing per-MMU props */
643         prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
644         prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
645
646         /* PMMU and HPMMU are the same except for the page size */
647         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
648         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
649
650         /* shifts and masks are the same in PMMU and DMMU */
651         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
652         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
653         prop->dmmu.end_addr = VA_HOST_SPACE_END;
654         prop->dmmu.page_size = PAGE_SIZE_2MB;
655
656         prop->cfg_size = CFG_SIZE;
657         prop->max_asid = MAX_ASID;
658         prop->num_of_events = GAUDI_EVENT_SIZE;
659         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
660
661         set_default_power_values(hdev);
662
663         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
664         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
665
666         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
667         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
668
669         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
670                                         CARD_NAME_MAX_LEN);
671
672         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
673
674         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
675                         prop->sync_stream_first_sob +
676                         (num_sync_stream_queues * HL_RSVD_SOBS);
677         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
678                         prop->sync_stream_first_mon +
679                         (num_sync_stream_queues * HL_RSVD_MONS);
680
681         prop->first_available_user_interrupt = USHRT_MAX;
682
683         for (i = 0 ; i < HL_MAX_DCORES ; i++)
684                 prop->first_available_cq[i] = USHRT_MAX;
685
686         prop->fw_cpu_boot_dev_sts0_valid = false;
687         prop->fw_cpu_boot_dev_sts1_valid = false;
688         prop->hard_reset_done_by_fw = false;
689         prop->gic_interrupts_enable = true;
690
691         prop->server_type = HL_SERVER_TYPE_UNKNOWN;
692
693         prop->clk_pll_index = HL_GAUDI_MME_PLL;
694         prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
695
696         prop->use_get_power_for_reset_history = true;
697
698         prop->configurable_stop_on_err = true;
699
700         prop->set_max_power_on_device_init = true;
701
702         prop->dma_mask = 48;
703
704         return 0;
705 }
706
707 static int gaudi_pci_bars_map(struct hl_device *hdev)
708 {
709         static const char * const name[] = {"SRAM", "CFG", "HBM"};
710         bool is_wc[3] = {false, false, true};
711         int rc;
712
713         rc = hl_pci_bars_map(hdev, name, is_wc);
714         if (rc)
715                 return rc;
716
717         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
718                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
719
720         return 0;
721 }
722
723 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
724 {
725         struct gaudi_device *gaudi = hdev->asic_specific;
726         struct hl_inbound_pci_region pci_region;
727         u64 old_addr = addr;
728         int rc;
729
730         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
731                 return old_addr;
732
733         if (hdev->asic_prop.iatu_done_by_fw)
734                 return U64_MAX;
735
736         /* Inbound Region 2 - Bar 4 - Point to HBM */
737         pci_region.mode = PCI_BAR_MATCH_MODE;
738         pci_region.bar = HBM_BAR_ID;
739         pci_region.addr = addr;
740         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
741         if (rc)
742                 return U64_MAX;
743
744         if (gaudi) {
745                 old_addr = gaudi->hbm_bar_cur_addr;
746                 gaudi->hbm_bar_cur_addr = addr;
747         }
748
749         return old_addr;
750 }
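
/*
 * Typical save/restore usage of the HBM BAR window (a sketch, not lifted from
 * this file; hbm_bar_aligned_addr stands for the target DRAM address rounded
 * down to the BAR size):
 *
 *	u64 old_base = gaudi_set_hbm_bar_base(hdev, hbm_bar_aligned_addr);
 *
 *	if (old_base == U64_MAX)
 *		return -EIO;	// window move failed or iATU is owned by FW
 *	// ... access hdev->pcie_bar[HBM_BAR_ID] + offset ...
 *	gaudi_set_hbm_bar_base(hdev, old_base);	// restore the previous window
 */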
751
752 static int gaudi_init_iatu(struct hl_device *hdev)
753 {
754         struct hl_inbound_pci_region inbound_region;
755         struct hl_outbound_pci_region outbound_region;
756         int rc;
757
758         if (hdev->asic_prop.iatu_done_by_fw)
759                 return 0;
760
761         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
762         inbound_region.mode = PCI_BAR_MATCH_MODE;
763         inbound_region.bar = SRAM_BAR_ID;
764         inbound_region.addr = SRAM_BASE_ADDR;
765         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
766         if (rc)
767                 goto done;
768
769         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
770         inbound_region.mode = PCI_BAR_MATCH_MODE;
771         inbound_region.bar = CFG_BAR_ID;
772         inbound_region.addr = SPI_FLASH_BASE_ADDR;
773         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
774         if (rc)
775                 goto done;
776
777         /* Inbound Region 2 - Bar 4 - Point to HBM */
778         inbound_region.mode = PCI_BAR_MATCH_MODE;
779         inbound_region.bar = HBM_BAR_ID;
780         inbound_region.addr = DRAM_PHYS_BASE;
781         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
782         if (rc)
783                 goto done;
784
785         /* Outbound Region 0 - Point to Host */
786         outbound_region.addr = HOST_PHYS_BASE;
787         outbound_region.size = HOST_PHYS_SIZE;
788         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
789
790 done:
791         return rc;
792 }
793
794 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
795 {
796         return RREG32(mmHW_STATE);
797 }
798
799 static int gaudi_early_init(struct hl_device *hdev)
800 {
801         struct asic_fixed_properties *prop = &hdev->asic_prop;
802         struct pci_dev *pdev = hdev->pdev;
803         resource_size_t pci_bar_size;
804         u32 fw_boot_status;
805         int rc;
806
807         rc = gaudi_set_fixed_properties(hdev);
808         if (rc) {
809                 dev_err(hdev->dev, "Failed setting fixed properties\n");
810                 return rc;
811         }
812
813         /* Check BAR sizes */
814         pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
815
816         if (pci_bar_size != SRAM_BAR_SIZE) {
817                 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
818                         SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
819                 rc = -ENODEV;
820                 goto free_queue_props;
821         }
822
823         pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
824
825         if (pci_bar_size != CFG_BAR_SIZE) {
826                 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
827                         CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
828                 rc = -ENODEV;
829                 goto free_queue_props;
830         }
831
832         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
833         hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
834
835         /* If FW security is enabled at this point it means no access to ELBI */
836         if (hdev->asic_prop.fw_security_enabled) {
837                 hdev->asic_prop.iatu_done_by_fw = true;
838
839                 /*
840                  * GIC-security-bit can ONLY be set by CPUCP, so at this stage
841                  * the decision can only be taken based on PCI ID security.
842                  */
843                 hdev->asic_prop.gic_interrupts_enable = false;
844                 goto pci_init;
845         }
846
847         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
848                                 &fw_boot_status);
849         if (rc)
850                 goto free_queue_props;
851
852         /* Check whether FW is configuring iATU */
853         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
854                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
855                 hdev->asic_prop.iatu_done_by_fw = true;
856
857 pci_init:
858         rc = hl_pci_init(hdev);
859         if (rc)
860                 goto free_queue_props;
861
862         /* Before continuing with the initialization, we need to read the preboot
863          * version to determine whether we are running with security-enabled firmware
864          */
865         rc = hl_fw_read_preboot_status(hdev);
866         if (rc) {
867                 if (hdev->reset_on_preboot_fail)
868                         hdev->asic_funcs->hw_fini(hdev, true, false);
869                 goto pci_fini;
870         }
871
872         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
873                 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
874                 hdev->asic_funcs->hw_fini(hdev, true, false);
875         }
876
877         return 0;
878
879 pci_fini:
880         hl_pci_fini(hdev);
881 free_queue_props:
882         kfree(hdev->asic_prop.hw_queues_props);
883         return rc;
884 }
885
886 static int gaudi_early_fini(struct hl_device *hdev)
887 {
888         kfree(hdev->asic_prop.hw_queues_props);
889         hl_pci_fini(hdev);
890
891         return 0;
892 }
893
894 /**
895  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
896  *
897  * @hdev: pointer to hl_device structure
898  *
899  */
900 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
901 {
902         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
903         struct asic_fixed_properties *prop = &hdev->asic_prop;
904         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
905         int rc;
906
907         if ((hdev->fw_components & FW_TYPE_LINUX) &&
908                         (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
909                 struct gaudi_device *gaudi = hdev->asic_specific;
910
911                 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
912                         return 0;
913
914                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
915
916                 if (rc)
917                         return rc;
918
919                 freq = pll_freq_arr[2];
920         } else {
921                 /* Backward compatibility */
922                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
923                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
924                 nr = RREG32(mmPSOC_CPU_PLL_NR);
925                 nf = RREG32(mmPSOC_CPU_PLL_NF);
926                 od = RREG32(mmPSOC_CPU_PLL_OD);
927
928                 if (div_sel == DIV_SEL_REF_CLK ||
929                                 div_sel == DIV_SEL_DIVIDED_REF) {
930                         if (div_sel == DIV_SEL_REF_CLK)
931                                 freq = PLL_REF_CLK;
932                         else
933                                 freq = PLL_REF_CLK / (div_fctr + 1);
934                 } else if (div_sel == DIV_SEL_PLL_CLK ||
935                         div_sel == DIV_SEL_DIVIDED_PLL) {
936                         pll_clk = PLL_REF_CLK * (nf + 1) /
937                                         ((nr + 1) * (od + 1));
938                         if (div_sel == DIV_SEL_PLL_CLK)
939                                 freq = pll_clk;
940                         else
941                                 freq = pll_clk / (div_fctr + 1);
942                 } else {
943                         dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
944                         freq = 0;
945                 }
946         }
947
948         prop->psoc_timestamp_frequency = freq;
949         prop->psoc_pci_pll_nr = nr;
950         prop->psoc_pci_pll_nf = nf;
951         prop->psoc_pci_pll_od = od;
952         prop->psoc_pci_pll_div_factor = div_fctr;
953
954         return 0;
955 }
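
/*
 * Worked example of the PLL math above, with illustrative numbers only
 * (assuming a 50 MHz reference clock): for nf = 39, nr = 0, od = 1 and
 * div_sel == DIV_SEL_DIVIDED_PLL with div_fctr = 1:
 *
 *	pll_clk = 50 * (39 + 1) / ((0 + 1) * (1 + 1)) = 1000 MHz
 *	freq    = pll_clk / (1 + 1)                   = 500 MHz
 */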
956
957 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
958                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
959 {
960         struct asic_fixed_properties *prop = &hdev->asic_prop;
961         struct packet_lin_dma *init_tpc_mem_pkt;
962         struct hl_cs_job *job;
963         struct hl_cb *cb;
964         u64 dst_addr;
965         u32 cb_size, ctl;
966         u8 tpc_id;
967         int rc;
968
969         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
970         if (!cb)
971                 return -EFAULT;
972
973         init_tpc_mem_pkt = cb->kernel_address;
974         cb_size = sizeof(*init_tpc_mem_pkt);
975         memset(init_tpc_mem_pkt, 0, cb_size);
976
977         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
978
979         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
980         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
981         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
982         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
983
984         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
985
986         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
987         dst_addr = (prop->sram_user_base_address &
988                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
989                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
990         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
991
992         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
993         if (!job) {
994                 dev_err(hdev->dev, "Failed to allocate a new job\n");
995                 rc = -ENOMEM;
996                 goto release_cb;
997         }
998
999         job->id = 0;
1000         job->user_cb = cb;
1001         atomic_inc(&job->user_cb->cs_cnt);
1002         job->user_cb_size = cb_size;
1003         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1004         job->patched_cb = job->user_cb;
1005         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1006
1007         hl_debugfs_add_job(hdev, job);
1008
1009         rc = gaudi_send_job_on_qman0(hdev, job);
1010
1011         if (rc)
1012                 goto free_job;
1013
1014         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1015                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1016                 if (rc)
1017                         break;
1018         }
1019
1020 free_job:
1021         hl_userptr_delete_list(hdev, &job->userptr_list);
1022         hl_debugfs_remove_job(hdev, job);
1023         kfree(job);
1024         atomic_dec(&cb->cs_cnt);
1025
1026 release_cb:
1027         hl_cb_put(cb);
1028         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1029
1030         return rc;
1031 }
1032
1033 /*
1034  * gaudi_init_tpc_mem() - Initialize TPC memories.
1035  * @hdev: Pointer to hl_device structure.
1036  *
1037  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1038  *
1039  * Return: 0 for success, negative value for error.
1040  */
1041 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1042 {
1043         const struct firmware *fw;
1044         size_t fw_size;
1045         void *cpu_addr;
1046         dma_addr_t dma_handle;
1047         int rc, count = 5;
1048
1049 again:
1050         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1051         if (rc == -EINTR && count-- > 0) {
1052                 msleep(50);
1053                 goto again;
1054         }
1055
1056         if (rc) {
1057                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1058                                 GAUDI_TPC_FW_FILE);
1059                 goto out;
1060         }
1061
1062         fw_size = fw->size;
1063         cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1064         if (!cpu_addr) {
1065                 dev_err(hdev->dev,
1066                         "Failed to allocate %zu of dma memory for TPC kernel\n",
1067                         fw_size);
1068                 rc = -ENOMEM;
1069                 goto out;
1070         }
1071
1072         memcpy(cpu_addr, fw->data, fw_size);
1073
1074         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1075
1076         hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1077
1078 out:
1079         release_firmware(fw);
1080         return rc;
1081 }
1082
1083 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1084 {
1085         struct gaudi_device *gaudi = hdev->asic_specific;
1086         struct gaudi_collective_properties *prop = &gaudi->collective_props;
1087         struct hl_hw_queue *q;
1088         u32 i, sob_id, sob_group_id, queue_id;
1089
1090         /* Iterate through SOB groups and assign a SOB for each slave queue */
1091         sob_group_id =
1092                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1093         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1094
1095         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1096         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1097                 q = &hdev->kernel_queues[queue_id + (4 * i)];
1098                 q->sync_stream_prop.collective_sob_id = sob_id + i;
1099         }
1100
1101         /* Both DMA5 and TPC7 use the same resources since only a single
1102          * engine needs to participate in the reduction process
1103          */
1104         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1105         q = &hdev->kernel_queues[queue_id];
1106         q->sync_stream_prop.collective_sob_id =
1107                         sob_id + NIC_NUMBER_OF_ENGINES;
1108
1109         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1110         q = &hdev->kernel_queues[queue_id];
1111         q->sync_stream_prop.collective_sob_id =
1112                         sob_id + NIC_NUMBER_OF_ENGINES;
1113 }
1114
1115 static void gaudi_sob_group_hw_reset(struct kref *ref)
1116 {
1117         struct gaudi_hw_sob_group *hw_sob_group =
1118                 container_of(ref, struct gaudi_hw_sob_group, kref);
1119         struct hl_device *hdev = hw_sob_group->hdev;
1120         int i;
1121
1122         for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1123                 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1124                         (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1125
1126         kref_init(&hw_sob_group->kref);
1127 }
1128
1129 static void gaudi_sob_group_reset_error(struct kref *ref)
1130 {
1131         struct gaudi_hw_sob_group *hw_sob_group =
1132                 container_of(ref, struct gaudi_hw_sob_group, kref);
1133         struct hl_device *hdev = hw_sob_group->hdev;
1134
1135         dev_crit(hdev->dev,
1136                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1137                 hw_sob_group->base_sob_id);
1138 }
1139
1140 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1141 {
1142         struct gaudi_collective_properties *prop;
1143         int i;
1144
1145         prop = &gaudi->collective_props;
1146
1147         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1148
1149         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1150                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1151                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1152                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1153         /* Set collective engine bit */
1154         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1155                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1156 }
1157
1158 static int gaudi_collective_init(struct hl_device *hdev)
1159 {
1160         u32 i, sob_id, reserved_sobs_per_group;
1161         struct gaudi_collective_properties *prop;
1162         struct gaudi_device *gaudi;
1163
1164         gaudi = hdev->asic_specific;
1165         prop = &gaudi->collective_props;
1166         sob_id = hdev->asic_prop.collective_first_sob;
1167
1168         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1169         reserved_sobs_per_group =
1170                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1171
1172         /* Init SOB groups */
1173         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1174                 prop->hw_sob_group[i].hdev = hdev;
1175                 prop->hw_sob_group[i].base_sob_id = sob_id;
1176                 sob_id += reserved_sobs_per_group;
1177                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1178         }
1179
1180         for (i = 0 ; i < QMAN_STREAMS; i++) {
1181                 prop->next_sob_group_val[i] = 1;
1182                 prop->curr_sob_group_idx[i] = 0;
1183                 gaudi_collective_map_sobs(hdev, i);
1184         }
1185
1186         gaudi_collective_mstr_sob_mask_set(gaudi);
1187
1188         return 0;
1189 }
1190
1191 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1192 {
1193         struct gaudi_device *gaudi = hdev->asic_specific;
1194         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1195
1196         kref_put(&cprop->hw_sob_group[sob_group].kref,
1197                                         gaudi_sob_group_hw_reset);
1198 }
1199
1200 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1201                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1202 {
1203         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1204         struct gaudi_collective_properties *cprop;
1205         struct hl_gen_wait_properties wait_prop;
1206         struct hl_sync_stream_properties *prop;
1207         struct gaudi_device *gaudi;
1208
1209         gaudi = hdev->asic_specific;
1210         cprop = &gaudi->collective_props;
1211         queue_id = job->hw_queue_id;
1212         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1213
1214         master_sob_base =
1215                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1216         master_monitor = prop->collective_mstr_mon_id[0];
1217
1218         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1219
1220         dev_dbg(hdev->dev,
1221                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1222                 master_sob_base, cprop->mstr_sob_mask[0],
1223                 cprop->next_sob_group_val[stream],
1224                 master_monitor, queue_id);
1225
1226         wait_prop.data = (void *) job->patched_cb;
1227         wait_prop.sob_base = master_sob_base;
1228         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1229         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1230         wait_prop.mon_id = master_monitor;
1231         wait_prop.q_idx = queue_id;
1232         wait_prop.size = cb_size;
1233         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1234
1235         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1236         master_monitor = prop->collective_mstr_mon_id[1];
1237
1238         dev_dbg(hdev->dev,
1239                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1240                 master_sob_base, cprop->mstr_sob_mask[1],
1241                 cprop->next_sob_group_val[stream],
1242                 master_monitor, queue_id);
1243
1244         wait_prop.sob_base = master_sob_base;
1245         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1246         wait_prop.mon_id = master_monitor;
1247         wait_prop.size = cb_size;
1248         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1249 }
1250
1251 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1252                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1253 {
1254         struct hl_gen_wait_properties wait_prop;
1255         struct hl_sync_stream_properties *prop;
1256         u32 queue_id, cb_size = 0;
1257
1258         queue_id = job->hw_queue_id;
1259         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1260
1261         if (job->cs->encaps_signals) {
1262                 /* Use the encaps signal handle stored earlier in the flow
1263                  * and set the SOB information from the encaps
1264                  * signals handle
1265                  */
1266                 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1267                                                 cs_cmpl);
1268
1269                 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1270                                 job->cs->sequence,
1271                                 cs_cmpl->hw_sob->sob_id,
1272                                 cs_cmpl->sob_val);
1273         }
1274
1275         /* Add to wait CBs using slave monitor */
1276         wait_prop.data = (void *) job->user_cb;
1277         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1278         wait_prop.sob_mask = 0x1;
1279         wait_prop.sob_val = cs_cmpl->sob_val;
1280         wait_prop.mon_id = prop->collective_slave_mon_id;
1281         wait_prop.q_idx = queue_id;
1282         wait_prop.size = cb_size;
1283
1284         dev_dbg(hdev->dev,
1285                 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1286                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1287                 prop->collective_slave_mon_id, queue_id);
1288
1289         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1290
1291         dev_dbg(hdev->dev,
1292                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1293                 prop->collective_sob_id, queue_id);
1294
1295         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1296                         prop->collective_sob_id, cb_size, false);
1297 }
1298
1299 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1300 {
1301         struct hl_cs_compl *signal_cs_cmpl =
1302                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1303         struct hl_cs_compl *cs_cmpl =
1304                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1305         struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1306         struct gaudi_collective_properties *cprop;
1307         u32 stream, queue_id, sob_group_offset;
1308         struct gaudi_device *gaudi;
1309         struct hl_device *hdev;
1310         struct hl_cs_job *job;
1311         struct hl_ctx *ctx;
1312
1313         ctx = cs->ctx;
1314         hdev = ctx->hdev;
1315         gaudi = hdev->asic_specific;
1316         cprop = &gaudi->collective_props;
1317
1318         if (cs->encaps_signals) {
1319                 cs_cmpl->hw_sob = handle->hw_sob;
1320                 /* at this point we only need the hw_sob pointer
1321                  * for the completion check before going over the
1322                  * master/slave jobs. The sob_value will be taken later on
1323                  * in gaudi_collective_slave_init_job, according to each
1324                  * job's wait offset value.
1325                  */
1326                 cs_cmpl->sob_val = 0;
1327         } else {
1328                 /* copy the SOB id and value of the signal CS */
1329                 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1330                 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1331         }
1332
1333         /* Check again if the signal CS has already completed.
1334          * If so, don't send any wait CS since the hw_sob
1335          * could already be in reset. If the signal has not completed,
1336          * take a refcount on the hw_sob to prevent the SOB from being
1337          * reset while the wait CS is not yet submitted.
1338          * Note that this check is protected by two locks,
1339          * the hw queue lock and the completion object lock,
1340          * and the same completion object lock also protects
1341          * the hw_sob reset handler function.
1342          * The hw queue lock prevents the hw_sob refcount value,
1343          * changed by the signal/wait flows, from going out of sync.
1344          */
1345         spin_lock(&signal_cs_cmpl->lock);
1346
1347         if (completion_done(&cs->signal_fence->completion)) {
1348                 spin_unlock(&signal_cs_cmpl->lock);
1349                 return -EINVAL;
1350         }
1351         /* Increment kref since all slave queues are now waiting on it */
1352         kref_get(&cs_cmpl->hw_sob->kref);
1353
1354         spin_unlock(&signal_cs_cmpl->lock);
1355
1356         /* Calculate the stream from collective master queue (1st job) */
1357         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1358         stream = job->hw_queue_id % 4;
1359         sob_group_offset =
1360                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1361
1362         list_for_each_entry(job, &cs->job_list, cs_node) {
1363                 queue_id = job->hw_queue_id;
1364
1365                 if (hdev->kernel_queues[queue_id].collective_mode ==
1366                                 HL_COLLECTIVE_MASTER)
1367                         gaudi_collective_master_init_job(hdev, job, stream,
1368                                                 sob_group_offset);
1369                 else
1370                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1371         }
1372
1373         cs_cmpl->sob_group = sob_group_offset;
1374
1375         /* Handle sob group kref and wraparound */
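        /*
         * A short, illustrative walk-through of the logic below: each
         * collective wait takes a reference on the stream's current SOB
         * group and advances next_sob_group_val[stream]. Once that counter
         * reaches HL_MAX_SOB_VAL, one reference is dropped (so the group can
         * be reset when the in-flight waits complete), the counter restarts
         * at 1 and the stream moves to the next reserved group; e.g. with
         * HL_RSVD_SOBS equal to 2 the group index simply toggles 0 -> 1 -> 0.
         */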
1376         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1377         cprop->next_sob_group_val[stream]++;
1378
1379         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1380                 /*
1381                  * Decrement as we reached the max value.
1382                  * The release function won't be called here as we've
1383                  * just incremented the refcount.
1384                  */
1385                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1386                                 gaudi_sob_group_reset_error);
1387                 cprop->next_sob_group_val[stream] = 1;
1388                 /* only two reserved SOB groups are currently in use */
1389                 cprop->curr_sob_group_idx[stream] =
1390                         (cprop->curr_sob_group_idx[stream] + 1) &
1391                                                         (HL_RSVD_SOBS - 1);
1392
1393                 gaudi_collective_map_sobs(hdev, stream);
1394
1395                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1396                                 cprop->curr_sob_group_idx[stream], stream);
1397         }
1398
1399         mb();
1400         hl_fence_put(cs->signal_fence);
1401         cs->signal_fence = NULL;
1402
1403         return 0;
1404 }
1405
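/*
 * A worked example of the computation below (values are illustrative,
 * assuming a 128-byte DEVICE_CACHE_LINE_SIZE and a 16-byte
 * struct packet_msg_prot): two msg_prot packets (completion + MSI) are
 * appended after the user CB. With user_cb_size = 100, cacheline_end = 128
 * and additional_commands = 32; since 100 + 32 > 128 the packets would
 * cross the cache-line boundary, so the CB is padded up to the boundary and
 * the function returns (128 - 100) + 32 = 60. With user_cb_size = 90,
 * 90 + 32 <= 128 and only the 32 bytes of the packets are added.
 */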
1406 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1407 {
1408         u32 cacheline_end, additional_commands;
1409
1410         cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1411         additional_commands = sizeof(struct packet_msg_prot) * 2;
1412
1413         if (user_cb_size + additional_commands > cacheline_end)
1414                 return cacheline_end - user_cb_size + additional_commands;
1415         else
1416                 return additional_commands;
1417 }
1418
1419 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1420                 struct hl_ctx *ctx, struct hl_cs *cs,
1421                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1422                 u32 encaps_signal_offset)
1423 {
1424         struct hw_queue_properties *hw_queue_prop;
1425         struct hl_cs_counters_atomic *cntr;
1426         struct hl_cs_job *job;
1427         struct hl_cb *cb;
1428         u32 cb_size;
1429         bool patched_cb;
1430
1431         cntr = &hdev->aggregated_cs_counters;
1432
1433         if (mode == HL_COLLECTIVE_MASTER) {
1434                 /* The collective master queue CB contains
1435                  * 4 msg short packets for monitor 1 configuration
1436                  * 1 fence packet
1437                  * 4 msg short packets for monitor 2 configuration
1438                  * 1 fence packet
1439                  * 2 msg prot packets for completion and MSI
1440                  */
1441                 cb_size = sizeof(struct packet_msg_short) * 8 +
1442                                 sizeof(struct packet_fence) * 2 +
1443                                 sizeof(struct packet_msg_prot) * 2;
1444                 patched_cb = true;
1445         } else {
1446                 /* The collective slave queue CB contains
1447                  * 4 msg short packets for monitor configuration
1448                  * 1 fence packet
1449                  * 1 additional msg short packet for sob signal
1450                  */
1451                 cb_size = sizeof(struct packet_msg_short) * 5 +
1452                                 sizeof(struct packet_fence);
1453                 patched_cb = false;
1454         }
1455
1456         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1457         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1458         if (!job) {
1459                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1460                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1461                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1462                 return -ENOMEM;
1463         }
1464
1465         /* Allocate an internal mapped CB for non-patched CBs */
1466         cb = hl_cb_kernel_create(hdev, cb_size,
1467                         hdev->mmu_enable && !patched_cb);
1468         if (!cb) {
1469                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1470                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1471                 kfree(job);
1472                 return -EFAULT;
1473         }
1474
1475         job->id = 0;
1476         job->cs = cs;
1477         job->user_cb = cb;
1478         atomic_inc(&job->user_cb->cs_cnt);
1479         job->user_cb_size = cb_size;
1480         job->hw_queue_id = queue_id;
1481
1482         /* since a collective wait cs is guaranteed to have only one
1483          * chunk, we can use this chunk to set the encapsulated signal
1484          * offset in the jobs.
1485          */
1486         if (cs->encaps_signals)
1487                 job->encaps_sig_wait_offset = encaps_signal_offset;
1488
1489         /*
1490          * No parsing is needed, the user CB is the patched CB.
1491          * We call hl_cb_destroy() for two reasons - we no longer need
1492          * the CB in the CB idr, and we need to decrement its refcount,
1493          * which was incremented inside hl_cb_kernel_create().
1494          */
1495         if (patched_cb)
1496                 job->patched_cb = job->user_cb;
1497         else
1498                 job->patched_cb = NULL;
1499
1500         job->job_cb_size = job->user_cb_size;
1501         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1502
1503         /* increment the CS refcount since for external queues we get a completion */
1504         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1505                 cs_get(cs);
1506
1507         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1508
1509         list_add_tail(&job->cs_node, &cs->job_list);
1510
1511         hl_debugfs_add_job(hdev, job);
1512
1513         return 0;
1514 }
1515
1516 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1517                 struct hl_ctx *ctx, struct hl_cs *cs,
1518                 u32 wait_queue_id, u32 collective_engine_id,
1519                 u32 encaps_signal_offset)
1520 {
1521         struct gaudi_device *gaudi = hdev->asic_specific;
1522         struct hw_queue_properties *hw_queue_prop;
1523         u32 queue_id, collective_queue, num_jobs;
1524         u32 stream, nic_queue, nic_idx = 0;
1525         bool skip;
1526         int i, rc = 0;
1527
1528         /* Verify wait queue id is configured as master */
1529         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1530         if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1531                 dev_err(hdev->dev,
1532                         "Queue %d is not configured as collective master\n",
1533                         wait_queue_id);
1534                 return -EINVAL;
1535         }
1536
1537         /* Verify engine id is supported */
1538         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1539                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1540                 dev_err(hdev->dev,
1541                         "Collective wait does not support engine %u\n",
1542                         collective_engine_id);
1543                 return -EINVAL;
1544         }
1545
1546         stream = wait_queue_id % 4;
1547
1548         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1549                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1550         else
1551                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1552
1553         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1554         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1555
1556         /* The first job goes to the collective master queue; it will wait
1557          * for the collective slave queues to finish execution.
1558          * The synchronization is done using two monitors:
1559          * the first monitor for NICs 0-7, the second monitor for NICs 8-9
1560          * and the reduction engine (DMA5/TPC7).
1561          *
1562          * The rest of the jobs go to the collective slave queues, which
1563          * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1564          */
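        /*
         * Illustrative layout of the loop below (assuming 10 NIC engines and
         * NUMBER_OF_SOBS_IN_GRP == 11, so num_jobs == 12): i == 0 creates the
         * master job on wait_queue_id, i == 1..10 create slave jobs on the
         * per-stream NIC queues (skipping NICs that were not initialized) and
         * the last iteration creates the slave job on the reduction engine
         * queue (DMA5 or TPC7).
         */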
1565         for (i = 0 ; i < num_jobs ; i++) {
1566                 if (i == 0) {
1567                         queue_id = wait_queue_id;
1568                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1569                                 HL_COLLECTIVE_MASTER, queue_id,
1570                                 wait_queue_id, encaps_signal_offset);
1571                 } else {
1572                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1573                                 if (gaudi->hw_cap_initialized &
1574                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1575                                         skip = false;
1576                                 else
1577                                         skip = true;
1578
1579                                 queue_id = nic_queue;
1580                                 nic_queue += 4;
1581                                 nic_idx++;
1582
1583                                 if (skip)
1584                                         continue;
1585                         } else {
1586                                 queue_id = collective_queue;
1587                         }
1588
1589                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1590                                 HL_COLLECTIVE_SLAVE, queue_id,
1591                                 wait_queue_id, encaps_signal_offset);
1592                 }
1593
1594                 if (rc)
1595                         return rc;
1596         }
1597
1598         return rc;
1599 }
1600
1601 static int gaudi_late_init(struct hl_device *hdev)
1602 {
1603         struct gaudi_device *gaudi = hdev->asic_specific;
1604         int rc;
1605
1606         rc = gaudi->cpucp_info_get(hdev);
1607         if (rc) {
1608                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1609                 return rc;
1610         }
1611
1612         if ((hdev->card_type == cpucp_card_type_pci) &&
1613                         (hdev->nic_ports_mask & 0x3)) {
1614                 dev_info(hdev->dev,
1615                         "PCI card detected, only 8 ports are enabled\n");
1616                 hdev->nic_ports_mask &= ~0x3;
1617
1618                 /* Stop and disable unused NIC QMANs */
1619                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1620                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1621                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1622
1623                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1624                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1625                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1626
1627                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1628                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1629
1630                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1631         }
1632
1633         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1634         if (rc) {
1635                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1636                 return rc;
1637         }
1638
1639         /* Scrub both SRAM and DRAM */
1640         rc = hdev->asic_funcs->scrub_device_mem(hdev);
1641         if (rc)
1642                 goto disable_pci_access;
1643
1644         rc = gaudi_fetch_psoc_frequency(hdev);
1645         if (rc) {
1646                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1647                 goto disable_pci_access;
1648         }
1649
1650         rc = gaudi_mmu_clear_pgt_range(hdev);
1651         if (rc) {
1652                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1653                 goto disable_pci_access;
1654         }
1655
1656         rc = gaudi_init_tpc_mem(hdev);
1657         if (rc) {
1658                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1659                 goto disable_pci_access;
1660         }
1661
1662         rc = gaudi_collective_init(hdev);
1663         if (rc) {
1664                 dev_err(hdev->dev, "Failed to init collective\n");
1665                 goto disable_pci_access;
1666         }
1667
1668         /* We only support a single user ASID, so as an optimization, just
1669          * initialize the ASID once during device initialization with the fixed value of 1
1670          */
1671         gaudi_mmu_prepare(hdev, 1);
1672
1673         hl_fw_set_pll_profile(hdev);
1674
1675         return 0;
1676
1677 disable_pci_access:
1678         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1679
1680         return rc;
1681 }
1682
1683 static void gaudi_late_fini(struct hl_device *hdev)
1684 {
1685         const struct hwmon_channel_info **channel_info_arr;
1686         int i = 0;
1687
1688         if (!hdev->hl_chip_info->info)
1689                 return;
1690
1691         channel_info_arr = hdev->hl_chip_info->info;
1692
1693         while (channel_info_arr[i]) {
1694                 kfree(channel_info_arr[i]->config);
1695                 kfree(channel_info_arr[i]);
1696                 i++;
1697         }
1698
1699         kfree(channel_info_arr);
1700
1701         hdev->hl_chip_info->info = NULL;
1702 }
1703
1704 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1705 {
1706         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1707         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1708         int i, j, rc = 0;
1709
1710         /*
1711          * The device CPU works with 40-bit addresses, and bit 39 must be set
1712          * to '1' when accessing the host.
1713          * Bits 49:39 of the full host address are saved for a later
1714          * configuration of the HW to perform the extension to 50 bits.
1715          * Because there is a single HW register that holds the extension bits,
1716          * these bits must be identical across the entire allocated range.
1717          */
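        /*
         * In practice this means the allocated range must not cross a 2^39
         * byte boundary, where bits 49:39 would change. The loop below
         * retries the allocation up to GAUDI_ALLOC_CPU_MEM_RETRY_CNT times
         * until the start and end of the HL_CPU_ACCESSIBLE_MEM_SIZE range
         * share the same MSBs; any rejected attempts are freed at the end.
         */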
1718
1719         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1720                 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1721                                                                 &dma_addr_arr[i],
1722                                                                 GFP_KERNEL | __GFP_ZERO);
1723                 if (!virt_addr_arr[i]) {
1724                         rc = -ENOMEM;
1725                         goto free_dma_mem_arr;
1726                 }
1727
1728                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1729                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1730                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1731                         break;
1732         }
1733
1734         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1735                 dev_err(hdev->dev,
1736                         "MSBs of CPU accessible DMA memory are not identical across the allocated range\n");
1737                 rc = -EFAULT;
1738                 goto free_dma_mem_arr;
1739         }
1740
1741         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1742         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1743         hdev->cpu_pci_msb_addr =
1744                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1745
1746         if (!hdev->asic_prop.fw_security_enabled)
1747                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1748
1749 free_dma_mem_arr:
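        /*
         * Free every rejected attempt that precedes index 'i'. On success
         * the matching allocation at index 'i' is kept; if the loop was
         * exhausted, or the i-th allocation itself failed, there is nothing
         * valid at index 'i' and everything that was allocated is released.
         */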
1750         for (j = 0 ; j < i ; j++)
1751                 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1752                                                 dma_addr_arr[j]);
1753
1754         return rc;
1755 }
1756
1757 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1758 {
1759         struct gaudi_device *gaudi = hdev->asic_specific;
1760         struct gaudi_internal_qman_info *q;
1761         u32 i;
1762
1763         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1764                 q = &gaudi->internal_qmans[i];
1765                 if (!q->pq_kernel_addr)
1766                         continue;
1767                 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1768         }
1769 }
1770
1771 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1772 {
1773         struct gaudi_device *gaudi = hdev->asic_specific;
1774         struct gaudi_internal_qman_info *q;
1775         int rc, i;
1776
1777         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1778                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1779                         continue;
1780
1781                 q = &gaudi->internal_qmans[i];
1782
1783                 switch (i) {
1784                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1785                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1786                         break;
1787                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1788                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1789                         break;
1790                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1791                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1792                         break;
1793                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1794                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1795                         break;
1796                 default:
1797                         dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1798                         rc = -EINVAL;
1799                         goto free_internal_qmans_pq_mem;
1800                 }
1801
1802                 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1803                                                                 GFP_KERNEL | __GFP_ZERO);
1804                 if (!q->pq_kernel_addr) {
1805                         rc = -ENOMEM;
1806                         goto free_internal_qmans_pq_mem;
1807                 }
1808         }
1809
1810         return 0;
1811
1812 free_internal_qmans_pq_mem:
1813         gaudi_free_internal_qmans_pq_mem(hdev);
1814         return rc;
1815 }
1816
1817 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1818 {
1819         struct asic_fixed_properties *prop = &hdev->asic_prop;
1820         struct pci_mem_region *region;
1821
1822         /* CFG */
1823         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1824         region->region_base = CFG_BASE;
1825         region->region_size = CFG_SIZE;
1826         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1827         region->bar_size = CFG_BAR_SIZE;
1828         region->bar_id = CFG_BAR_ID;
1829         region->used = 1;
1830
1831         /* SRAM */
1832         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1833         region->region_base = SRAM_BASE_ADDR;
1834         region->region_size = SRAM_SIZE;
1835         region->offset_in_bar = 0;
1836         region->bar_size = SRAM_BAR_SIZE;
1837         region->bar_id = SRAM_BAR_ID;
1838         region->used = 1;
1839
1840         /* DRAM */
1841         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1842         region->region_base = DRAM_PHYS_BASE;
1843         region->region_size = hdev->asic_prop.dram_size;
1844         region->offset_in_bar = 0;
1845         region->bar_size = prop->dram_pci_bar_size;
1846         region->bar_id = HBM_BAR_ID;
1847         region->used = 1;
1848
1849         /* SP SRAM */
1850         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1851         region->region_base = PSOC_SCRATCHPAD_ADDR;
1852         region->region_size = PSOC_SCRATCHPAD_SIZE;
1853         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1854         region->bar_size = CFG_BAR_SIZE;
1855         region->bar_id = CFG_BAR_ID;
1856         region->used = 1;
1857 }
1858
1859 static int gaudi_sw_init(struct hl_device *hdev)
1860 {
1861         struct gaudi_device *gaudi;
1862         u32 i, event_id = 0;
1863         int rc;
1864
1865         /* Allocate device structure */
1866         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1867         if (!gaudi)
1868                 return -ENOMEM;
1869
1870         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1871                 if (gaudi_irq_map_table[i].valid) {
1872                         if (event_id == GAUDI_EVENT_SIZE) {
1873                                 dev_err(hdev->dev,
1874                                         "Event array exceeds the limit of %u events\n",
1875                                         GAUDI_EVENT_SIZE);
1876                                 rc = -EINVAL;
1877                                 goto free_gaudi_device;
1878                         }
1879
1880                         gaudi->events[event_id++] =
1881                                         gaudi_irq_map_table[i].fc_id;
1882                 }
1883         }
1884
1885         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1886
1887         hdev->asic_specific = gaudi;
1888
1889         /* Create DMA pool for small allocations */
1890         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1891                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1892         if (!hdev->dma_pool) {
1893                 dev_err(hdev->dev, "failed to create DMA pool\n");
1894                 rc = -ENOMEM;
1895                 goto free_gaudi_device;
1896         }
1897
1898         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1899         if (rc)
1900                 goto free_dma_pool;
1901
1902         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1903         if (!hdev->cpu_accessible_dma_pool) {
1904                 dev_err(hdev->dev,
1905                         "Failed to create CPU accessible DMA pool\n");
1906                 rc = -ENOMEM;
1907                 goto free_cpu_dma_mem;
1908         }
1909
1910         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1911                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1912                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1913         if (rc) {
1914                 dev_err(hdev->dev,
1915                         "Failed to add memory to CPU accessible DMA pool\n");
1916                 rc = -EFAULT;
1917                 goto free_cpu_accessible_dma_pool;
1918         }
1919
1920         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1921         if (rc)
1922                 goto free_cpu_accessible_dma_pool;
1923
1924         spin_lock_init(&gaudi->hw_queues_lock);
1925
1926         hdev->supports_sync_stream = true;
1927         hdev->supports_coresight = true;
1928         hdev->supports_staged_submission = true;
1929         hdev->supports_wait_for_multi_cs = true;
1930
1931         hdev->asic_funcs->set_pci_memory_regions(hdev);
1932         hdev->stream_master_qid_arr =
1933                                 hdev->asic_funcs->get_stream_master_qid_arr();
1934         hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1935
1936         return 0;
1937
1938 free_cpu_accessible_dma_pool:
1939         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1940 free_cpu_dma_mem:
1941         if (!hdev->asic_prop.fw_security_enabled)
1942                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1943                                         hdev->cpu_pci_msb_addr);
1944         hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1945                                         hdev->cpu_accessible_dma_address);
1946 free_dma_pool:
1947         dma_pool_destroy(hdev->dma_pool);
1948 free_gaudi_device:
1949         kfree(gaudi);
1950         return rc;
1951 }
1952
1953 static int gaudi_sw_fini(struct hl_device *hdev)
1954 {
1955         struct gaudi_device *gaudi = hdev->asic_specific;
1956
1957         gaudi_free_internal_qmans_pq_mem(hdev);
1958
1959         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1960
1961         if (!hdev->asic_prop.fw_security_enabled)
1962                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1963                                         hdev->cpu_pci_msb_addr);
1964
1965         hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1966                                         hdev->cpu_accessible_dma_address);
1967
1968         dma_pool_destroy(hdev->dma_pool);
1969
1970         kfree(gaudi);
1971
1972         return 0;
1973 }
1974
1975 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1976 {
1977         struct hl_device *hdev = arg;
1978         int i;
1979
1980         if (hdev->disabled)
1981                 return IRQ_HANDLED;
1982
1983         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1984                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1985
1986         hl_irq_handler_eq(irq, &hdev->event_queue);
1987
1988         return IRQ_HANDLED;
1989 }
1990
1991 /*
1992  * For backward compatibility, new MSI interrupts should be set after the
1993  * existing CPU and NIC interrupts.
1994  */
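/*
 * Illustrative mapping (assuming NIC_NUMBER_OF_ENGINES is 10): completion
 * queue IRQs with nr below GAUDI_EVENT_QUEUE_MSI_IDX, and the CPU event
 * queue itself, map directly to MSI vector 'nr'; any other (newer) IRQ is
 * pushed past the NIC vectors to 'nr + NIC_NUMBER_OF_ENGINES + 1',
 * i.e. nr + 11.
 */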
1995 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1996                                 bool cpu_eq)
1997 {
1998         int msi_vec;
1999
2000         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
2001                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
2002                                 GAUDI_EVENT_QUEUE_MSI_IDX);
2003
2004         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
2005                         (nr + NIC_NUMBER_OF_ENGINES + 1);
2006
2007         return pci_irq_vector(hdev->pdev, msi_vec);
2008 }
2009
2010 static int gaudi_enable_msi_single(struct hl_device *hdev)
2011 {
2012         int rc, irq;
2013
2014         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2015
2016         irq = gaudi_pci_irq_vector(hdev, 0, false);
2017         rc = request_irq(irq, gaudi_irq_handler_single, 0,
2018                         "gaudi single msi", hdev);
2019         if (rc)
2020                 dev_err(hdev->dev,
2021                         "Failed to request single MSI IRQ\n");
2022
2023         return rc;
2024 }
2025
2026 static int gaudi_enable_msi_multi(struct hl_device *hdev)
2027 {
2028         int cq_cnt = hdev->asic_prop.completion_queues_count;
2029         int rc, i, irq_cnt_init, irq;
2030
2031         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2032                 irq = gaudi_pci_irq_vector(hdev, i, false);
2033                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
2034                                 &hdev->completion_queue[i]);
2035                 if (rc) {
2036                         dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2037                         goto free_irqs;
2038                 }
2039         }
2040
2041         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2042         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2043                                 &hdev->event_queue);
2044         if (rc) {
2045                 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2046                 goto free_irqs;
2047         }
2048
2049         return 0;
2050
2051 free_irqs:
2052         for (i = 0 ; i < irq_cnt_init ; i++)
2053                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
2054                                 &hdev->completion_queue[i]);
2055         return rc;
2056 }
2057
2058 static int gaudi_enable_msi(struct hl_device *hdev)
2059 {
2060         struct gaudi_device *gaudi = hdev->asic_specific;
2061         int rc;
2062
2063         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2064                 return 0;
2065
2066         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2067         if (rc < 0) {
2068                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2069                 return rc;
2070         }
2071
2072         if (rc < NUMBER_OF_INTERRUPTS) {
2073                 gaudi->multi_msi_mode = false;
2074                 rc = gaudi_enable_msi_single(hdev);
2075         } else {
2076                 gaudi->multi_msi_mode = true;
2077                 rc = gaudi_enable_msi_multi(hdev);
2078         }
2079
2080         if (rc)
2081                 goto free_pci_irq_vectors;
2082
2083         gaudi->hw_cap_initialized |= HW_CAP_MSI;
2084
2085         return 0;
2086
2087 free_pci_irq_vectors:
2088         pci_free_irq_vectors(hdev->pdev);
2089         return rc;
2090 }
2091
2092 static void gaudi_sync_irqs(struct hl_device *hdev)
2093 {
2094         struct gaudi_device *gaudi = hdev->asic_specific;
2095         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2096
2097         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2098                 return;
2099
2100         /* Wait for all pending IRQs to be finished */
2101         if (gaudi->multi_msi_mode) {
2102                 for (i = 0 ; i < cq_cnt ; i++)
2103                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2104
2105                 synchronize_irq(gaudi_pci_irq_vector(hdev,
2106                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
2107                                                 true));
2108         } else {
2109                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2110         }
2111 }
2112
2113 static void gaudi_disable_msi(struct hl_device *hdev)
2114 {
2115         struct gaudi_device *gaudi = hdev->asic_specific;
2116         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2117
2118         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2119                 return;
2120
2121         gaudi_sync_irqs(hdev);
2122
2123         if (gaudi->multi_msi_mode) {
2124                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2125                                                 true);
2126                 free_irq(irq, &hdev->event_queue);
2127
2128                 for (i = 0 ; i < cq_cnt ; i++) {
2129                         irq = gaudi_pci_irq_vector(hdev, i, false);
2130                         free_irq(irq, &hdev->completion_queue[i]);
2131                 }
2132         } else {
2133                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2134         }
2135
2136         pci_free_irq_vectors(hdev->pdev);
2137
2138         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2139 }
2140
2141 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2142 {
2143         struct gaudi_device *gaudi = hdev->asic_specific;
2144
2145         if (hdev->asic_prop.fw_security_enabled)
2146                 return;
2147
2148         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2149                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2150                 return;
2151
2152         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2153                 return;
2154
2155         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2156                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2157         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2158                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2159         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2160                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2161         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2162                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2163         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2164                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2165         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2166                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2167         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2168                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2169         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2170                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2171
2172         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2173                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2174         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2175                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2176         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2177                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2178         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2179                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2180         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2181                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2182         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2183                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2184         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2185                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2186         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2187                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2188
2189         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2190                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2191         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2192                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2193         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2194                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2195         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2196                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2197         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2198                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2199         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2200                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2201         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2202                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2203         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2204                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2205
2206         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2207 }
2208
2209 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2210 {
2211         struct gaudi_device *gaudi = hdev->asic_specific;
2212
2213         if (hdev->asic_prop.fw_security_enabled)
2214                 return;
2215
2216         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2217                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2218                 return;
2219
2220         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2221                 return;
2222
2223         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2224                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2225         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2226                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2227         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2228                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2229         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2230                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2231         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2232                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2233         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2234                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2235         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2236                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2237         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2238                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2239
2240         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2241                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2242         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2243                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2244         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2245                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2246         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2247                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2248         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2249                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2250         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2251                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2252         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2253                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2254         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2255                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2256
2257         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2258                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2259         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2260                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2261         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2262                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2263         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2264                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2265         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2266                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2267         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2268                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2269         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2270                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2271         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2272                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2273
2274         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2275 }
2276
2277 static void gaudi_init_e2e(struct hl_device *hdev)
2278 {
2279         if (hdev->asic_prop.fw_security_enabled)
2280                 return;
2281
2282         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2283                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2284                 return;
2285
2286         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2287         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2288         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2289         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2290
2291         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2292         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2293         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2294         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2295
2296         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2297         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2298         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2299         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2300
2301         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2302         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2303         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2304         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2305
2306         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2307         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2308         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2309         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2310
2311         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2312         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2313         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2314         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2315
2316         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2317         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2318         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2319         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2320
2321         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2322         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2323         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2324         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2325
2326         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2327         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2328         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2329         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2330
2331         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2332         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2333         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2334         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2335
2336         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2337         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2338         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2339         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2340
2341         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2342         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2343         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2344         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2345
2346         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2347         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2348         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2349         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2350
2351         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2352         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2353         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2354         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2355
2356         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2357         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2358         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2359         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2360
2361         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2362         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2363         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2364         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2365
2366         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2367         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2368         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2369         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2370
2371         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2372         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2373         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2374         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2375
2376         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2377         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2378         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2379         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2380
2381         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2382         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2383         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2384         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2385
2386         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2387         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2388         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2389         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2390
2391         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2392         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2393         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2394         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2395
2396         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2397         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2398         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2399         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2400
2401         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2402         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2403         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2404         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2405
2406         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2407                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2408         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2409                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2410
2411         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2412                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2413         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2414                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2415
2416         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2417                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2418         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2419                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2420
2421         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2422                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2423         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2424                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2425
2426         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2427                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2428         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2429                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2430
2431         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2432                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2433         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2434                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2435
2436         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2437                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2438         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2439                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2440
2441         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2442                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2443         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2444                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2445
2446         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2447                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2448         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2449                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2450
2451         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2452                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2453         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2454                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2455
2456         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2457                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2458         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2459                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2460
2461         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2462                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2463         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2464                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2465
2466         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2467                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2468         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2469                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2470
2471         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2472                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2473         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2474                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2475
2476         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2477                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2478         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2479                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2480
2481         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2482                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2483         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2484                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2485
2486         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2487                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2488         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2489                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2490
2491         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2492                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2493         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2494                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2495
2496         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2497                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2498         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2499                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2500
2501         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2502                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2503         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2504                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2505
2506         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2507                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2508         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2509                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2510
2511         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2512                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2513         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2514                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2515
2516         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2517                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2518         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2519                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2520
2521         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2522                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2523         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2524                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2525 }
2526
2527 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2528 {
2529         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2530
2531         if (hdev->asic_prop.fw_security_enabled)
2532                 return;
2533
2534         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2535                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2536                 return;
2537
2538         hbm0_wr = 0x33333333;
2539         hbm0_rd = 0x77777777;
2540         hbm1_wr = 0x55555555;
2541         hbm1_rd = 0xDDDDDDDD;
2542
2543         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2544         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2545         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2546         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2547
2548         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2549         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2550         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2551         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2552
2553         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2554         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2555         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2556         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2557
2558         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2559         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2560         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2561         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2562
2563         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2564                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2565                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2566         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2567                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2568                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2569         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2570                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2571                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2572         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2573                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2574                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2575
2576         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2577                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2578                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2579         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2580                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2581                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2582         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2583                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2584                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2585         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2586                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2587                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2588 }
2589
2590 static void gaudi_init_golden_registers(struct hl_device *hdev)
2591 {
2592         u32 tpc_offset;
2593         int tpc_id, i;
2594
2595         gaudi_init_e2e(hdev);
2596         gaudi_init_hbm_cred(hdev);
2597
2598         for (tpc_id = 0, tpc_offset = 0;
2599                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2600                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2601                 /* Mask all arithmetic interrupts from TPC */
2602                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2603                 /* Set 16 cache lines */
2604                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2605                                 ICACHE_FETCH_LINE_NUM, 2);
2606         }
2607
2608         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2609         for (i = 0 ; i < 128 ; i += 8)
2610                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2611
2612         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2613         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2614         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2615         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2616 }
2617
2618 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2619                                         int qman_id, dma_addr_t qman_pq_addr)
2620 {
2621         struct cpu_dyn_regs *dyn_regs =
2622                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2623         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2624         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2625         u32 q_off, dma_qm_offset;
2626         u32 dma_qm_err_cfg, irq_handler_offset;
2627
2628         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2629
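             /*
              * Sync manager monitor payload (mtr) and sync object (so) base
              * addresses for the CP MSG_BASE registers: _en is the east-north
              * SM, _ws is the west-south SM.
              */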
2630         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2631                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2632         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2633                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2634         so_base_en_lo = lower_32_bits(CFG_BASE +
2635                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2636         so_base_en_hi = upper_32_bits(CFG_BASE +
2637                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2638         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2639                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2640         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2641                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2642         so_base_ws_lo = lower_32_bits(CFG_BASE +
2643                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2644         so_base_ws_hi = upper_32_bits(CFG_BASE +
2645                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2646
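             /* Per-stream registers are consecutive 32-bit registers, hence the 4-byte stride */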
2647         q_off = dma_qm_offset + qman_id * 4;
2648
2649         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2650         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2651
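             /* The PQ size register takes the log2 of the queue length */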
2652         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2653         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2654         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2655
2656         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2657         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2658                                                         QMAN_LDMA_SRC_OFFSET);
2659         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2660                                                         QMAN_LDMA_DST_OFFSET);
2661
2662         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2663         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2664         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2665         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2666         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2667         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2668         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2669         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2670
2671         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2672
2673         /* The following configuration is needed only once per QMAN */
2674         if (qman_id == 0) {
2675                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2676                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2677                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2678
2679                 /* Configure RAZWI IRQ */
2680                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2681                 if (hdev->stop_on_err)
2682                         dma_qm_err_cfg |=
2683                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2684
2685                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2686
2687                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2688                         lower_32_bits(CFG_BASE + irq_handler_offset));
2689                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2690                         upper_32_bits(CFG_BASE + irq_handler_offset));
2691
2692                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2693                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2694                                                                         dma_id);
2695
2696                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2697                                 QM_ARB_ERR_MSG_EN_MASK);
2698
2699                 /* Set timeout to maximum */
2700                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2701
2702                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2703                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2704
2705                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2706         }
2707 }
2708
2709 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2710 {
2711         struct cpu_dyn_regs *dyn_regs =
2712                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2713         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2714         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2715         u32 irq_handler_offset;
2716
2717         /* Set to maximum possible according to physical size */
2718         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2719         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2720
2721         /* WA for H/W bug H3-2116 */
2722         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2723
2724         /* STOP_ON bit implies no completion is sent for the operation in case of RAZWI */
2725         if (hdev->stop_on_err)
2726                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2727
2728         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2729
2730         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2731                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2732                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2733
2734         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2735                 lower_32_bits(CFG_BASE + irq_handler_offset));
2736         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2737                 upper_32_bits(CFG_BASE + irq_handler_offset));
2738
2739         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2740                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2741         WREG32(mmDMA0_CORE_PROT + dma_offset,
2742                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2743         /* If the channel is secured, it should be in MMU bypass mode */
2744         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2745                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2746         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2747 }
2748
2749 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2750                                 u32 enable_mask)
2751 {
2752         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2753
2754         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2755 }
2756
2757 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2758 {
2759         struct gaudi_device *gaudi = hdev->asic_specific;
2760         struct hl_hw_queue *q;
2761         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2762
2763         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2764                 return;
2765
2766         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2767                 dma_id = gaudi_dma_assignment[i];
2768                 /*
2769                  * For queues after the CPU Q, we need to add 1 to get the
2770                  * correct queue index. In addition, we need to add the CPU EQ
2771                  * and NIC IRQs in order to get the correct MSI register.
2772                  */
2773                 if (dma_id > 1) {
2774                         cpu_skip = 1;
2775                         nic_skip = NIC_NUMBER_OF_ENGINES;
2776                 } else {
2777                         cpu_skip = 0;
2778                         nic_skip = 0;
2779                 }
2780
2781                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2782                         q_idx = 4 * dma_id + j + cpu_skip;
2783                         q = &hdev->kernel_queues[q_idx];
2784                         q->cq_id = cq_id++;
2785                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2786                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2787                                                 q->bus_address);
2788                 }
2789
2790                 gaudi_init_dma_core(hdev, dma_id);
2791
2792                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2793         }
2794
2795         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2796 }
2797
2798 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2799                                         int qman_id, u64 qman_base_addr)
2800 {
2801         struct cpu_dyn_regs *dyn_regs =
2802                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2803         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2804         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2805         u32 dma_qm_err_cfg, irq_handler_offset;
2806         u32 q_off, dma_qm_offset;
2807
2808         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2809
2810         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2811                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2812         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2813                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2814         so_base_en_lo = lower_32_bits(CFG_BASE +
2815                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2816         so_base_en_hi = upper_32_bits(CFG_BASE +
2817                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2818         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2819                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2820         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2821                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2822         so_base_ws_lo = lower_32_bits(CFG_BASE +
2823                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2824         so_base_ws_hi = upper_32_bits(CFG_BASE +
2825                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2826
2827         q_off = dma_qm_offset + qman_id * 4;
2828
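             /* Streams 0-3 are the upper CPs and have a PQ; qman_id 4 configures the lower CP */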
2829         if (qman_id < 4) {
2830                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2831                                         lower_32_bits(qman_base_addr));
2832                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2833                                         upper_32_bits(qman_base_addr));
2834
2835                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2836                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2837                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2838
2839                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2840                                                         QMAN_CPDMA_SIZE_OFFSET);
2841                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2842                                                         QMAN_CPDMA_SRC_OFFSET);
2843                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2844                                                         QMAN_CPDMA_DST_OFFSET);
2845         } else {
2846                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2847                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2848                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2849
2850                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2851                                                         QMAN_LDMA_SIZE_OFFSET);
2852                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2853                                                         QMAN_LDMA_SRC_OFFSET);
2854                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2855                                                         QMAN_LDMA_DST_OFFSET);
2856
2857                 /* Configure RAZWI IRQ */
2858                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2859                 if (hdev->stop_on_err)
2860                         dma_qm_err_cfg |=
2861                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2862
2863                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2864
2865                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2866                         lower_32_bits(CFG_BASE + irq_handler_offset));
2867                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2868                         upper_32_bits(CFG_BASE + irq_handler_offset));
2869
2870                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2871                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2872                                                                         dma_id);
2873
2874                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2875                                 QM_ARB_ERR_MSG_EN_MASK);
2876
2877                 /* Set timeout to maximum */
2878                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2879
2880                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2881                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2882                                 QMAN_INTERNAL_MAKE_TRUSTED);
2883         }
2884
2885         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2886         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2887         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2888         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2889
2890         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2891         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2892                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2893                                 mtr_base_ws_lo);
2894                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2895                                 mtr_base_ws_hi);
2896                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2897                                 so_base_ws_lo);
2898                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2899                                 so_base_ws_hi);
2900         }
2901 }
2902
2903 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2904 {
2905         struct gaudi_device *gaudi = hdev->asic_specific;
2906         struct gaudi_internal_qman_info *q;
2907         u64 qman_base_addr;
2908         int i, j, dma_id, internal_q_index;
2909
2910         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2911                 return;
2912
2913         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2914                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2915
2916                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2917                          /*
2918                           * Add the CPU queue in order to get the correct queue
2919                           * number, as all internal queues are placed after it
2920                           */
2921                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2922
2923                         q = &gaudi->internal_qmans[internal_q_index];
2924                         qman_base_addr = (u64) q->pq_dma_addr;
2925                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2926                                                 qman_base_addr);
2927                 }
2928
2929                 /* Initializing lower CP for HBM DMA QMAN */
2930                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2931
2932                 gaudi_init_dma_core(hdev, dma_id);
2933
2934                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2935         }
2936
2937         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2938 }
2939
2940 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2941                                         int qman_id, u64 qman_base_addr)
2942 {
2943         struct cpu_dyn_regs *dyn_regs =
2944                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2945         u32 mtr_base_lo, mtr_base_hi;
2946         u32 so_base_lo, so_base_hi;
2947         u32 irq_handler_offset;
2948         u32 q_off, mme_id;
2949         u32 mme_qm_err_cfg;
2950
2951         mtr_base_lo = lower_32_bits(CFG_BASE +
2952                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2953         mtr_base_hi = upper_32_bits(CFG_BASE +
2954                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2955         so_base_lo = lower_32_bits(CFG_BASE +
2956                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2957         so_base_hi = upper_32_bits(CFG_BASE +
2958                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2959
2960         q_off = mme_offset + qman_id * 4;
2961
2962         if (qman_id < 4) {
2963                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2964                                         lower_32_bits(qman_base_addr));
2965                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2966                                         upper_32_bits(qman_base_addr));
2967
2968                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2969                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2970                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2971
2972                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2973                                                         QMAN_CPDMA_SIZE_OFFSET);
2974                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2975                                                         QMAN_CPDMA_SRC_OFFSET);
2976                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2977                                                         QMAN_CPDMA_DST_OFFSET);
2978         } else {
2979                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2980                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2981                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2982
2983                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2984                                                         QMAN_LDMA_SIZE_OFFSET);
2985                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2986                                                         QMAN_LDMA_SRC_OFFSET);
2987                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2988                                                         QMAN_LDMA_DST_OFFSET);
2989
2990                 /* Configure RAZWI IRQ */
2991                 mme_id = mme_offset /
2992                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2993
2994                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2995                 if (hdev->stop_on_err)
2996                         mme_qm_err_cfg |=
2997                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2998
2999                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3000
3001                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3002                         lower_32_bits(CFG_BASE + irq_handler_offset));
3003                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3004                         upper_32_bits(CFG_BASE + irq_handler_offset));
3005
3006                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3007                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3008                                                                         mme_id);
3009
3010                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3011                                 QM_ARB_ERR_MSG_EN_MASK);
3012
3013                 /* Set timeout to maximum */
3014                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
3015
3016                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3017                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3018                                 QMAN_INTERNAL_MAKE_TRUSTED);
3019         }
3020
3021         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3022         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3023         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3024         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3025 }
3026
3027 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3028 {
3029         struct gaudi_device *gaudi = hdev->asic_specific;
3030         struct gaudi_internal_qman_info *q;
3031         u64 qman_base_addr;
3032         u32 mme_offset;
3033         int i, internal_q_index;
3034
3035         if (gaudi->hw_cap_initialized & HW_CAP_MME)
3036                 return;
3037
3038         /*
3039          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3040          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3041          */
3042
3043         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3044
3045         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3046                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3047                 q = &gaudi->internal_qmans[internal_q_index];
3048                 qman_base_addr = (u64) q->pq_dma_addr;
3049                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3050                                         qman_base_addr);
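                     /* The first 4 streams go to MME2 (N_W); switch to MME0 (S_W) for the rest */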
3051                 if (i == 3)
3052                         mme_offset = 0;
3053         }
3054
3055         /* Initializing lower CP for MME QMANs */
3056         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3057         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3058         gaudi_init_mme_qman(hdev, 0, 4, 0);
3059
3060         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3061         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3062
3063         gaudi->hw_cap_initialized |= HW_CAP_MME;
3064 }
3065
3066 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3067                                 int qman_id, u64 qman_base_addr)
3068 {
3069         struct cpu_dyn_regs *dyn_regs =
3070                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3071         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3072         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3073         u32 tpc_qm_err_cfg, irq_handler_offset;
3074         u32 q_off, tpc_id;
3075
3076         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3077                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3078         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3079                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3080         so_base_en_lo = lower_32_bits(CFG_BASE +
3081                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3082         so_base_en_hi = upper_32_bits(CFG_BASE +
3083                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3084         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3085                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3086         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3087                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3088         so_base_ws_lo = lower_32_bits(CFG_BASE +
3089                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3090         so_base_ws_hi = upper_32_bits(CFG_BASE +
3091                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3092
3093         q_off = tpc_offset + qman_id * 4;
3094
3095         tpc_id = tpc_offset /
3096                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3097
3098         if (qman_id < 4) {
3099                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3100                                         lower_32_bits(qman_base_addr));
3101                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3102                                         upper_32_bits(qman_base_addr));
3103
3104                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3105                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3106                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3107
3108                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3109                                                         QMAN_CPDMA_SIZE_OFFSET);
3110                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3111                                                         QMAN_CPDMA_SRC_OFFSET);
3112                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3113                                                         QMAN_CPDMA_DST_OFFSET);
3114         } else {
3115                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3116                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3117                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3118
3119                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3120                                                         QMAN_LDMA_SIZE_OFFSET);
3121                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3122                                                         QMAN_LDMA_SRC_OFFSET);
3123                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3124                                                         QMAN_LDMA_DST_OFFSET);
3125
3126                 /* Configure RAZWI IRQ */
3127                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3128                 if (hdev->stop_on_err)
3129                         tpc_qm_err_cfg |=
3130                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3131
3132                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3133
3134                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3135                         lower_32_bits(CFG_BASE + irq_handler_offset));
3136                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3137                         upper_32_bits(CFG_BASE + irq_handler_offset));
3138
3139                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3140                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3141                                                                         tpc_id);
3142
3143                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3144                                 QM_ARB_ERR_MSG_EN_MASK);
3145
3146                 /* Set timeout to maximum */
3147                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3148
3149                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3150                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3151                                 QMAN_INTERNAL_MAKE_TRUSTED);
3152         }
3153
3154         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3155         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3156         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3157         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3158
3159         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3160         if (tpc_id == 6) {
3161                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3162                                 mtr_base_ws_lo);
3163                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3164                                 mtr_base_ws_hi);
3165                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3166                                 so_base_ws_lo);
3167                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3168                                 so_base_ws_hi);
3169         }
3170 }
3171
3172 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3173 {
3174         struct gaudi_device *gaudi = hdev->asic_specific;
3175         struct gaudi_internal_qman_info *q;
3176         u64 qman_base_addr;
3177         u32 so_base_hi, tpc_offset = 0;
3178         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3179                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3180         int i, tpc_id, internal_q_index;
3181
3182         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3183                 return;
3184
3185         so_base_hi = upper_32_bits(CFG_BASE +
3186                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3187
3188         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3189                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3190                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3191                                                 tpc_id * QMAN_STREAMS + i;
3192                         q = &gaudi->internal_qmans[internal_q_index];
3193                         qman_base_addr = (u64) q->pq_dma_addr;
3194                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3195                                                 qman_base_addr);
3196
3197                         if (i == 3) {
3198                                 /* Initializing lower CP for TPC QMAN */
3199                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3200
3201                                 /* Enable the QMAN and TPC channel */
3202                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3203                                                 QMAN_TPC_ENABLE);
3204                         }
3205                 }
3206
3207                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3208                                 so_base_hi);
3209
3210                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3211
3212                 gaudi->hw_cap_initialized |=
3213                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3214         }
3215 }
3216
3217 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3218                                 int qman_id, u64 qman_base_addr, int nic_id)
3219 {
3220         struct cpu_dyn_regs *dyn_regs =
3221                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3222         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3223         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3224         u32 nic_qm_err_cfg, irq_handler_offset;
3225         u32 q_off;
3226
3227         mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3228                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3229         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3230                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3231         so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3232                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3233         so_base_en_hi = upper_32_bits(CFG_BASE +
3234                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3235         mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3236                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3237         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3238                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3239         so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3240                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3241         so_base_ws_hi = upper_32_bits(CFG_BASE +
3242                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3243
3244         q_off = nic_offset + qman_id * 4;
3245
3246         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3247         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3248
3249         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3250         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3251         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3252
3253         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3254                                                         QMAN_LDMA_SIZE_OFFSET);
3255         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3256                                                         QMAN_LDMA_SRC_OFFSET);
3257         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3258                                                         QMAN_LDMA_DST_OFFSET);
3259
3260         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3261         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3262         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3263         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3264
3265         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3266         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3267         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3268         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3269         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3270
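             /* The following configuration is needed only once per NIC QMAN */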
3271         if (qman_id == 0) {
3272                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3273                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3274                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3275
3276                 /* Configure RAZWI IRQ */
3277                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3278                 if (hdev->stop_on_err)
3279                         nic_qm_err_cfg |=
3280                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3281
3282                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3283
3284                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3285                         lower_32_bits(CFG_BASE + irq_handler_offset));
3286                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3287                         upper_32_bits(CFG_BASE + irq_handler_offset));
3288
3289                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3290                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3291                                                                         nic_id);
3292
3293                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3294                                 QM_ARB_ERR_MSG_EN_MASK);
3295
3296                 /* Set timeout to maximum */
3297                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3298
3299                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3300                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3301                                 QMAN_INTERNAL_MAKE_TRUSTED);
3302         }
3303 }
3304
3305 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3306 {
3307         struct gaudi_device *gaudi = hdev->asic_specific;
3308         struct gaudi_internal_qman_info *q;
3309         u64 qman_base_addr;
3310         u32 nic_offset = 0;
3311         u32 nic_delta_between_qmans =
3312                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3313         u32 nic_delta_between_nics =
3314                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3315         int i, nic_id, internal_q_index;
3316
3317         if (!hdev->nic_ports_mask)
3318                 return;
3319
3320         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3321                 return;
3322
3323         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3324
3325         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3326                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3327                         nic_offset += nic_delta_between_qmans;
3328                         if (nic_id & 1) {
3329                                 nic_offset -= (nic_delta_between_qmans * 2);
3330                                 nic_offset += nic_delta_between_nics;
3331                         }
3332                         continue;
3333                 }
3334
3335                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3336                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3337                                                 nic_id * QMAN_STREAMS + i;
3338                         q = &gaudi->internal_qmans[internal_q_index];
3339                         qman_base_addr = (u64) q->pq_dma_addr;
3340                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3341                                                 qman_base_addr, nic_id);
3342                 }
3343
3344                 /* Enable the QMAN */
3345                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3346
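                     /* Each NIC block hosts two QMANs; after the second one, advance to the next NIC block */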
3347                 nic_offset += nic_delta_between_qmans;
3348                 if (nic_id & 1) {
3349                         nic_offset -= (nic_delta_between_qmans * 2);
3350                         nic_offset += nic_delta_between_nics;
3351                 }
3352
3353                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3354         }
3355 }
3356
3357 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3358 {
3359         struct gaudi_device *gaudi = hdev->asic_specific;
3360
3361         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3362                 return;
3363
3364         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3365         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3366         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3367 }
3368
3369 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3370 {
3371         struct gaudi_device *gaudi = hdev->asic_specific;
3372
3373         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3374                 return;
3375
3376         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3377         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3378         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3379         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3380         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3381 }
3382
3383 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3384 {
3385         struct gaudi_device *gaudi = hdev->asic_specific;
3386
3387         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3388                 return;
3389
3390         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3391         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3392 }
3393
3394 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3395 {
3396         struct gaudi_device *gaudi = hdev->asic_specific;
3397         u32 tpc_offset = 0;
3398         int tpc_id;
3399
3400         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3401                 return;
3402
3403         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3404                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3405                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3406         }
3407 }
3408
3409 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3410 {
3411         struct gaudi_device *gaudi = hdev->asic_specific;
3412         u32 nic_mask, nic_offset = 0;
3413         u32 nic_delta_between_qmans =
3414                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3415         u32 nic_delta_between_nics =
3416                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3417         int nic_id;
3418
3419         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3420                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3421
3422                 if (gaudi->hw_cap_initialized & nic_mask)
3423                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3424
3425                 nic_offset += nic_delta_between_qmans;
3426                 if (nic_id & 1) {
3427                         nic_offset -= (nic_delta_between_qmans * 2);
3428                         nic_offset += nic_delta_between_nics;
3429                 }
3430         }
3431 }
3432
3433 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3434 {
3435         struct gaudi_device *gaudi = hdev->asic_specific;
3436
3437         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3438                 return;
3439
3440         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3441         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3442         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3443         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3444 }
3445
3446 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3447 {
3448         struct gaudi_device *gaudi = hdev->asic_specific;
3449
3450         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3451                 return;
3452
3453         /* Stop CPs of HBM DMA QMANs */
3454
3455         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3456         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3457         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3458         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3459         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3460 }
3461
3462 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3463 {
3464         struct gaudi_device *gaudi = hdev->asic_specific;
3465
3466         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3467                 return;
3468
3469         /* Stop CPs of MME QMANs */
3470         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3471         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3472 }
3473
3474 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3475 {
3476         struct gaudi_device *gaudi = hdev->asic_specific;
3477
3478         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3479                 return;
3480
3481         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3482         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3483         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3484         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3485         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3486         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3487         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3488         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3489 }
3490
3491 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3492 {
3493         struct gaudi_device *gaudi = hdev->asic_specific;
3494
3495         /* Stop upper CPs of QMANs */
3496
3497         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3498                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3499                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3500                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3501                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3502
3503         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3504                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3505                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3506                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3507                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3508
3509         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3510                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3511                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3512                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3513                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3514
3515         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3516                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3517                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3518                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3519                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3520
3521         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3522                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3523                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3524                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3525                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3526
3527         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3528                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3529                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3530                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3531                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3532
3533         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3534                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3535                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3536                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3537                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3538
3539         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3540                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3541                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3542                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3543                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3544
3545         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3546                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3547                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3548                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3549                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3550
3551         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3552                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3553                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3554                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3555                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3556 }
3557
3558 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3559 {
3560         struct gaudi_device *gaudi = hdev->asic_specific;
3561
3562         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3563                 return;
3564
3565         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3566         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3567         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3568 }
3569
3570 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3571 {
3572         struct gaudi_device *gaudi = hdev->asic_specific;
3573
3574         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3575                 return;
3576
3577         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3578         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3579         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3580         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3581         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3582 }
3583
3584 static void gaudi_mme_stall(struct hl_device *hdev)
3585 {
3586         struct gaudi_device *gaudi = hdev->asic_specific;
3587
3588         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3589                 return;
3590
3591         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3592         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3593         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3594         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3595         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3596         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3597         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3598         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3599         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3600         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3601         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3602         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3603         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3604         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3605         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3606         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3607         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3608 }
3609
3610 static void gaudi_tpc_stall(struct hl_device *hdev)
3611 {
3612         struct gaudi_device *gaudi = hdev->asic_specific;
3613
3614         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3615                 return;
3616
3617         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3618         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3619         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3620         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3621         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3622         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3623         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3624         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3625 }
3626
3627 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3628 {
3629         u32 qman_offset;
3630         int i;
3631
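             /* When FW security is enabled, clock gating is left to the FW, so skip */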
3632         if (hdev->asic_prop.fw_security_enabled)
3633                 return;
3634
3635         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3636                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3637                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3638
3639                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3640         }
3641
3642         WREG32(mmMME0_QM_CGM_CFG, 0);
3643         WREG32(mmMME0_QM_CGM_CFG1, 0);
3644         WREG32(mmMME2_QM_CGM_CFG, 0);
3645         WREG32(mmMME2_QM_CGM_CFG1, 0);
3646
3647         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3648                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3649                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3650
3651                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3652         }
3653 }
3654
3655 static void gaudi_enable_timestamp(struct hl_device *hdev)
3656 {
3657         /* Disable the timestamp counter */
3658         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3659
3660         /* Zero the lower/upper parts of the 64-bit counter */
3661         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3662         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3663
3664         /* Enable the counter */
3665         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3666 }
3667
3668 static void gaudi_disable_timestamp(struct hl_device *hdev)
3669 {
3670         /* Disable the timestamp counter */
3671         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3672 }
3673
3674 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3675 {
3676         u32 wait_timeout_ms;
3677
3678         if (hdev->pldm)
3679                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3680         else
3681                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3682
3683         if (fw_reset)
3684                 goto skip_engines;
3685
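             /* Stop the QMANs first, then stall the engine cores, then disable the QMANs */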
3686         gaudi_stop_nic_qmans(hdev);
3687         gaudi_stop_mme_qmans(hdev);
3688         gaudi_stop_tpc_qmans(hdev);
3689         gaudi_stop_hbm_dma_qmans(hdev);
3690         gaudi_stop_pci_dma_qmans(hdev);
3691
3692         msleep(wait_timeout_ms);
3693
3694         gaudi_pci_dma_stall(hdev);
3695         gaudi_hbm_dma_stall(hdev);
3696         gaudi_tpc_stall(hdev);
3697         gaudi_mme_stall(hdev);
3698
3699         msleep(wait_timeout_ms);
3700
3701         gaudi_disable_nic_qmans(hdev);
3702         gaudi_disable_mme_qmans(hdev);
3703         gaudi_disable_tpc_qmans(hdev);
3704         gaudi_disable_hbm_dma_qmans(hdev);
3705         gaudi_disable_pci_dma_qmans(hdev);
3706
3707         gaudi_disable_timestamp(hdev);
3708
3709 skip_engines:
3710         gaudi_disable_msi(hdev);
3711 }
3712
3713 static int gaudi_mmu_init(struct hl_device *hdev)
3714 {
3715         struct asic_fixed_properties *prop = &hdev->asic_prop;
3716         struct gaudi_device *gaudi = hdev->asic_specific;
3717         u64 hop0_addr;
3718         int rc, i;
3719
3720         if (!hdev->mmu_enable)
3721                 return 0;
3722
3723         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3724                 return 0;
3725
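             /* Set the hop0 page table address for each ASID */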
3726         for (i = 0 ; i < prop->max_asid ; i++) {
3727                 hop0_addr = prop->mmu_pgt_addr +
3728                                 (i * prop->mmu_hop_table_size);
3729
3730                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3731                 if (rc) {
3732                         dev_err(hdev->dev,
3733                                 "failed to set hop0 addr for asid %d\n", i);
3734                         goto err;
3735                 }
3736         }
3737
3738         /* init MMU cache management page */
3739         WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3740         WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3741
3742         /* mem cache invalidation */
3743         WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3744
3745         hl_mmu_invalidate_cache(hdev, true, 0);
3746
3747         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3748         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3749
3750         WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3751
3752         /*
3753          * The H/W expects the first PI after init to be 1. After wraparound
3754          * we'll write 0.
3755          */
3756         gaudi->mmu_cache_inv_pi = 1;
3757
3758         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3759
3760         return 0;
3761
3762 err:
3763         return rc;
3764 }
3765
3766 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3767 {
3768         void __iomem *dst;
3769
3770         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3771
3772         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3773 }
3774
3775 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3776 {
3777         void __iomem *dst;
3778
3779         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3780
3781         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3782 }
3783
3784 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3785 {
3786         struct dynamic_fw_load_mgr *dynamic_loader;
3787         struct cpu_dyn_regs *dyn_regs;
3788
3789         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3790
3791         /*
3792          * Here we set initial values for a few specific dynamic registers.
3793          * Before the first descriptor is read from the FW, those values
3794          * have to be hard-coded. In later stages of the protocol they are
3795          * updated automatically by reading the FW descriptor, so the data
3796          * there is always up-to-date.
3797          */
3798         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3799         dyn_regs->kmd_msg_to_cpu =
3800                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3801         dyn_regs->cpu_cmd_status_to_host =
3802                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3803
3804         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3805 }
3806
3807 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3808 {
3809         struct static_fw_load_mgr *static_loader;
3810
3811         static_loader = &hdev->fw_loader.static_loader;
3812
3813         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3814         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3815         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3816         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3817         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3818         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3819         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3820         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3821         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3822         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3823         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3824         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3825         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3826                         GAUDI_PLDM_RESET_WAIT_MSEC :
3827                         GAUDI_CPU_RESET_WAIT_MSEC;
3828 }
3829
3830 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3831 {
3832         struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3833
3834         pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3835         pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3836         pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3837         pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3838         pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3839         pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3840 }
3841
3842 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3843 {
3844         struct asic_fixed_properties *prop = &hdev->asic_prop;
3845         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3846
3847         /* fill common fields */
3848         fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3849         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3850         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3851         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3852         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3853         fw_loader->skip_bmc = !hdev->bmc_enable;
3854         fw_loader->sram_bar_id = SRAM_BAR_ID;
3855         fw_loader->dram_bar_id = HBM_BAR_ID;
3856
3857         if (prop->dynamic_fw_load)
3858                 gaudi_init_dynamic_firmware_loader(hdev);
3859         else
3860                 gaudi_init_static_firmware_loader(hdev);
3861 }
3862
3863 static int gaudi_init_cpu(struct hl_device *hdev)
3864 {
3865         struct gaudi_device *gaudi = hdev->asic_specific;
3866         int rc;
3867
3868         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3869                 return 0;
3870
3871         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3872                 return 0;
3873
3874         /*
3875          * The device CPU works with 40-bit addresses.
3876          * This register sets the extension to 50 bits.
3877          */
3878         if (!hdev->asic_prop.fw_security_enabled)
3879                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3880
3881         rc = hl_fw_init_cpu(hdev);
3882
3883         if (rc)
3884                 return rc;
3885
3886         gaudi->hw_cap_initialized |= HW_CAP_CPU;
3887
3888         return 0;
3889 }
3890
3891 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3892 {
3893         struct cpu_dyn_regs *dyn_regs =
3894                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3895         struct asic_fixed_properties *prop = &hdev->asic_prop;
3896         struct gaudi_device *gaudi = hdev->asic_specific;
3897         u32 status, irq_handler_offset;
3898         struct hl_eq *eq;
3899         struct hl_hw_queue *cpu_pq =
3900                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3901         int err;
3902
3903         if (!hdev->cpu_queues_enable)
3904                 return 0;
3905
3906         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3907                 return 0;
3908
3909         eq = &hdev->event_queue;
3910
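             /*
              * Pass the PQ/EQ/CQ base addresses and sizes to the device CPU
              * through the CPU_IF registers, then signal readiness and wait
              * for the CPU-CP handshake to complete.
              */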
3911         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3912         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3913
3914         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3915         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3916
3917         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3918                         lower_32_bits(hdev->cpu_accessible_dma_address));
3919         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3920                         upper_32_bits(hdev->cpu_accessible_dma_address));
3921
3922         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3923         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3924         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3925
3926         /* Used for EQ CI */
3927         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3928
3929         WREG32(mmCPU_IF_PF_PQ_PI, 0);
3930
3931         if (gaudi->multi_msi_mode)
3932                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3933         else
3934                 WREG32(mmCPU_IF_QUEUE_INIT,
3935                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3936
3937         irq_handler_offset = prop->gic_interrupts_enable ?
3938                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3939                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3940
3941         WREG32(irq_handler_offset,
3942                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3943
3944         err = hl_poll_timeout(
3945                 hdev,
3946                 mmCPU_IF_QUEUE_INIT,
3947                 status,
3948                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3949                 1000,
3950                 cpu_timeout);
3951
3952         if (err) {
3953                 dev_err(hdev->dev,
3954                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3955                 return -EIO;
3956         }
3957
3958         /* update FW application security bits */
3959         if (prop->fw_cpu_boot_dev_sts0_valid)
3960                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3961         if (prop->fw_cpu_boot_dev_sts1_valid)
3962                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3963
3964         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3965         return 0;
3966 }
3967
3968 static void gaudi_pre_hw_init(struct hl_device *hdev)
3969 {
3970         /* Perform read from the device to make sure device is up */
3971         RREG32(mmHW_STATE);
3972
3973         if (!hdev->asic_prop.fw_security_enabled) {
3974                 /* Set the access through PCI bars (Linux driver only) as
3975                  * secured
3976                  */
3977                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3978                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3979                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3980
3981                 /* Perform read to flush the waiting writes to ensure
3982                  * configuration was set in the device
3983                  */
3984                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3985         }
3986
3987         /*
3988          * Let's mark in the H/W that we have reached this point. We check
3989          * this value in the reset_before_init function to understand whether
3990          * we need to reset the chip before doing H/W init. This register is
3991          * cleared by the H/W upon H/W reset
3992          */
3993         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3994 }
3995
3996 static int gaudi_hw_init(struct hl_device *hdev)
3997 {
3998         struct gaudi_device *gaudi = hdev->asic_specific;
3999         int rc;
4000
4001         gaudi_pre_hw_init(hdev);
4002
4003         /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
4004          * So we set it here and if anyone tries to move it later to
4005          * a different address, there will be an error
4006          */
4007         if (hdev->asic_prop.iatu_done_by_fw)
4008                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4009
4010         /*
4011          * Before pushing u-boot/Linux to the device, the HBM BAR needs to
4012          * be set to the DRAM base address
4013          */
4014         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4015                 dev_err(hdev->dev,
4016                         "failed to map HBM bar to DRAM base address\n");
4017                 return -EIO;
4018         }
4019
4020         rc = gaudi_init_cpu(hdev);
4021         if (rc) {
4022                 dev_err(hdev->dev, "failed to initialize CPU\n");
4023                 return rc;
4024         }
4025
4026         /* In case the clock gating was enabled in preboot we need to disable
4027          * it here before touching the MME/TPC registers.
4028          */
4029         gaudi_disable_clock_gating(hdev);
4030
4031         /* SRAM scrambler must be initialized after CPU is running from HBM */
4032         gaudi_init_scrambler_sram(hdev);
4033
4034         /* This is here just in case we are working without CPU */
4035         gaudi_init_scrambler_hbm(hdev);
4036
4037         gaudi_init_golden_registers(hdev);
4038
4039         rc = gaudi_mmu_init(hdev);
4040         if (rc)
4041                 return rc;
4042
4043         gaudi_init_security(hdev);
4044
4045         gaudi_init_pci_dma_qmans(hdev);
4046
4047         gaudi_init_hbm_dma_qmans(hdev);
4048
4049         gaudi_init_mme_qmans(hdev);
4050
4051         gaudi_init_tpc_qmans(hdev);
4052
4053         gaudi_init_nic_qmans(hdev);
4054
4055         gaudi_enable_timestamp(hdev);
4056
4057         /* MSI must be enabled before CPU queues and NIC are initialized */
4058         rc = gaudi_enable_msi(hdev);
4059         if (rc)
4060                 goto disable_queues;
4061
4062         /* must be called after MSI was enabled */
4063         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4064         if (rc) {
4065                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4066                         rc);
4067                 goto disable_msi;
4068         }
4069
4070         /* Perform read from the device to flush all configuration */
4071         RREG32(mmHW_STATE);
4072
4073         return 0;
4074
4075 disable_msi:
4076         gaudi_disable_msi(hdev);
4077 disable_queues:
4078         gaudi_disable_mme_qmans(hdev);
4079         gaudi_disable_pci_dma_qmans(hdev);
4080
4081         return rc;
4082 }
4083
4084 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4085 {
4086         struct cpu_dyn_regs *dyn_regs =
4087                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4088         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4089         struct gaudi_device *gaudi = hdev->asic_specific;
4090         bool driver_performs_reset;
4091
4092         if (!hard_reset) {
4093                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4094                 return;
4095         }
4096
4097         if (hdev->pldm) {
4098                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4099                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4100         } else {
4101                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4102                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4103         }
4104
4105         if (fw_reset) {
4106                 dev_dbg(hdev->dev,
4107                         "Firmware performs HARD reset, going to wait %dms\n",
4108                         reset_timeout_ms);
4109
4110                 goto skip_reset;
4111         }
4112
4113         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4114                                         !hdev->asic_prop.hard_reset_done_by_fw);
4115
4116         /* Set device to handle FLR by H/W as we will put the device CPU to
4117          * halt mode
4118          */
4119         if (driver_performs_reset)
4120                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4121                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4122
4123         /* If linux is loaded in the device CPU we need to communicate with it
4124          * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4125          * registers in case of old F/Ws
4126          */
4127         if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4128                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4129                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4130                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4131
4132                 WREG32(irq_handler_offset,
4133                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4134
4135                 /* This is a hail-mary attempt to revive the card in the small chance that the
4136                  * f/w has experienced a watchdog event, which caused it to return back to preboot.
4137                  * In that case, triggering reset through GIC won't help. We need to trigger the
4138                  * reset as if Linux wasn't loaded.
4139                  *
4140                  * We do it only if the reset cause was HB, because that would be the indication
4141                  * of such an event.
4142                  *
4143                  * In case watchdog hasn't expired but we still got HB, then this won't do any
4144                  * damage.
4145                  */
4146                 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4147                         if (hdev->asic_prop.hard_reset_done_by_fw)
4148                                 hl_fw_ask_hard_reset_without_linux(hdev);
4149                         else
4150                                 hl_fw_ask_halt_machine_without_linux(hdev);
4151                 }
4152         } else {
4153                 if (hdev->asic_prop.hard_reset_done_by_fw)
4154                         hl_fw_ask_hard_reset_without_linux(hdev);
4155                 else
4156                         hl_fw_ask_halt_machine_without_linux(hdev);
4157         }
4158
4159         if (driver_performs_reset) {
4160
4161                 /* Configure the reset registers. Must be done as early as
4162                  * possible in case we fail during H/W initialization
4163                  */
4164                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4165                                                 (CFG_RST_H_DMA_MASK |
4166                                                 CFG_RST_H_MME_MASK |
4167                                                 CFG_RST_H_SM_MASK |
4168                                                 CFG_RST_H_TPC_7_MASK));
4169
4170                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4171
4172                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4173                                                 (CFG_RST_H_HBM_MASK |
4174                                                 CFG_RST_H_TPC_7_MASK |
4175                                                 CFG_RST_H_NIC_MASK |
4176                                                 CFG_RST_H_SM_MASK |
4177                                                 CFG_RST_H_DMA_MASK |
4178                                                 CFG_RST_H_MME_MASK |
4179                                                 CFG_RST_H_CPU_MASK |
4180                                                 CFG_RST_H_MMU_MASK));
4181
4182                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4183                                                 (CFG_RST_L_IF_MASK |
4184                                                 CFG_RST_L_PSOC_MASK |
4185                                                 CFG_RST_L_TPC_MASK));
4186
4187                 msleep(cpu_timeout_ms);
4188
4189                 /* Tell ASIC not to re-initialize PCIe */
4190                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4191
4192                 /* Restart BTL/BLR upon hard-reset */
4193                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4194
4195                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4196                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4197
4198                 dev_dbg(hdev->dev,
4199                         "Issued HARD reset command, going to wait %dms\n",
4200                         reset_timeout_ms);
4201         } else {
4202                 dev_dbg(hdev->dev,
4203                         "Firmware performs HARD reset, going to wait %dms\n",
4204                         reset_timeout_ms);
4205         }
4206
4207 skip_reset:
4208         /*
4209          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4210          * itself is in reset. Need to wait until the reset is deasserted
4211          */
4212         msleep(reset_timeout_ms);
4213
4214         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4215         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4216                 dev_err(hdev->dev,
4217                         "Timeout while waiting for device to reset 0x%x\n",
4218                         status);
4219
4220         if (gaudi) {
4221                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4222                                                 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4223                                                 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4224                                                 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4225                                                 HW_CAP_HBM_SCRAMBLER);
4226
4227                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4228
4229                 hdev->device_cpu_is_halted = false;
4230         }
4231 }
4232
4233 static int gaudi_suspend(struct hl_device *hdev)
4234 {
4235         int rc;
4236
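             /*
              * Ask the device CPU to stop issuing PCI accesses towards the
              * host while the host is suspended.
              */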
4237         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4238         if (rc)
4239                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4240
4241         return rc;
4242 }
4243
4244 static int gaudi_resume(struct hl_device *hdev)
4245 {
4246         return gaudi_init_iatu(hdev);
4247 }
4248
4249 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4250                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4251 {
4252         int rc;
4253
4254         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4255                         VM_DONTCOPY | VM_NORESERVE;
4256
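             /*
              * dma_addr was shifted by HOST_PHYS_BASE when it was allocated,
              * so subtract that offset back to get the real host DMA address
              * before handing it to dma_mmap_coherent().
              */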
4257         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4258                                 (dma_addr - HOST_PHYS_BASE), size);
4259         if (rc)
4260                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4261
4262         return rc;
4263 }
4264
4265 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4266 {
4267         struct cpu_dyn_regs *dyn_regs =
4268                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4269         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4270         struct gaudi_device *gaudi = hdev->asic_specific;
4271         bool invalid_queue = false;
4272         int dma_id;
4273
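             /*
              * Translate the logical H/W queue ID to the PQ_PI doorbell
              * register of the QMAN that owns it. Each QMAN exposes four PQs
              * whose 32-bit PI registers are laid out 4 bytes apart, hence the
              * "* 4" stride in the offset calculations below.
              */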
4274         switch (hw_queue_id) {
4275         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4276                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4277                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4278                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4279                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4280                 break;
4281
4282         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4283                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4284                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4285                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4286                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4287                 break;
4288
4289         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4290                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4291                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4292                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4293                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4294                 break;
4295
4296         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4297                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4298                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4299                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4300                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4301                 break;
4302
4303         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4304                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4305                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4306                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4307                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4308                 break;
4309
4310         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4311                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4312                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4313                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4314                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4315                 break;
4316
4317         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4318                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4319                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4320                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4321                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4322                 break;
4323
4324         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4325                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4326                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4327                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4328                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4329                 break;
4330
4331         case GAUDI_QUEUE_ID_CPU_PQ:
4332                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4333                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4334                 else
4335                         invalid_queue = true;
4336                 break;
4337
4338         case GAUDI_QUEUE_ID_MME_0_0:
4339                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4340                 break;
4341
4342         case GAUDI_QUEUE_ID_MME_0_1:
4343                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4344                 break;
4345
4346         case GAUDI_QUEUE_ID_MME_0_2:
4347                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4348                 break;
4349
4350         case GAUDI_QUEUE_ID_MME_0_3:
4351                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4352                 break;
4353
4354         case GAUDI_QUEUE_ID_MME_1_0:
4355                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4356                 break;
4357
4358         case GAUDI_QUEUE_ID_MME_1_1:
4359                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4360                 break;
4361
4362         case GAUDI_QUEUE_ID_MME_1_2:
4363                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4364                 break;
4365
4366         case GAUDI_QUEUE_ID_MME_1_3:
4367                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4368                 break;
4369
4370         case GAUDI_QUEUE_ID_TPC_0_0:
4371                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4372                 break;
4373
4374         case GAUDI_QUEUE_ID_TPC_0_1:
4375                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4376                 break;
4377
4378         case GAUDI_QUEUE_ID_TPC_0_2:
4379                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4380                 break;
4381
4382         case GAUDI_QUEUE_ID_TPC_0_3:
4383                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4384                 break;
4385
4386         case GAUDI_QUEUE_ID_TPC_1_0:
4387                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4388                 break;
4389
4390         case GAUDI_QUEUE_ID_TPC_1_1:
4391                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4392                 break;
4393
4394         case GAUDI_QUEUE_ID_TPC_1_2:
4395                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4396                 break;
4397
4398         case GAUDI_QUEUE_ID_TPC_1_3:
4399                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4400                 break;
4401
4402         case GAUDI_QUEUE_ID_TPC_2_0:
4403                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4404                 break;
4405
4406         case GAUDI_QUEUE_ID_TPC_2_1:
4407                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4408                 break;
4409
4410         case GAUDI_QUEUE_ID_TPC_2_2:
4411                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4412                 break;
4413
4414         case GAUDI_QUEUE_ID_TPC_2_3:
4415                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4416                 break;
4417
4418         case GAUDI_QUEUE_ID_TPC_3_0:
4419                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4420                 break;
4421
4422         case GAUDI_QUEUE_ID_TPC_3_1:
4423                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4424                 break;
4425
4426         case GAUDI_QUEUE_ID_TPC_3_2:
4427                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4428                 break;
4429
4430         case GAUDI_QUEUE_ID_TPC_3_3:
4431                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4432                 break;
4433
4434         case GAUDI_QUEUE_ID_TPC_4_0:
4435                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4436                 break;
4437
4438         case GAUDI_QUEUE_ID_TPC_4_1:
4439                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4440                 break;
4441
4442         case GAUDI_QUEUE_ID_TPC_4_2:
4443                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4444                 break;
4445
4446         case GAUDI_QUEUE_ID_TPC_4_3:
4447                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4448                 break;
4449
4450         case GAUDI_QUEUE_ID_TPC_5_0:
4451                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4452                 break;
4453
4454         case GAUDI_QUEUE_ID_TPC_5_1:
4455                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4456                 break;
4457
4458         case GAUDI_QUEUE_ID_TPC_5_2:
4459                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4460                 break;
4461
4462         case GAUDI_QUEUE_ID_TPC_5_3:
4463                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4464                 break;
4465
4466         case GAUDI_QUEUE_ID_TPC_6_0:
4467                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4468                 break;
4469
4470         case GAUDI_QUEUE_ID_TPC_6_1:
4471                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4472                 break;
4473
4474         case GAUDI_QUEUE_ID_TPC_6_2:
4475                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4476                 break;
4477
4478         case GAUDI_QUEUE_ID_TPC_6_3:
4479                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4480                 break;
4481
4482         case GAUDI_QUEUE_ID_TPC_7_0:
4483                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4484                 break;
4485
4486         case GAUDI_QUEUE_ID_TPC_7_1:
4487                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4488                 break;
4489
4490         case GAUDI_QUEUE_ID_TPC_7_2:
4491                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4492                 break;
4493
4494         case GAUDI_QUEUE_ID_TPC_7_3:
4495                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4496                 break;
4497
4498         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4499                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4500                         invalid_queue = true;
4501
4502                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4503                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4504                 break;
4505
4506         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4507                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4508                         invalid_queue = true;
4509
4510                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4511                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4512                 break;
4513
4514         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4515                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4516                         invalid_queue = true;
4517
4518                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4519                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4520                 break;
4521
4522         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4523                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4524                         invalid_queue = true;
4525
4526                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4527                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4528                 break;
4529
4530         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4531                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4532                         invalid_queue = true;
4533
4534                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4535                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4536                 break;
4537
4538         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4539                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4540                         invalid_queue = true;
4541
4542                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4543                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4544                 break;
4545
4546         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4547                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4548                         invalid_queue = true;
4549
4550                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4551                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4552                 break;
4553
4554         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4555                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4556                         invalid_queue = true;
4557
4558                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4559                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4560                 break;
4561
4562         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4563                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4564                         invalid_queue = true;
4565
4566                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4567                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4568                 break;
4569
4570         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4571                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4572                         invalid_queue = true;
4573
4574                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4575                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4576                 break;
4577
4578         default:
4579                 invalid_queue = true;
4580         }
4581
4582         if (invalid_queue) {
4583                 /* Should never get here */
4584                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4585                         hw_queue_id);
4586                 return;
4587         }
4588
4589         db_value = pi;
4590
4591         /* ring the doorbell */
4592         WREG32(db_reg_offset, db_value);
4593
4594         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4595                 /* make sure device CPU will read latest data from host */
4596                 mb();
4597
4598                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4599                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4600                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4601
4602                 WREG32(irq_handler_offset,
4603                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4604         }
4605 }
4606
4607 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4608                                 struct hl_bd *bd)
4609 {
4610         __le64 *pbd = (__le64 *) bd;
4611
4612         /* The QMANs are in host memory, so a simple copy suffices */
4613         pqe[0] = pbd[0];
4614         pqe[1] = pbd[1];
4615 }
4616
4617 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4618                                         dma_addr_t *dma_handle, gfp_t flags)
4619 {
4620         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4621                                                 dma_handle, flags);
4622
4623         /* Shift to the device's base physical address of host memory */
4624         if (kernel_addr)
4625                 *dma_handle += HOST_PHYS_BASE;
4626
4627         return kernel_addr;
4628 }
4629
4630 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4631                 void *cpu_addr, dma_addr_t dma_handle)
4632 {
4633         /* Cancel the device's base physical address of host memory */
4634         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4635
4636         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4637 }
4638
4639 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4640 {
4641         struct asic_fixed_properties *prop = &hdev->asic_prop;
4642         u64 cur_addr = prop->dram_user_base_address;
4643         u32 chunk_size, busy;
4644         int rc, dma_id;
4645
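             /*
              * Scrub the HBM in chunks of up to 2GB: each DMA channel gets its
              * own chunk and is committed in memory-set mode with the scrub
              * value as the pattern, then all channels are polled until they
              * are no longer busy before the next batch is issued.
              */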
4646         while (cur_addr < prop->dram_end_address) {
4647                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4648                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4649
4650                         chunk_size =
4651                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4652
4653                         dev_dbg(hdev->dev,
4654                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4655                                 cur_addr, cur_addr + chunk_size);
4656
4657                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4658                                         lower_32_bits(val));
4659                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4660                                         upper_32_bits(val));
4661                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4662                                                 lower_32_bits(cur_addr));
4663                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4664                                                 upper_32_bits(cur_addr));
4665                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4666                                         chunk_size);
4667                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4668                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4669                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4670
4671                         cur_addr += chunk_size;
4672
4673                         if (cur_addr == prop->dram_end_address)
4674                                 break;
4675                 }
4676
4677                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4678                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4679
4680                         rc = hl_poll_timeout(
4681                                 hdev,
4682                                 mmDMA0_CORE_STS0 + dma_offset,
4683                                 busy,
4684                                 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4685                                 1000,
4686                                 HBM_SCRUBBING_TIMEOUT_US);
4687
4688                         if (rc) {
4689                                 dev_err(hdev->dev,
4690                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4691                                         dma_id);
4692                                 return -EIO;
4693                         }
4694                 }
4695         }
4696
4697         return 0;
4698 }
4699
4700 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4701 {
4702         struct asic_fixed_properties *prop = &hdev->asic_prop;
4703         u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
4704                         min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
4705         u64 addr, size, val = hdev->memory_scrub_val;
4706         ktime_t timeout;
4707         int rc = 0;
4708
4709         if (!hdev->memory_scrub)
4710                 return 0;
4711
4712         timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4713         while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4714                 if (ktime_compare(ktime_get(), timeout) > 0) {
4715                         dev_err(hdev->dev, "waiting for idle timeout\n");
4716                         return -ETIMEDOUT;
4717                 }
4718                 usleep_range((1000 >> 2) + 1, 1000);
4719         }
4720
4721         /* Scrub SRAM */
4722         addr = prop->sram_user_base_address;
4723         size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4724
4725         dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4726                         addr, addr + size, val);
4727         rc = gaudi_memset_device_memory(hdev, addr, size, val);
4728         if (rc) {
4729                 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4730                 return rc;
4731         }
4732
4733         /* Scrub HBM using all DMA channels in parallel */
4734         rc = gaudi_scrub_device_dram(hdev, val);
4735         if (rc) {
4736                 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4737                 return rc;
4738         }
4739
4740         return 0;
4741 }
4742
4743 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4744                                 u32 queue_id, dma_addr_t *dma_handle,
4745                                 u16 *queue_len)
4746 {
4747         struct gaudi_device *gaudi = hdev->asic_specific;
4748         struct gaudi_internal_qman_info *q;
4749
4750         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4751                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4752                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4753                 return NULL;
4754         }
4755
4756         q = &gaudi->internal_qmans[queue_id];
4757         *dma_handle = q->pq_dma_addr;
4758         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4759
4760         return q->pq_kernel_addr;
4761 }
4762
4763 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4764                                 u16 len, u32 timeout, u64 *result)
4765 {
4766         struct gaudi_device *gaudi = hdev->asic_specific;
4767
4768         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4769                 if (result)
4770                         *result = 0;
4771                 return 0;
4772         }
4773
4774         if (!timeout)
4775                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4776
4777         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4778                                                 timeout, result);
4779 }
4780
4781 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4782 {
4783         struct packet_msg_prot *fence_pkt;
4784         dma_addr_t pkt_dma_addr;
4785         u32 fence_val, tmp, timeout_usec;
4786         dma_addr_t fence_dma_addr;
4787         u32 *fence_ptr;
4788         int rc;
4789
4790         if (hdev->pldm)
4791                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4792         else
4793                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4794
4795         fence_val = GAUDI_QMAN0_FENCE_VAL;
4796
4797         fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4798         if (!fence_ptr) {
4799                 dev_err(hdev->dev,
4800                         "Failed to allocate memory for H/W queue %d testing\n",
4801                         hw_queue_id);
4802                 return -ENOMEM;
4803         }
4804
4805         *fence_ptr = 0;
4806
4807         fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4808                                                 &pkt_dma_addr);
4809         if (!fence_pkt) {
4810                 dev_err(hdev->dev,
4811                         "Failed to allocate packet for H/W queue %d testing\n",
4812                         hw_queue_id);
4813                 rc = -ENOMEM;
4814                 goto free_fence_ptr;
4815         }
4816
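             /*
              * Build a MSG_PROT packet that writes the fence value into the
              * scratch buffer. If the queue and its engine are functional, the
              * packet is executed and the polled value below matches.
              */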
4817         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4818         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4819         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4820
4821         fence_pkt->ctl = cpu_to_le32(tmp);
4822         fence_pkt->value = cpu_to_le32(fence_val);
4823         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4824
4825         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4826                                         sizeof(struct packet_msg_prot),
4827                                         pkt_dma_addr);
4828         if (rc) {
4829                 dev_err(hdev->dev,
4830                         "Failed to send fence packet to H/W queue %d\n",
4831                         hw_queue_id);
4832                 goto free_pkt;
4833         }
4834
4835         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4836                                         1000, timeout_usec, true);
4837
4838         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4839
4840         if (rc == -ETIMEDOUT) {
4841                 dev_err(hdev->dev,
4842                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4843                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4844                 rc = -EIO;
4845         }
4846
4847 free_pkt:
4848         hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4849 free_fence_ptr:
4850         hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4851         return rc;
4852 }
4853
4854 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4855 {
4856         struct gaudi_device *gaudi = hdev->asic_specific;
4857
4858         /*
4859          * Check the capability here because send_cpu_message() won't
4860          * update the result value if the capability is not set
4861          */
4862         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4863                 return 0;
4864
4865         return hl_fw_test_cpu_queue(hdev);
4866 }
4867
4868 static int gaudi_test_queues(struct hl_device *hdev)
4869 {
4870         int i, rc, ret_val = 0;
4871
4872         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4873                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4874                         rc = gaudi_test_queue(hdev, i);
4875                         if (rc)
4876                                 ret_val = -EINVAL;
4877                 }
4878         }
4879
4880         rc = gaudi_test_cpu_queue(hdev);
4881         if (rc)
4882                 ret_val = -EINVAL;
4883
4884         return ret_val;
4885 }
4886
4887 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4888                 gfp_t mem_flags, dma_addr_t *dma_handle)
4889 {
4890         void *kernel_addr;
4891
4892         if (size > GAUDI_DMA_POOL_BLK_SIZE)
4893                 return NULL;
4894
4895         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4896
4897         /* Shift to the device's base physical address of host memory */
4898         if (kernel_addr)
4899                 *dma_handle += HOST_PHYS_BASE;
4900
4901         return kernel_addr;
4902 }
4903
4904 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4905                         dma_addr_t dma_addr)
4906 {
4907         /* Cancel the device's base physical address of host memory */
4908         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4909
4910         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4911 }
4912
4913 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4914                                         size_t size, dma_addr_t *dma_handle)
4915 {
4916         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4917 }
4918
4919 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4920                                                 size_t size, void *vaddr)
4921 {
4922         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4923 }
4924
4925 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4926 {
4927         struct scatterlist *sg, *sg_next_iter;
4928         u32 count, dma_desc_cnt;
4929         u64 len, len_next;
4930         dma_addr_t addr, addr_next;
4931
4932         dma_desc_cnt = 0;
4933
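             /*
              * Count how many LIN_DMA descriptors the S/G list needs:
              * DMA-contiguous entries are merged as long as the combined
              * length does not exceed DMA_MAX_TRANSFER_SIZE.
              */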
4934         for_each_sgtable_dma_sg(sgt, sg, count) {
4935                 len = sg_dma_len(sg);
4936                 addr = sg_dma_address(sg);
4937
4938                 if (len == 0)
4939                         break;
4940
4941                 while ((count + 1) < sgt->nents) {
4942                         sg_next_iter = sg_next(sg);
4943                         len_next = sg_dma_len(sg_next_iter);
4944                         addr_next = sg_dma_address(sg_next_iter);
4945
4946                         if (len_next == 0)
4947                                 break;
4948
4949                         if ((addr + len == addr_next) &&
4950                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4951                                 len += len_next;
4952                                 count++;
4953                                 sg = sg_next_iter;
4954                         } else {
4955                                 break;
4956                         }
4957                 }
4958
4959                 dma_desc_cnt++;
4960         }
4961
4962         return dma_desc_cnt * sizeof(struct packet_lin_dma);
4963 }
4964
4965 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4966                                 struct hl_cs_parser *parser,
4967                                 struct packet_lin_dma *user_dma_pkt,
4968                                 u64 addr, enum dma_data_direction dir)
4969 {
4970         struct hl_userptr *userptr;
4971         int rc;
4972
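             /*
              * If this host buffer was already pinned by a previous packet in
              * the job, skip pinning and just account for its descriptor list
              * size in the patched CB.
              */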
4973         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4974                         parser->job_userptr_list, &userptr))
4975                 goto already_pinned;
4976
4977         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4978         if (!userptr)
4979                 return -ENOMEM;
4980
4981         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4982                                 userptr);
4983         if (rc)
4984                 goto free_userptr;
4985
4986         list_add_tail(&userptr->job_node, parser->job_userptr_list);
4987
4988         rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
4989         if (rc) {
4990                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4991                 goto unpin_memory;
4992         }
4993
4994         userptr->dma_mapped = true;
4995         userptr->dir = dir;
4996
4997 already_pinned:
4998         parser->patched_cb_size +=
4999                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5000
5001         return 0;
5002
5003 unpin_memory:
5004         list_del(&userptr->job_node);
5005         hl_unpin_host_memory(hdev, userptr);
5006 free_userptr:
5007         kfree(userptr);
5008         return rc;
5009 }
5010
5011 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5012                                 struct hl_cs_parser *parser,
5013                                 struct packet_lin_dma *user_dma_pkt,
5014                                 bool src_in_host)
5015 {
5016         enum dma_data_direction dir;
5017         bool skip_host_mem_pin = false, user_memset;
5018         u64 addr;
5019         int rc = 0;
5020
5021         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5022                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5023                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5024
5025         if (src_in_host) {
5026                 if (user_memset)
5027                         skip_host_mem_pin = true;
5028
5029                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5030                 dir = DMA_TO_DEVICE;
5031                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5032         } else {
5033                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5034                 dir = DMA_FROM_DEVICE;
5035                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5036                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5037                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5038         }
5039
5040         if (skip_host_mem_pin)
5041                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5042         else
5043                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5044                                                 addr, dir);
5045
5046         return rc;
5047 }
5048
5049 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5050                                 struct hl_cs_parser *parser,
5051                                 struct packet_lin_dma *user_dma_pkt)
5052 {
5053         bool src_in_host = false;
5054         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5055                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5056                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5057
5058         dev_dbg(hdev->dev, "DMA packet details:\n");
5059         dev_dbg(hdev->dev, "source == 0x%llx\n",
5060                                 le64_to_cpu(user_dma_pkt->src_addr));
5061         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5062         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5063
5064         /*
5065          * Special handling for DMA with size 0. Bypass all validations
5066          * because no transactions will be done except for WR_COMP, which
5067          * is not a security issue
5068          */
5069         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5070                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5071                 return 0;
5072         }
5073
5074         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5075                 src_in_host = true;
5076
5077         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5078                                                 src_in_host);
5079 }
5080
5081 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5082                                         struct hl_cs_parser *parser,
5083                                         struct packet_load_and_exe *user_pkt)
5084 {
5085         u32 cfg;
5086
5087         cfg = le32_to_cpu(user_pkt->cfg);
5088
5089         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5090                 dev_err(hdev->dev,
5091                         "User not allowed to use Load and Execute\n");
5092                 return -EPERM;
5093         }
5094
5095         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5096
5097         return 0;
5098 }
5099
5100 static int gaudi_validate_cb(struct hl_device *hdev,
5101                         struct hl_cs_parser *parser, bool is_mmu)
5102 {
5103         u32 cb_parsed_length = 0;
5104         int rc = 0;
5105
5106         parser->patched_cb_size = 0;
5107
5108         /* user_cb_size is more than 0 so the loop will always be executed */
5109         while (cb_parsed_length < parser->user_cb_size) {
5110                 enum packet_id pkt_id;
5111                 u16 pkt_size;
5112                 struct gaudi_packet *user_pkt;
5113
5114                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5115
5116                 pkt_id = (enum packet_id) (
5117                                 (le64_to_cpu(user_pkt->header) &
5118                                 PACKET_HEADER_PACKET_ID_MASK) >>
5119                                         PACKET_HEADER_PACKET_ID_SHIFT);
5120
5121                 if (!validate_packet_id(pkt_id)) {
5122                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5123                         rc = -EINVAL;
5124                         break;
5125                 }
5126
5127                 pkt_size = gaudi_packet_sizes[pkt_id];
5128                 cb_parsed_length += pkt_size;
5129                 if (cb_parsed_length > parser->user_cb_size) {
5130                         dev_err(hdev->dev,
5131                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5132                         rc = -EINVAL;
5133                         break;
5134                 }
5135
5136                 switch (pkt_id) {
5137                 case PACKET_MSG_PROT:
5138                         dev_err(hdev->dev,
5139                                 "User not allowed to use MSG_PROT\n");
5140                         rc = -EPERM;
5141                         break;
5142
5143                 case PACKET_CP_DMA:
5144                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5145                         rc = -EPERM;
5146                         break;
5147
5148                 case PACKET_STOP:
5149                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5150                         rc = -EPERM;
5151                         break;
5152
5153                 case PACKET_WREG_BULK:
5154                         dev_err(hdev->dev,
5155                                 "User not allowed to use WREG_BULK\n");
5156                         rc = -EPERM;
5157                         break;
5158
5159                 case PACKET_LOAD_AND_EXE:
5160                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5161                                 (struct packet_load_and_exe *) user_pkt);
5162                         break;
5163
5164                 case PACKET_LIN_DMA:
5165                         parser->contains_dma_pkt = true;
5166                         if (is_mmu)
5167                                 parser->patched_cb_size += pkt_size;
5168                         else
5169                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5170                                         (struct packet_lin_dma *) user_pkt);
5171                         break;
5172
5173                 case PACKET_WREG_32:
5174                 case PACKET_MSG_LONG:
5175                 case PACKET_MSG_SHORT:
5176                 case PACKET_REPEAT:
5177                 case PACKET_FENCE:
5178                 case PACKET_NOP:
5179                 case PACKET_ARB_POINT:
5180                         parser->patched_cb_size += pkt_size;
5181                         break;
5182
5183                 default:
5184                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5185                                 pkt_id);
5186                         rc = -EINVAL;
5187                         break;
5188                 }
5189
5190                 if (rc)
5191                         break;
5192         }
5193
5194         /*
5195          * The new CB should have space at the end for:
5196          * 1. Optional NOP padding for cacheline alignment
5197          * 2. A MSG_PROT packet that will act as a completion packet
5198          * 3. A MSG_PROT packet that will generate the MSI interrupt
5199          */
5200         if (parser->completion)
5201                 parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5202                         parser->patched_cb_size);
5203
5204         return rc;
5205 }
5206
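/*
 * Expand a user LIN_DMA packet that touches host memory into one packet per
 * DMA-mapped chunk of the pinned userptr. Contiguous SG entries are merged
 * up to DMA_MAX_TRANSFER_SIZE, the engine barrier is kept only on the first
 * generated packet and WR_COMP is restored only on the last one, as the
 * user requested it.
 */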
5207 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5208                                 struct hl_cs_parser *parser,
5209                                 struct packet_lin_dma *user_dma_pkt,
5210                                 struct packet_lin_dma *new_dma_pkt,
5211                                 u32 *new_dma_pkt_size)
5212 {
5213         struct hl_userptr *userptr;
5214         struct scatterlist *sg, *sg_next_iter;
5215         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5216         u64 len, len_next;
5217         dma_addr_t dma_addr, dma_addr_next;
5218         u64 device_memory_addr, addr;
5219         enum dma_data_direction dir;
5220         struct sg_table *sgt;
5221         bool src_in_host = false;
5222         bool skip_host_mem_pin = false;
5223         bool user_memset;
5224
5225         ctl = le32_to_cpu(user_dma_pkt->ctl);
5226
5227         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5228                 src_in_host = true;
5229
5230         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5231                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5232
5233         if (src_in_host) {
5234                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5235                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5236                 dir = DMA_TO_DEVICE;
5237                 if (user_memset)
5238                         skip_host_mem_pin = true;
5239         } else {
5240                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5241                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5242                 dir = DMA_FROM_DEVICE;
5243         }
5244
5245         if ((!skip_host_mem_pin) &&
5246                 (!hl_userptr_is_pinned(hdev, addr,
5247                                         le32_to_cpu(user_dma_pkt->tsize),
5248                                         parser->job_userptr_list, &userptr))) {
5249                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5250                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5251                 return -EFAULT;
5252         }
5253
5254         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5255                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5256                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5257                 return 0;
5258         }
5259
5260         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5261
5262         sgt = userptr->sgt;
5263         dma_desc_cnt = 0;
5264
5265         for_each_sgtable_dma_sg(sgt, sg, count) {
5266                 len = sg_dma_len(sg);
5267                 dma_addr = sg_dma_address(sg);
5268
5269                 if (len == 0)
5270                         break;
5271
5272                 while ((count + 1) < sgt->nents) {
5273                         sg_next_iter = sg_next(sg);
5274                         len_next = sg_dma_len(sg_next_iter);
5275                         dma_addr_next = sg_dma_address(sg_next_iter);
5276
5277                         if (len_next == 0)
5278                                 break;
5279
5280                         if ((dma_addr + len == dma_addr_next) &&
5281                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5282                                 len += len_next;
5283                                 count++;
5284                                 sg = sg_next_iter;
5285                         } else {
5286                                 break;
5287                         }
5288                 }
5289
5290                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5291                 if (likely(dma_desc_cnt))
5292                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5293                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5294                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5295                 new_dma_pkt->tsize = cpu_to_le32(len);
5296
5297                 if (dir == DMA_TO_DEVICE) {
5298                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5299                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5300                 } else {
5301                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5302                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5303                 }
5304
5305                 if (!user_memset)
5306                         device_memory_addr += len;
5307                 dma_desc_cnt++;
5308                 new_dma_pkt++;
5309         }
5310
5311         if (!dma_desc_cnt) {
5312                 dev_err(hdev->dev,
5313                         "Got 0 SG entries when patching DMA packet\n");
5314                 return -EFAULT;
5315         }
5316
5317         /* Fix the last DMA packet - restore WR_COMP to what the user set */
5318         new_dma_pkt--;
5319         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5320
5321         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5322
5323         return 0;
5324 }
5325
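/*
 * Copy the user CB into the kernel-allocated patched CB. Most packets are
 * copied as-is; LIN_DMA packets are rewritten by gaudi_patch_dma_packet()
 * and may grow into several packets, so the patched length is tracked
 * separately from the parsed length.
 */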
5326 static int gaudi_patch_cb(struct hl_device *hdev,
5327                                 struct hl_cs_parser *parser)
5328 {
5329         u32 cb_parsed_length = 0;
5330         u32 cb_patched_cur_length = 0;
5331         int rc = 0;
5332
5333         /* user_cb_size is more than 0 so the loop will always be executed */
5334         while (cb_parsed_length < parser->user_cb_size) {
5335                 enum packet_id pkt_id;
5336                 u16 pkt_size;
5337                 u32 new_pkt_size = 0;
5338                 struct gaudi_packet *user_pkt, *kernel_pkt;
5339
5340                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5341                 kernel_pkt = parser->patched_cb->kernel_address +
5342                                         cb_patched_cur_length;
5343
5344                 pkt_id = (enum packet_id) (
5345                                 (le64_to_cpu(user_pkt->header) &
5346                                 PACKET_HEADER_PACKET_ID_MASK) >>
5347                                         PACKET_HEADER_PACKET_ID_SHIFT);
5348
5349                 if (!validate_packet_id(pkt_id)) {
5350                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5351                         rc = -EINVAL;
5352                         break;
5353                 }
5354
5355                 pkt_size = gaudi_packet_sizes[pkt_id];
5356                 cb_parsed_length += pkt_size;
5357                 if (cb_parsed_length > parser->user_cb_size) {
5358                         dev_err(hdev->dev,
5359                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5360                         rc = -EINVAL;
5361                         break;
5362                 }
5363
5364                 switch (pkt_id) {
5365                 case PACKET_LIN_DMA:
5366                         rc = gaudi_patch_dma_packet(hdev, parser,
5367                                         (struct packet_lin_dma *) user_pkt,
5368                                         (struct packet_lin_dma *) kernel_pkt,
5369                                         &new_pkt_size);
5370                         cb_patched_cur_length += new_pkt_size;
5371                         break;
5372
5373                 case PACKET_MSG_PROT:
5374                         dev_err(hdev->dev,
5375                                 "User not allowed to use MSG_PROT\n");
5376                         rc = -EPERM;
5377                         break;
5378
5379                 case PACKET_CP_DMA:
5380                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5381                         rc = -EPERM;
5382                         break;
5383
5384                 case PACKET_STOP:
5385                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5386                         rc = -EPERM;
5387                         break;
5388
5389                 case PACKET_WREG_32:
5390                 case PACKET_WREG_BULK:
5391                 case PACKET_MSG_LONG:
5392                 case PACKET_MSG_SHORT:
5393                 case PACKET_REPEAT:
5394                 case PACKET_FENCE:
5395                 case PACKET_NOP:
5396                 case PACKET_ARB_POINT:
5397                 case PACKET_LOAD_AND_EXE:
5398                         memcpy(kernel_pkt, user_pkt, pkt_size);
5399                         cb_patched_cur_length += pkt_size;
5400                         break;
5401
5402                 default:
5403                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5404                                 pkt_id);
5405                         rc = -EINVAL;
5406                         break;
5407                 }
5408
5409                 if (rc)
5410                         break;
5411         }
5412
5413         return rc;
5414 }
5415
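/*
 * MMU-enabled flow: the user CB is copied verbatim into a kernel CB (plus
 * room for the end-of-CB packets) and only validated, since its DMA
 * addresses are translated by the MMU and need no patching.
 */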
5416 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5417                 struct hl_cs_parser *parser)
5418 {
5419         u64 handle;
5420         u32 patched_cb_size;
5421         struct hl_cb *user_cb;
5422         int rc;
5423
5424         /*
5425          * The new CB should have space at the end for:
5426          * 1. Optional NOP padding for cacheline alignment
5427          * 2. A MSG_PROT packet that will act as a completion packet
5428          * 3. A MSG_PROT packet that will generate the MSI interrupt
5429          */
5430         if (parser->completion)
5431                 parser->patched_cb_size = parser->user_cb_size +
5432                                 gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5433         else
5434                 parser->patched_cb_size = parser->user_cb_size;
5435
5436         rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5437                                 parser->patched_cb_size, false, false,
5438                                 &handle);
5439
5440         if (rc) {
5441                 dev_err(hdev->dev,
5442                         "Failed to allocate patched CB for DMA CS %d\n",
5443                         rc);
5444                 return rc;
5445         }
5446
5447         parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5448         /* hl_cb_get should never fail */
5449         if (!parser->patched_cb) {
5450                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5451                 rc = -EFAULT;
5452                 goto out;
5453         }
5454
5455         /*
5456          * We are protected from overflow because the check
5457          * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5458          * in the common code. That check is done only if is_kernel_allocated_cb is true.
5459          *
5460          * There is no option to reach here without going through that check because:
5461          * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5462          *    an external queue.
5463          * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5464          */
5465         memcpy(parser->patched_cb->kernel_address,
5466                 parser->user_cb->kernel_address,
5467                 parser->user_cb_size);
5468
5469         patched_cb_size = parser->patched_cb_size;
5470
5471         /* Validate patched CB instead of user CB */
5472         user_cb = parser->user_cb;
5473         parser->user_cb = parser->patched_cb;
5474         rc = gaudi_validate_cb(hdev, parser, true);
5475         parser->user_cb = user_cb;
5476
5477         if (rc) {
5478                 hl_cb_put(parser->patched_cb);
5479                 goto out;
5480         }
5481
5482         if (patched_cb_size != parser->patched_cb_size) {
5483                 dev_err(hdev->dev, "user CB size mismatch\n");
5484                 hl_cb_put(parser->patched_cb);
5485                 rc = -EINVAL;
5486                 goto out;
5487         }
5488
5489 out:
5490         /*
5491          * Always call cb destroy here because we still hold one reference
5492          * to it from the earlier cb_get. After the job completes, cb_put
5493          * will release it, but here we want to remove it from the
5494          * idr
5495          */
5496         hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5497
5498         return rc;
5499 }
5500
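/*
 * MMU-disabled flow: the user CB is first validated to compute the patched
 * size, then copied into a kernel CB while every LIN_DMA packet is patched
 * to use the DMA addresses of the pinned host memory.
 */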
5501 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5502                 struct hl_cs_parser *parser)
5503 {
5504         u64 handle;
5505         int rc;
5506
5507         rc = gaudi_validate_cb(hdev, parser, false);
5508
5509         if (rc)
5510                 goto free_userptr;
5511
5512         rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5513                                 parser->patched_cb_size, false, false,
5514                                 &handle);
5515         if (rc) {
5516                 dev_err(hdev->dev,
5517                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5518                 goto free_userptr;
5519         }
5520
5521         parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5522         /* hl_cb_get should never fail here */
5523         if (!parser->patched_cb) {
5524                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5525                 rc = -EFAULT;
5526                 goto out;
5527         }
5528
5529         rc = gaudi_patch_cb(hdev, parser);
5530
5531         if (rc)
5532                 hl_cb_put(parser->patched_cb);
5533
5534 out:
5535         /*
5536          * Always call cb destroy here because we still hold one reference
5537          * to it from the earlier cb_get. After the job completes, cb_put
5538          * will release it, but here we want to remove it from the
5539          * idr
5540          */
5541         hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5542
5543 free_userptr:
5544         if (rc)
5545                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5546         return rc;
5547 }
5548
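/*
 * Jobs for internal queues are not patched. Verify that the target NIC
 * queue (if any) is enabled and that the CB address range falls inside
 * SRAM, DRAM or the PMMU virtual address range.
 */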
5549 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5550                                         struct hl_cs_parser *parser)
5551 {
5552         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5553         struct gaudi_device *gaudi = hdev->asic_specific;
5554         u32 nic_queue_offset, nic_mask_q_id;
5555
5556         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5557                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5558                 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5559                 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5560
5561                 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5562                         dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5563                         return -EINVAL;
5564                 }
5565         }
5566
5567         /* For internal queue jobs just check if CB address is valid */
5568         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5569                                         parser->user_cb_size,
5570                                         asic_prop->sram_user_base_address,
5571                                         asic_prop->sram_end_address))
5572                 return 0;
5573
5574         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5575                                         parser->user_cb_size,
5576                                         asic_prop->dram_user_base_address,
5577                                         asic_prop->dram_end_address))
5578                 return 0;
5579
5580         /* PMMU and HPMMU addresses are equal, check only one of them */
5581         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5582                                         parser->user_cb_size,
5583                                         asic_prop->pmmu.start_addr,
5584                                         asic_prop->pmmu.end_addr))
5585                 return 0;
5586
5587         dev_err(hdev->dev,
5588                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5589                 parser->user_cb, parser->user_cb_size);
5590
5591         return -EFAULT;
5592 }
5593
5594 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5595 {
5596         struct gaudi_device *gaudi = hdev->asic_specific;
5597
5598         if (parser->queue_type == QUEUE_TYPE_INT)
5599                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5600
5601         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5602                 return gaudi_parse_cb_mmu(hdev, parser);
5603         else
5604                 return gaudi_parse_cb_no_mmu(hdev, parser);
5605 }
5606
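/*
 * Append the end-of-CB packets: NOP padding between the original CB end and
 * the two trailing MSG_PROT packets, one that writes the completion value
 * to the CQ and one that triggers the MSI interrupt (per-vector or single
 * MSI register, depending on the MSI mode).
 */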
5607 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5608                                 u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5609                                 u32 msi_vec, bool eb)
5610 {
5611         struct gaudi_device *gaudi = hdev->asic_specific;
5612         struct packet_msg_prot *cq_pkt;
5613         struct packet_nop *cq_padding;
5614         u64 msi_addr;
5615         u32 tmp;
5616
5617         cq_padding = kernel_address + original_len;
5618         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5619
5620         while ((void *)cq_padding < (void *)cq_pkt) {
5621                 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5622                 cq_padding++;
5623         }
5624
5625         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5626         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5627
5628         if (eb)
5629                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5630
5631         cq_pkt->ctl = cpu_to_le32(tmp);
5632         cq_pkt->value = cpu_to_le32(cq_val);
5633         cq_pkt->addr = cpu_to_le64(cq_addr);
5634
5635         cq_pkt++;
5636
5637         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5638         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5639         cq_pkt->ctl = cpu_to_le32(tmp);
5640         cq_pkt->value = cpu_to_le32(1);
5641
5642         if (gaudi->multi_msi_mode)
5643                 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5644         else
5645                 msi_addr = mmPCIE_CORE_MSI_REQ;
5646
5647         cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5648 }
5649
5650 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5651 {
5652         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5653 }
5654
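/*
 * Fill a device memory range with a 64-bit value using a single memset
 * LIN_DMA packet sent as a driver job on the DMA 0 queue, e.g. for clearing
 * the MMU page tables area on init.
 */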
5655 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5656                                         u32 size, u64 val)
5657 {
5658         struct packet_lin_dma *lin_dma_pkt;
5659         struct hl_cs_job *job;
5660         u32 cb_size, ctl, err_cause;
5661         struct hl_cb *cb;
5662         int rc;
5663
5664         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5665         if (!cb)
5666                 return -EFAULT;
5667
5668         lin_dma_pkt = cb->kernel_address;
5669         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5670         cb_size = sizeof(*lin_dma_pkt);
5671
5672         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5673         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5674         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5675         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5676         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5677
5678         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5679         lin_dma_pkt->src_addr = cpu_to_le64(val);
5680         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5681         lin_dma_pkt->tsize = cpu_to_le32(size);
5682
5683         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5684         if (!job) {
5685                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5686                 rc = -ENOMEM;
5687                 goto release_cb;
5688         }
5689
5690         /* Verify DMA is OK */
5691         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5692         if (err_cause && !hdev->init_done) {
5693                 dev_dbg(hdev->dev,
5694                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5695                         err_cause);
5696                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5697         }
5698
5699         job->id = 0;
5700         job->user_cb = cb;
5701         atomic_inc(&job->user_cb->cs_cnt);
5702         job->user_cb_size = cb_size;
5703         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5704         job->patched_cb = job->user_cb;
5705         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5706
5707         hl_debugfs_add_job(hdev, job);
5708
5709         rc = gaudi_send_job_on_qman0(hdev, job);
5710         hl_debugfs_remove_job(hdev, job);
5711         kfree(job);
5712         atomic_dec(&cb->cs_cnt);
5713
5714         /* Verify DMA is OK */
5715         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5716         if (err_cause) {
5717                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5718                 rc = -EIO;
5719                 if (!hdev->init_done) {
5720                         dev_dbg(hdev->dev,
5721                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5722                                 err_cause);
5723                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5724                 }
5725         }
5726
5727 release_cb:
5728         hl_cb_put(cb);
5729         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5730
5731         return rc;
5732 }
5733
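/*
 * Write the same value to num_regs consecutive registers by building a CB
 * of MSG_LONG packets (one per register) and sending it as a driver job on
 * the DMA 0 queue.
 */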
5734 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5735                                         u32 num_regs, u32 val)
5736 {
5737         struct packet_msg_long *pkt;
5738         struct hl_cs_job *job;
5739         u32 cb_size, ctl;
5740         struct hl_cb *cb;
5741         int i, rc;
5742
5743         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5744
5745         if (cb_size > SZ_2M) {
5746                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M / SZ_1M);
5747                 return -ENOMEM;
5748         }
5749
5750         cb = hl_cb_kernel_create(hdev, cb_size, false);
5751         if (!cb)
5752                 return -EFAULT;
5753
5754         pkt = cb->kernel_address;
5755
5756         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5757         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5758         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5759         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5760         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5761
5762         for (i = 0; i < num_regs ; i++, pkt++) {
5763                 pkt->ctl = cpu_to_le32(ctl);
5764                 pkt->value = cpu_to_le32(val);
5765                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5766         }
5767
5768         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5769         if (!job) {
5770                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5771                 rc = -ENOMEM;
5772                 goto release_cb;
5773         }
5774
5775         job->id = 0;
5776         job->user_cb = cb;
5777         atomic_inc(&job->user_cb->cs_cnt);
5778         job->user_cb_size = cb_size;
5779         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5780         job->patched_cb = job->user_cb;
5781         job->job_cb_size = cb_size;
5782
5783         hl_debugfs_add_job(hdev, job);
5784
5785         rc = gaudi_send_job_on_qman0(hdev, job);
5786         hl_debugfs_remove_job(hdev, job);
5787         kfree(job);
5788         atomic_dec(&cb->cs_cnt);
5789
5790 release_cb:
5791         hl_cb_put(cb);
5792         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5793
5794         return rc;
5795 }
5796
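/*
 * Clear the sync manager SOB and monitor status registers that user space
 * may have changed. In the west-south block only the objects and monitors
 * from the first user-available index onward are cleared.
 */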
5797 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5798 {
5799         u64 base_addr;
5800         u32 num_regs;
5801         int rc;
5802
5803         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5804         num_regs = NUM_OF_SOB_IN_BLOCK;
5805         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5806         if (rc) {
5807                 dev_err(hdev->dev, "failed resetting SM registers");
5808                 return -ENOMEM;
5809         }
5810
5811         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5812         num_regs = NUM_OF_SOB_IN_BLOCK;
5813         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5814         if (rc) {
5815                 dev_err(hdev->dev, "failed resetting SM registers");
5816                 return -ENOMEM;
5817         }
5818
5819         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5820         num_regs = NUM_OF_SOB_IN_BLOCK;
5821         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5822         if (rc) {
5823                 dev_err(hdev->dev, "failed resetting SM registers");
5824                 return -ENOMEM;
5825         }
5826
5827         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5828         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5829         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5830         if (rc) {
5831                 dev_err(hdev->dev, "failed resetting SM registers");
5832                 return -ENOMEM;
5833         }
5834
5835         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5836         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5837         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5838         if (rc) {
5839                 dev_err(hdev->dev, "failed resetting SM registers");
5840                 return -ENOMEM;
5841         }
5842
5843         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5844         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5845         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5846         if (rc) {
5847                 dev_err(hdev->dev, "failed resetting SM registers");
5848                 return -ENOMEM;
5849         }
5850
5851         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5852                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5853         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5854         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5855         if (rc) {
5856                 dev_err(hdev->dev, "failed resetting SM registers");
5857                 return -ENOMEM;
5858         }
5859
5860         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5861                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5862         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5863         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5864         if (rc) {
5865                 dev_err(hdev->dev, "failed resetting SM registers");
5866                 return -ENOMEM;
5867         }
5868
5869         return 0;
5870 }
5871
5872 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5873 {
5874         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5875                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5876         int i;
5877
5878         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5879                 u64 sob_addr = CFG_BASE +
5880                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5881                                 (i * sob_delta);
5882                 u32 dma_offset = i * DMA_CORE_OFFSET;
5883
5884                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5885                                 lower_32_bits(sob_addr));
5886                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5887                                 upper_32_bits(sob_addr));
5888                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5889
5890                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5891                  * modified by the user for SRAM reduction
5892                  */
5893                 if (i > 1)
5894                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5895                                                                 0x00000001);
5896         }
5897 }
5898
5899 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5900 {
5901         u32 qman_offset;
5902         int i;
5903
5904         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5905                 qman_offset = i * DMA_QMAN_OFFSET;
5906                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5907         }
5908
5909         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5910                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5911                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5912         }
5913
5914         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5915                 qman_offset = i * TPC_QMAN_OFFSET;
5916                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5917         }
5918
5919         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5920                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5921                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5922                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5923         }
5924 }
5925
5926 static int gaudi_restore_user_registers(struct hl_device *hdev)
5927 {
5928         int rc;
5929
5930         rc = gaudi_restore_sm_registers(hdev);
5931         if (rc)
5932                 return rc;
5933
5934         gaudi_restore_dma_registers(hdev);
5935         gaudi_restore_qm_registers(hdev);
5936
5937         return 0;
5938 }
5939
5940 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5941 {
5942         return 0;
5943 }
5944
5945 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5946 {
5947         u32 size = hdev->asic_prop.mmu_pgt_size +
5948                         hdev->asic_prop.mmu_cache_mng_size;
5949         struct gaudi_device *gaudi = hdev->asic_specific;
5950         u64 addr = hdev->asic_prop.mmu_pgt_addr;
5951
5952         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5953                 return 0;
5954
5955         return gaudi_memset_device_memory(hdev, addr, size, 0);
5956 }
5957
5958 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5959 {
5960
5961 }
5962
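/*
 * Program a DMA core directly (bypassing its QMAN) to copy size_to_dma
 * bytes from a device address into a host DMA buffer, then poll for the
 * engine to become idle and check its error cause register.
 */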
5963 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5964                                         u32 size_to_dma, dma_addr_t dma_addr)
5965 {
5966         u32 err_cause, val;
5967         u64 dma_offset;
5968         int rc;
5969
5970         dma_offset = dma_id * DMA_CORE_OFFSET;
5971
5972         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5973         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5974         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5975         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5976         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5977         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5978                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5979
5980         rc = hl_poll_timeout(
5981                 hdev,
5982                 mmDMA0_CORE_STS0 + dma_offset,
5983                 val,
5984                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5985                 0,
5986                 1000000);
5987
5988         if (rc) {
5989                 dev_err(hdev->dev,
5990                         "DMA %d timed out while reading from 0x%llx\n",
5991                         dma_id, addr);
5992                 return -EIO;
5993         }
5994
5995         /* Verify DMA is OK */
5996         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5997         if (err_cause) {
5998                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5999                 dev_dbg(hdev->dev,
6000                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6001                         err_cause);
6002                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6003
6004                 return -EIO;
6005         }
6006
6007         return 0;
6008 }
6009
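/*
 * Read an arbitrary device address range for debugfs by bouncing it through
 * a 2MB coherent host buffer using one of the PCI DMA engines. The engine's
 * QMAN command processors are stopped and the DMA core protection register
 * is adjusted for the duration of the transfer (see the TODO below).
 */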
6010 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6011                                 void *blob_addr)
6012 {
6013         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6014         u32 qm_glbl_sts0, qm_cgm_sts;
6015         u64 dma_offset, qm_offset;
6016         dma_addr_t dma_addr;
6017         void *kernel_addr;
6018         bool is_eng_idle;
6019         int rc = 0, dma_id;
6020
6021         kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
6022
6023         if (!kernel_addr)
6024                 return -ENOMEM;
6025
6026         hdev->asic_funcs->hw_queues_lock(hdev);
6027
6028         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6029         dma_offset = dma_id * DMA_CORE_OFFSET;
6030         qm_offset = dma_id * DMA_QMAN_OFFSET;
6031         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6032         qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6033         qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6034         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6035                       IS_DMA_IDLE(dma_core_sts0);
6036
6037         if (!is_eng_idle) {
6038                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6039                 dma_offset = dma_id * DMA_CORE_OFFSET;
6040                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6041                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6042                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6043                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6044                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6045                               IS_DMA_IDLE(dma_core_sts0);
6046
6047                 if (!is_eng_idle) {
6048                         dev_err_ratelimited(hdev->dev,
6049                                 "Can't read via DMA because it is BUSY\n");
6050                         rc = -EAGAIN;
6051                         goto out;
6052                 }
6053         }
6054
6055         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6056         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6057                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6058
6059         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6060          * using the compute ctx ASID if one exists, or the kernel ctx
6061          * ASID
6062          */
6063         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6064
6065         /* Verify DMA is OK */
6066         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6067         if (err_cause) {
6068                 dev_dbg(hdev->dev,
6069                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6070                         err_cause);
6071                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6072         }
6073
6074         pos = 0;
6075         size_left = size;
6076         size_to_dma = SZ_2M;
6077
6078         while (size_left > 0) {
6079
6080                 if (size_left < SZ_2M)
6081                         size_to_dma = size_left;
6082
6083                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6084                                                 dma_addr);
6085                 if (rc)
6086                         break;
6087
6088                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6089
6090                 if (size_left <= SZ_2M)
6091                         break;
6092
6093                 pos += SZ_2M;
6094                 addr += SZ_2M;
6095                 size_left -= SZ_2M;
6096         }
6097
6098         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6099          * using the compute ctx ASID if one exists, or the kernel ctx
6100          * ASID
6101          */
6102         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6103                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6104
6105         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6106
6107 out:
6108         hdev->asic_funcs->hw_queues_unlock(hdev);
6109
6110         hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6111
6112         return rc;
6113 }
6114
6115 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6116 {
6117         struct gaudi_device *gaudi = hdev->asic_specific;
6118
6119         if (hdev->reset_info.hard_reset_pending)
6120                 return U64_MAX;
6121
6122         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6123                         (addr - gaudi->hbm_bar_cur_addr));
6124 }
6125
6126 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6127 {
6128         struct gaudi_device *gaudi = hdev->asic_specific;
6129
6130         if (hdev->reset_info.hard_reset_pending)
6131                 return;
6132
6133         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6134                         (addr - gaudi->hbm_bar_cur_addr));
6135 }
6136
6137 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6138 {
6139         /* mask to zero the MMBP and ASID bits */
6140         WREG32_AND(reg, ~0x7FF);
6141         WREG32_OR(reg, asid);
6142 }
6143
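/*
 * Program the given ASID into the non-secure properties and AXI user
 * registers of the DMA, TPC, MME and initialized NIC engines, so that their
 * memory transactions are associated with that context's address space.
 */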
6144 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6145 {
6146         struct gaudi_device *gaudi = hdev->asic_specific;
6147
6148         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6149                 return;
6150
6151         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6152                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6153                 return;
6154         }
6155
6156         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6157         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6158         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6159         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6160         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6161
6162         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6163         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6164         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6165         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6166         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6167
6168         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6169         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6170         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6171         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6172         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6173
6174         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6175         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6176         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6177         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6178         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6179
6180         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6181         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6182         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6183         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6184         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6185
6186         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6187         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6188         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6189         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6190         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6191
6192         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6193         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6194         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6195         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6196         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6197
6198         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6199         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6200         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6201         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6202         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6203
6204         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6205         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6206         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6207         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6208         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6209         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6210         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6211         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6212
6213         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6214         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6215         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6216         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6217         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6218         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6219         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6220
6221         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6222         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6223         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6224         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6225         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6226         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6227         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6228
6229         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6230         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6231         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6232         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6233         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6234         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6235         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6236
6237         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6238         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6239         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6240         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6241         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6242         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6243         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6244
6245         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6246         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6247         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6248         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6249         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6250         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6251         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6252
6253         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6254         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6255         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6256         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6257         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6258         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6259         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6260
6261         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6262         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6263         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6264         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6265         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6266         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6267         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6268
6269         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6270         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6271         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6272         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6273         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6274         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6275         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6276
6277         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6278         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6279         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6280         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6281         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6282         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6283         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6284         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6285         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6286         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6287
6288         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6289         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6290         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6291         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6292         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6293         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6294         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6295         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6296         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6297         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6298         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6299         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6300
6301         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6302                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6303                                 asid);
6304                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6305                                 asid);
6306                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6307                                 asid);
6308                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6309                                 asid);
6310                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6311                                 asid);
6312         }
6313
6314         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6315                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6316                                 asid);
6317                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6318                                 asid);
6319                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6320                                 asid);
6321                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6322                                 asid);
6323                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6324                                 asid);
6325         }
6326
6327         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6328                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6329                                 asid);
6330                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6331                                 asid);
6332                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6333                                 asid);
6334                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6335                                 asid);
6336                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6337                                 asid);
6338         }
6339
6340         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6341                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6342                                 asid);
6343                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6344                                 asid);
6345                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6346                                 asid);
6347                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6348                                 asid);
6349                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6350                                 asid);
6351         }
6352
6353         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6354                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6355                                 asid);
6356                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6357                                 asid);
6358                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6359                                 asid);
6360                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6361                                 asid);
6362                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6363                                 asid);
6364         }
6365
6366         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6367                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6368                                 asid);
6369                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6370                                 asid);
6371                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6372                                 asid);
6373                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6374                                 asid);
6375                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6376                                 asid);
6377         }
6378
6379         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6380                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6381                                 asid);
6382                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6383                                 asid);
6384                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6385                                 asid);
6386                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6387                                 asid);
6388                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6389                                 asid);
6390         }
6391
6392         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6393                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6394                                 asid);
6395                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6396                                 asid);
6397                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6398                                 asid);
6399                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6400                                 asid);
6401                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6402                                 asid);
6403         }
6404
6405         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6406                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6407                                 asid);
6408                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6409                                 asid);
6410                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6411                                 asid);
6412                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6413                                 asid);
6414                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6415                                 asid);
6416         }
6417
6418         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6419                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6420                                 asid);
6421                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6422                                 asid);
6423                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6424                                 asid);
6425                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6426                                 asid);
6427                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6428                                 asid);
6429         }
6430
6431         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6432         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6433 }
6434
6435 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6436                 struct hl_cs_job *job)
6437 {
6438         struct packet_msg_prot *fence_pkt;
6439         u32 *fence_ptr;
6440         dma_addr_t fence_dma_addr;
6441         struct hl_cb *cb;
6442         u32 tmp, timeout, dma_offset;
6443         int rc;
6444
6445         if (hdev->pldm)
6446                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6447         else
6448                 timeout = HL_DEVICE_TIMEOUT_USEC;
6449
6450         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6451                 dev_err_ratelimited(hdev->dev,
6452                         "Can't send driver job on QMAN0 because the device is not idle\n");
6453                 return -EBUSY;
6454         }
6455
6456         fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6457         if (!fence_ptr) {
6458                 dev_err(hdev->dev,
6459                         "Failed to allocate fence memory for QMAN0\n");
6460                 return -ENOMEM;
6461         }
6462
6463         cb = job->patched_cb;
6464
6465         fence_pkt = cb->kernel_address +
6466                         job->job_cb_size - sizeof(struct packet_msg_prot);
6467
6468         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6469         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6470         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6471
6472         fence_pkt->ctl = cpu_to_le32(tmp);
6473         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6474         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
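             /*
              * Editor's note (not from the original source): the MSG_PROT
              * packet written at the end of the patched CB tells the DMA QMAN
              * to write GAUDI_QMAN0_FENCE_VAL to fence_dma_addr once the job
              * completes; hl_poll_timeout_memory() below then polls fence_ptr
              * until that value appears or the timeout expires.
              */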
6475
6476         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6477
6478         WREG32(mmDMA0_CORE_PROT + dma_offset,
6479                         BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6480
6481         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6482                                         job->job_cb_size, cb->bus_address);
6483         if (rc) {
6484                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6485                 goto free_fence_ptr;
6486         }
6487
6488         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6489                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6490                                 timeout, true);
6491
6492         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6493
6494         if (rc == -ETIMEDOUT) {
6495                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6496                 goto free_fence_ptr;
6497         }
6498
6499 free_fence_ptr:
6500         WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6501
6502         hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6503         return rc;
6504 }
6505
6506 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6507 {
6508         if (event_type >= GAUDI_EVENT_SIZE)
6509                 goto event_not_supported;
6510
6511         if (!gaudi_irq_map_table[event_type].valid)
6512                 goto event_not_supported;
6513
6514         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6515
6516         return;
6517
6518 event_not_supported:
6519         snprintf(desc, size, "N/A");
6520 }
6521
6522 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6523                                                         bool is_write, s32 *engine_id_1,
6524                                                         s32 *engine_id_2)
6525 {
6526         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6527
6528         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6529                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
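             /*
              * Editor's note (not from the original source): each DMA_IF
              * serves two DMA cores, so the RAZWI X/Y coordinates alone are
              * ambiguous; the per-core HBW read/write error-cause bits checked
              * below are used to pick the actual initiator, and if both (or
              * neither) are set, both candidates are reported.
              */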
6530
6531         switch (x_y) {
6532         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6533         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6534                 dma_id[0] = 0;
6535                 dma_id[1] = 2;
6536                 break;
6537         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6538         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6539                 dma_id[0] = 1;
6540                 dma_id[1] = 3;
6541                 break;
6542         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6543         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6544                 dma_id[0] = 4;
6545                 dma_id[1] = 6;
6546                 break;
6547         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6548         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6549                 dma_id[0] = 5;
6550                 dma_id[1] = 7;
6551                 break;
6552         default:
6553                 goto unknown_initiator;
6554         }
6555
6556         for (i = 0 ; i < 2 ; i++) {
6557                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6558                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6559         }
6560
6561         switch (x_y) {
6562         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6563         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6564                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6565                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6566                         return "DMA0";
6567                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6568                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6569                         return "DMA2";
6570                 } else {
6571                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6572                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6573                         return "DMA0 or DMA2";
6574                 }
6575         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6576         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6577                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6578                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6579                         return "DMA1";
6580                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6581                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6582                         return "DMA3";
6583                 } else {
6584                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6585                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6586                         return "DMA1 or DMA3";
6587                 }
6588         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6589         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6590                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6591                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6592                         return "DMA4";
6593                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6594                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6595                         return "DMA6";
6596                 } else {
6597                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6598                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6599                         return "DMA4 or DMA6";
6600                 }
6601         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6602         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6603                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6604                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6605                         return "DMA5";
6606                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6607                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6608                         return "DMA7";
6609                 } else {
6610                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6611                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6612                         return "DMA5 or DMA7";
6613                 }
6614         }
6615
6616 unknown_initiator:
6617         return "unknown initiator";
6618 }
6619
6620 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6621                                                         u32 *engine_id_1, u32 *engine_id_2)
6622 {
6623         u32 val, x_y, axi_id;
6624
6625         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6626                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
6627         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6628                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6629         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6630                         RAZWI_INITIATOR_AXI_ID_SHIFT);
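             /*
              * Editor's note (not from the original source): the RAZWI ID
              * register packs the initiator's X/Y location and AXI ID; x_y and
              * axi_id keep those fields in place (masked, not shifted) so they
              * can be compared directly against the RAZWI_INITIATOR_ID_X_Y_*
              * and RAZWI_INITIATOR_ID_AXI_ID() constants in the switch below.
              */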
6631
6632         switch (x_y) {
6633         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6634                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6635                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6636                         return "TPC0";
6637                 }
6638                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6639                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6640                         return "NIC0";
6641                 }
6642                 break;
6643         case RAZWI_INITIATOR_ID_X_Y_TPC1:
6644                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6645                 return "TPC1";
6646         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6647         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6648                 *engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6649                 return "MME0";
6650         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6651         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6652                 *engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6653                 return "MME1";
6654         case RAZWI_INITIATOR_ID_X_Y_TPC2:
6655                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6656                 return "TPC2";
6657         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6658                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6659                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6660                         return "TPC3";
6661                 }
6662                 /* PCI, CPU and PSOC do not have an engine id */
6663                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6664                         return "PCI";
6665                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6666                         return "CPU";
6667                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6668                         return "PSOC";
6669                 break;
6670         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6671         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6672         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6673         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6674         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6675         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6676         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6677         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6678                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6679                                 engine_id_1, engine_id_2);
6680         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6681                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6682                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6683                         return "TPC4";
6684                 }
6685                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6686                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6687                         return "NIC1";
6688                 }
6689                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6690                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6691                         return "NIC2";
6692                 }
6693                 break;
6694         case RAZWI_INITIATOR_ID_X_Y_TPC5:
6695                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6696                 return "TPC5";
6697         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6698         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6699                 *engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6700                 return "MME2";
6701         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6702         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6703                 *engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6704                 return "MME3";
6705         case RAZWI_INITIATOR_ID_X_Y_TPC6:
6706                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6707                 return "TPC6";
6708         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6709                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6710                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6711                         return "TPC7";
6712                 }
6713                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6714                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6715                         return "NIC4";
6716                 }
6717                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6718                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6719                         return "NIC5";
6720                 }
6721                 break;
6722         default:
6723                 break;
6724         }
6725
6726         dev_err(hdev->dev,
6727                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6728                 val,
6729                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6730                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6731                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6732                         RAZWI_INITIATOR_AXI_ID_MASK);
6733
6734         return "unknown initiator";
6735 }
6736
6737 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_id_1,
6738                                                 u32 *engine_id_2)
6739 {
6741         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6742                 dev_err_ratelimited(hdev->dev,
6743                         "RAZWI event caused by illegal write of %s\n",
6744                         gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6745                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6746         }
6747
6748         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6749                 dev_err_ratelimited(hdev->dev,
6750                         "RAZWI event caused by illegal read of %s\n",
6751                         gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6752                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6753         }
6754 }
6755
6756 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u8 *type)
6757 {
6758         struct gaudi_device *gaudi = hdev->asic_specific;
6759         u32 val;
6760
6761         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6762                 return;
6763
6764         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6765         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6766                 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6767                 *addr <<= 32;
6768                 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
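                     /*
                      * Editor's note (not from the original source): VA bits
                      * 49:32 come from the capture register and bits 31:0 from
                      * the separate VA register, combined into one fault
                      * address.
                      */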
6769
6770                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6771                 *type = HL_RAZWI_PAGE_FAULT;
6772
6773                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6774         }
6775
6776         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6777         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6778                 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6779                 *addr <<= 32;
6780                 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6781
6782                 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6783                 *type = HL_RAZWI_MMU_ACCESS_ERROR;
6784
6785                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6786         }
6787 }
6788
6789 /*
6790  *  +-------------------+------------------------------------------------------+
6791  *  | Configuration Reg |                     Description                      |
6792  *  |      Address      |                                                      |
6793  *  +-------------------+------------------------------------------------------+
6794  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6795  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6796  *  |                   |0xF34 memory wrappers 63:32                           |
6797  *  |                   |0xF38 memory wrappers 95:64                           |
6798  *  |                   |0xF3C memory wrappers 127:96                          |
6799  *  +-------------------+------------------------------------------------------+
6800  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6801  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6802  *  |                   |0xF44 memory wrappers 63:32                           |
6803  *  |                   |0xF48 memory wrappers 95:64                           |
6804  *  |                   |0xF4C memory wrappers 127:96                          |
6805  *  +-------------------+------------------------------------------------------+
6806  */
6807 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6808                 struct ecc_info_extract_params *params, u64 *ecc_address,
6809                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6810 {
6811         u32 i, num_mem_regs, reg, err_bit;
6812         u64 err_addr, err_word = 0;
6813
6814         num_mem_regs = params->num_memories / 32 +
6815                         ((params->num_memories % 32) ? 1 : 0);
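             /*
              * Editor's illustration (not from the original source): this is
              * just DIV_ROUND_UP(num_memories, 32), e.g. the TPC blocks with
              * 90 memory wrappers use 3 indication registers and the MME ACC
              * blocks with 128 wrappers use 4.
              */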
6816
6817         if (params->block_address >= CFG_BASE)
6818                 params->block_address -= CFG_BASE;
6819
6820         if (params->derr)
6821                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6822         else
6823                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6824
6825         /* Set invalid wrapper index */
6826         *memory_wrapper_idx = 0xFF;
6827
6828         /* Iterate through memory wrappers, a single bit must be set */
6829         for (i = 0 ; i < num_mem_regs ; i++) {
6830                 err_word = RREG32(err_addr + i * 4);
6832                 if (err_word) {
6833                         err_bit = __ffs(err_word);
6834                         *memory_wrapper_idx = err_bit + (32 * i);
6835                         break;
6836                 }
6837         }
6838
6839         if (*memory_wrapper_idx == 0xFF) {
6840                 dev_err(hdev->dev, "ECC error information cannot be found\n");
6841                 return -EINVAL;
6842         }
6843
6844         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6845                         *memory_wrapper_idx);
6846
6847         *ecc_address =
6848                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6849         *ecc_syndrom =
6850                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6851
6852         /* Clear error indication */
6853         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6854         if (params->derr)
6855                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6856         else
6857                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6858
6859         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6860
6861         return 0;
6862 }
6863
6864 /*
6865  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6866  *
6867  * @idx: the current pi/ci value
6868  * @q_len: the queue length (power of 2)
6869  *
6870  * @return the cyclically decremented index
6871  */
6872 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6873 {
6874         u32 mask = q_len - 1;
6875
6876         /*
6877          * modular decrement is equivalent to adding (q_len - 1);
6878          * masking with the LSBs then keeps the value in the
6879          * range [0, q_len - 1]
6880          */
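             /*
              * Editor's illustration (not from the original source): with
              * q_len = 8 (mask = 7), idx = 3 decrements to (3 + 7) & 7 = 2 and
              * idx = 0 wraps to (0 + 7) & 7 = 7.
              */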
6881         return (idx + q_len - 1) & mask;
6882 }
6883
6884 /**
6885  * gaudi_handle_sw_config_stream_data - print SW config stream data
6886  *
6887  * @hdev: pointer to the habanalabs device structure
6888  * @stream: the QMAN's stream
6889  * @qman_base: base address of QMAN registers block
6890  * @event_mask: mask of the last events occurred
6891  */
6892 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6893                                                 u64 qman_base, u64 event_mask)
6894 {
6895         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6896         u32 cq_ptr_lo_off, size;
6897
6898         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6899
6900         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6901                                                 stream * cq_ptr_lo_off;
6902         cq_ptr_hi = cq_ptr_lo +
6903                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6904         cq_tsize = cq_ptr_lo +
6905                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
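             /*
              * Editor's note (not from the original source): cq_ptr_lo_off is
              * the per-stream register stride, so for a given QMAN these three
              * addresses point at CQ_PTR_LO_<stream>, CQ_PTR_HI_<stream> and
              * CQ_TSIZE_<stream> relative to qman_base.
              */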
6906
6907         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6908         size = RREG32(cq_tsize);
6909         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6910                                                         stream, cq_ptr, size);
6911
6912         if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6913                 hdev->last_error.undef_opcode.cq_addr = cq_ptr;
6914                 hdev->last_error.undef_opcode.cq_size = size;
6915                 hdev->last_error.undef_opcode.stream_id = stream;
6916         }
6917 }
6918
6919 /**
6920  * gaudi_handle_last_pqes_on_err - print last PQEs on error
6921  *
6922  * @hdev: pointer to the habanalabs device structure
6923  * @qid_base: first QID of the QMAN (out of 4 streams)
6924  * @stream: the QMAN's stream
6925  * @qman_base: base address of QMAN registers block
6926  * @event_mask: mask of the last events occurred
6927  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6928  */
6929 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6930                                                 u32 stream, u64 qman_base,
6931                                                 u64 event_mask,
6932                                                 bool pr_sw_conf)
6933 {
6934         u32 ci, qm_ci_stream_off, queue_len;
6935         struct hl_hw_queue *q;
6936         u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6937         int i;
6938
6939         q = &hdev->kernel_queues[qid_base + stream];
6940
6941         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6942         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6943                                                 stream * qm_ci_stream_off;
6944
6945         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6946                                         q->int_queue_len : HL_QUEUE_LENGTH;
6947
6948         hdev->asic_funcs->hw_queues_lock(hdev);
6949
6950         if (pr_sw_conf)
6951                 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6952
6953         ci = RREG32(pq_ci);
6954
6955         /* we should start printing from ci - 1 */
6956         ci = gaudi_queue_idx_dec(ci, queue_len);
6957         memset(addr, 0, sizeof(addr));
6958
6959         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6960                 struct hl_bd *bd;
6961                 u32 len;
6962
6963                 bd = q->kernel_address;
6964                 bd += ci;
6965
6966                 len = le32_to_cpu(bd->len);
6967                 /* len 0 means an uninitialized entry - break */
6968                 if (!len)
6969                         break;
6970
6971                 addr[i] = le64_to_cpu(bd->ptr);
6972
6973                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6974                                                         stream, ci, addr[i], len);
6975
6976                 /* get previous ci, wrap if needed */
6977                 ci = gaudi_queue_idx_dec(ci, queue_len);
6978         }
6979
6980         if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6981                 struct undefined_opcode_info *undef_opcode = &hdev->last_error.undef_opcode;
6982                 u32 arr_idx = undef_opcode->cb_addr_streams_len;
6983
6984                 if (arr_idx == 0) {
6985                         undef_opcode->timestamp = ktime_get();
6986                         undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6987                 }
6988
6989                 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6990                 undef_opcode->cb_addr_streams_len++;
6991         }
6992
6993         hdev->asic_funcs->hw_queues_unlock(hdev);
6994 }
6995
6996 /**
6997  * handle_qman_data_on_err - extract QMAN data on error
6998  *
6999  * @hdev: pointer to the habanalabs device structure
7000  * @qid_base: first QID of the QMAN (out of 4 streams)
7001  * @stream: the QMAN's stream
7002  * @qman_base: base address of QMAN registers block
7003  * @event_mask: mask of the last events occurred
7004  *
7005  * This function attempts to extract as much data as possible on a QMAN error.
7006  * On upper CP print the SW config stream data and last 8 PQEs.
7007  * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
7008  */
7009 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7010                                    u32 stream, u64 qman_base, u64 event_mask)
7011 {
7012         u32 i;
7013
7014         if (stream != QMAN_STREAMS) {
7015                 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
7016                         qman_base, event_mask, true);
7017                 return;
7018         }
7019
7020         /* handle Lower-CP */
7021         gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
7022
7023         for (i = 0; i < QMAN_STREAMS; i++)
7024                 gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
7025                         qman_base, event_mask, false);
7026 }
7027
7028 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7029                                           const char *qm_name,
7030                                           u64 qman_base,
7031                                           u32 qid_base,
7032                                           u64 *event_mask)
7033 {
7034         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7035         u64 glbl_sts_addr, arb_err_addr;
7036         char reg_desc[32];
7037
7038         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7039         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
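             /*
              * Editor's note (not from the original source): the mmTPC0_QM_*
              * macros are only used here to compute the register offsets
              * within a QMAN block; adding them to qman_base makes the same
              * code work for TPC, MME, DMA and NIC QMANs alike.
              */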
7040
7041         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7042         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7043                 glbl_sts_clr_val = 0;
7044                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7045
7046                 if (!glbl_sts_val)
7047                         continue;
7048
7049                 if (i == QMAN_STREAMS)
7050                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7051                 else
7052                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7053
7054                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7055                         if (glbl_sts_val & BIT(j)) {
7056                                 dev_err_ratelimited(hdev->dev,
7057                                                 "%s %s. err cause: %s\n",
7058                                                 qm_name, reg_desc,
7059                                                 gaudi_qman_error_cause[j]);
7060                                 glbl_sts_clr_val |= BIT(j);
7061                         }
7062                 }
7063                 /* check for undefined opcode */
7064                 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
7065                                 hdev->last_error.undef_opcode.write_enable) {
7066                         memset(&hdev->last_error.undef_opcode, 0,
7067                                                 sizeof(hdev->last_error.undef_opcode));
7068
7069                         hdev->last_error.undef_opcode.write_enable = false;
7070                         *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
7071                 }
7072
7073                 /* Write 1 to clear errors */
7074                 if (!hdev->stop_on_err)
7075                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7076                 else
7077                         handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
7078         }
7079
7080         arb_err_val = RREG32(arb_err_addr);
7081
7082         if (!arb_err_val)
7083                 return;
7084
7085         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7086                 if (arb_err_val & BIT(j)) {
7087                         dev_err_ratelimited(hdev->dev,
7088                                         "%s ARB_ERR. err cause: %s\n",
7089                                         qm_name,
7090                                         gaudi_qman_arb_error_cause[j]);
7091                 }
7092         }
7093 }
7094
7095 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7096                 struct hl_eq_sm_sei_data *sei_data)
7097 {
7098         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7099
7100         /* Flip the bits as the enum is ordered in the opposite way */
7101         index = (index ^ 0x3) & 0x3;
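             /*
              * Editor's illustration (not from the original source): index 0
              * (GAUDI_EVENT_DMA_IF_SEI_0) becomes 3 and index 3 becomes 0, so
              * the lookup into gaudi_sync_manager_names[] is reversed relative
              * to the event enumeration.
              */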
7102
7103         switch (sei_data->sei_cause) {
7104         case SM_SEI_SO_OVERFLOW:
7105                 dev_err_ratelimited(hdev->dev,
7106                         "%s SEI Error: SOB Group %u overflow/underflow",
7107                         gaudi_sync_manager_names[index],
7108                         le32_to_cpu(sei_data->sei_log));
7109                 break;
7110         case SM_SEI_LBW_4B_UNALIGNED:
7111                 dev_err_ratelimited(hdev->dev,
7112                         "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7113                         gaudi_sync_manager_names[index],
7114                         le32_to_cpu(sei_data->sei_log));
7115                 break;
7116         case SM_SEI_AXI_RESPONSE_ERR:
7117                 dev_err_ratelimited(hdev->dev,
7118                         "%s SEI Error: AXI ID %u response error",
7119                         gaudi_sync_manager_names[index],
7120                         le32_to_cpu(sei_data->sei_log));
7121                 break;
7122         default:
7123                 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7124                                 le32_to_cpu(sei_data->sei_log));
7125                 break;
7126         }
7127 }
7128
7129 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7130                 struct hl_eq_ecc_data *ecc_data)
7131 {
7132         struct ecc_info_extract_params params;
7133         u64 ecc_address = 0, ecc_syndrom = 0;
7134         u8 index, memory_wrapper_idx = 0;
7135         bool extract_info_from_fw;
7136         int rc;
7137
7138         if (hdev->asic_prop.fw_security_enabled) {
7139                 extract_info_from_fw = true;
7140                 goto extract_ecc_info;
7141         }
7142
7143         switch (event_type) {
7144         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7145         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7146                 extract_info_from_fw = true;
7147                 break;
7148         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7149                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7150                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7151                 params.num_memories = 90;
7152                 params.derr = false;
7153                 extract_info_from_fw = false;
7154                 break;
7155         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7156                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7157                 params.block_address =
7158                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7159                 params.num_memories = 90;
7160                 params.derr = true;
7161                 extract_info_from_fw = false;
7162                 break;
7163         case GAUDI_EVENT_MME0_ACC_SERR:
7164         case GAUDI_EVENT_MME1_ACC_SERR:
7165         case GAUDI_EVENT_MME2_ACC_SERR:
7166         case GAUDI_EVENT_MME3_ACC_SERR:
7167                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7168                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7169                 params.num_memories = 128;
7170                 params.derr = false;
7171                 extract_info_from_fw = false;
7172                 break;
7173         case GAUDI_EVENT_MME0_ACC_DERR:
7174         case GAUDI_EVENT_MME1_ACC_DERR:
7175         case GAUDI_EVENT_MME2_ACC_DERR:
7176         case GAUDI_EVENT_MME3_ACC_DERR:
7177                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7178                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7179                 params.num_memories = 128;
7180                 params.derr = true;
7181                 extract_info_from_fw = false;
7182                 break;
7183         case GAUDI_EVENT_MME0_SBAB_SERR:
7184         case GAUDI_EVENT_MME1_SBAB_SERR:
7185         case GAUDI_EVENT_MME2_SBAB_SERR:
7186         case GAUDI_EVENT_MME3_SBAB_SERR:
7187                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7188                 params.block_address =
7189                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7190                 params.num_memories = 33;
7191                 params.derr = false;
7192                 extract_info_from_fw = false;
7193                 break;
7194         case GAUDI_EVENT_MME0_SBAB_DERR:
7195         case GAUDI_EVENT_MME1_SBAB_DERR:
7196         case GAUDI_EVENT_MME2_SBAB_DERR:
7197         case GAUDI_EVENT_MME3_SBAB_DERR:
7198                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7199                 params.block_address =
7200                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7201                 params.num_memories = 33;
7202                 params.derr = true;
7203                 extract_info_from_fw = false;
7204                 break;
7205         default:
7206                 return;
7207         }
7208
7209 extract_ecc_info:
7210         if (extract_info_from_fw) {
7211                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7212                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7213                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7214         } else {
7215                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7216                                 &ecc_syndrom, &memory_wrapper_idx);
7217                 if (rc)
7218                         return;
7219         }
7220
7221         dev_err(hdev->dev,
7222                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7223                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7224 }
7225
7226 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7227 {
7228         u64 qman_base;
7229         char desc[32];
7230         u32 qid_base;
7231         u8 index;
7232
7233         switch (event_type) {
7234         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7235                 /* On a TPC QM event, also notify on a TPC assertion. While there isn't
7236                  * a specific event for an assertion yet, the FW generates a QM event.
7237                  * The SW upper layer will inspect an internal mapped area to determine
7238                  * whether the event is a TPC assertion or a TPC QM error.
7239                  */
7240                 *event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7241                 index = event_type - GAUDI_EVENT_TPC0_QM;
7242                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7243                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7244                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7245                 break;
7246         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7247                 if (event_type == GAUDI_EVENT_MME0_QM) {
7248                         index = 0;
7249                         qid_base = GAUDI_QUEUE_ID_MME_0_0;
7250                 } else { /* event_type == GAUDI_EVENT_MME2_QM */
7251                         index = 2;
7252                         qid_base = GAUDI_QUEUE_ID_MME_1_0;
7253                 }
7254                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7255                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7256                 break;
7257         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7258                 index = event_type - GAUDI_EVENT_DMA0_QM;
7259                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7260                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
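                     /*
                      * Editor's note (an assumption, not from the original
                      * source): GAUDI_QUEUE_ID_CPU_PQ sits between the DMA1
                      * and DMA2 queue IDs in the queue enumeration, which is
                      * why the base is bumped by one for DMA QMANs 2-7 below.
                      */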
7261                 if (index > 1)
7262                         qid_base++;
7263                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7264                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7265                 break;
7266         case GAUDI_EVENT_NIC0_QM0:
7267                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7268                 qman_base = mmNIC0_QM0_BASE;
7269                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7270                 break;
7271         case GAUDI_EVENT_NIC0_QM1:
7272                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7273                 qman_base = mmNIC0_QM1_BASE;
7274                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7275                 break;
7276         case GAUDI_EVENT_NIC1_QM0:
7277                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7278                 qman_base = mmNIC1_QM0_BASE;
7279                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7280                 break;
7281         case GAUDI_EVENT_NIC1_QM1:
7282                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7283                 qman_base = mmNIC1_QM1_BASE;
7284                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7285                 break;
7286         case GAUDI_EVENT_NIC2_QM0:
7287                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7288                 qman_base = mmNIC2_QM0_BASE;
7289                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7290                 break;
7291         case GAUDI_EVENT_NIC2_QM1:
7292                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7293                 qman_base = mmNIC2_QM1_BASE;
7294                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7295                 break;
7296         case GAUDI_EVENT_NIC3_QM0:
7297                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7298                 qman_base = mmNIC3_QM0_BASE;
7299                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7300                 break;
7301         case GAUDI_EVENT_NIC3_QM1:
7302                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7303                 qman_base = mmNIC3_QM1_BASE;
7304                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7305                 break;
7306         case GAUDI_EVENT_NIC4_QM0:
7307                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7308                 qman_base = mmNIC4_QM0_BASE;
7309                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7310                 break;
7311         case GAUDI_EVENT_NIC4_QM1:
7312                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7313                 qman_base = mmNIC4_QM1_BASE;
7314                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7315                 break;
7316         default:
7317                 return;
7318         }
7319
7320         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7321 }
7322
7323 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7324                                         bool razwi)
7325 {
7326         u32 engine_id_1, engine_id_2;
7327         char desc[64] = "";
7328         u64 razwi_addr = 0;
7329         u8 razwi_type;
7330         int rc;
7331
7332         /*
7333          * Init engine ids as invalid by default; they get a valid value only if the
7334          * razwi was initiated by an engine that has an engine id.
7335          * Init razwi type to its default; it is changed only if the razwi was caused
7336          * by a page fault or an MMU access error.
7337          */
7338         engine_id_1 = U16_MAX;
7339         engine_id_2 = U16_MAX;
7340         razwi_type = U8_MAX;
7341
7342         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7343         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7344                 event_type, desc);
7345
7346         if (razwi) {
7347                 gaudi_print_and_get_razwi_info(hdev, &engine_id_1, &engine_id_2);
7348                 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
7349
7350                 /* In case it's the first razwi, save its parameters */
7351                 rc = atomic_cmpxchg(&hdev->last_error.razwi.write_enable, 1, 0);
7352                 if (rc) {
7353                         hdev->last_error.razwi.timestamp = ktime_get();
7354                         hdev->last_error.razwi.addr = razwi_addr;
7355                         hdev->last_error.razwi.engine_id_1 = engine_id_1;
7356                         hdev->last_error.razwi.engine_id_2 = engine_id_2;
7357                         /*
7358                          * If first engine id holds non valid value the razwi initiator
7359                          * does not have engine id
7360                          */
7361                         hdev->last_error.razwi.non_engine_initiator = (engine_id_1 == U16_MAX);
7362                         hdev->last_error.razwi.type = razwi_type;
7363
7364                 }
7365         }
7366 }
7367
7368 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7369                                         struct cpucp_pkt_sync_err *sync_err)
7370 {
7371         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7372
7373         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7374                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7375 }
7376
7377 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7378                                         struct hl_eq_fw_alive *fw_alive)
7379 {
7380         dev_err(hdev->dev,
7381                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7382                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7383                 "Minor" : "Critical", fw_alive->process_id,
7384                 fw_alive->thread_id, fw_alive->uptime_seconds);
7385 }
7386
7387 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7388                                                 void *data)
7389 {
7390         char desc[64] = "", *type;
7391         struct eq_nic_sei_event *eq_nic_sei = data;
7392         u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7393
7394         switch (eq_nic_sei->axi_error_cause) {
7395         case RXB:
7396                 type = "RXB";
7397                 break;
7398         case RXE:
7399                 type = "RXE";
7400                 break;
7401         case TXS:
7402                 type = "TXS";
7403                 break;
7404         case TXE:
7405                 type = "TXE";
7406                 break;
7407         case QPC_RESP:
7408                 type = "QPC_RESP";
7409                 break;
7410         case NON_AXI_ERR:
7411                 type = "NON_AXI_ERR";
7412                 break;
7413         case TMR:
7414                 type = "TMR";
7415                 break;
7416         default:
7417                 dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7418                         eq_nic_sei->axi_error_cause);
7419                 type = "N/A";
7420                 break;
7421         }
7422
7423         snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7424                         eq_nic_sei->id);
7425         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7426                 event_type, desc);
7427 }
7428
7429 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7430 {
7431         /* GAUDI doesn't support any reset except hard-reset */
7432         return -EPERM;
7433 }
7434
7435 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7436                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7437 {
7438         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7439         int rc = 0;
7440
7441         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7442                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7443                 if (!hbm_ecc_data) {
7444                         dev_err(hdev->dev, "No FW ECC data\n");
7445                         return 0;
7446                 }
7447
7448                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7449                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7450                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7451                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7452                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7453                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7454                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7455                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7456                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7457                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7458                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7459                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7460                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7461                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7462
7463                 dev_err(hdev->dev,
7464                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7465                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7466                 dev_err(hdev->dev,
7467                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7468                         device, ch, hbm_ecc_data->first_addr, type,
7469                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7470                         hbm_ecc_data->dec_cnt);
7471                 return 0;
7472         }
7473
7474         if (hdev->asic_prop.fw_security_enabled) {
7475                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7476                 return 0;
7477         }
7478
7479         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7480         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7481                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7482                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7483                 if (val) {
7484                         rc = -EIO;
7485                         dev_err(hdev->dev,
7486                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7487                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7488                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7489                                 (val >> 4) & 0x1);
7490
7491                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7492                         dev_err(hdev->dev,
7493                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7494                                 device, ch * 2,
7495                                 RREG32(base + ch * 0x1000 + 0x064),
7496                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7497                                 (val2 & 0xFF0000) >> 16,
7498                                 (val2 & 0xFF000000) >> 24);
7499                 }
7500
7501                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7502                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7503                 if (val) {
7504                         rc = -EIO;
7505                         dev_err(hdev->dev,
7506                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7507                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7508                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7509                                 (val >> 4) & 0x1);
7510
7511                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7512                         dev_err(hdev->dev,
7513                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7514                                 device, ch * 2 + 1,
7515                                 RREG32(base + ch * 0x1000 + 0x074),
7516                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7517                                 (val2 & 0xFF0000) >> 16,
7518                                 (val2 & 0xFF000000) >> 24);
7519                 }
7520
7521                 /* Clear interrupts */
7522                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7523                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7524                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7525                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7526                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7527                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7528         }
7529
7530         val  = RREG32(base + 0x8F30);
7531         val2 = RREG32(base + 0x8F34);
7532         if (val | val2) {
7533                 rc = -EIO;
7534                 dev_err(hdev->dev,
7535                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7536                         device, val, val2);
7537         }
7538         val  = RREG32(base + 0x8F40);
7539         val2 = RREG32(base + 0x8F44);
7540         if (val | val2) {
7541                 rc = -EIO;
7542                 dev_err(hdev->dev,
7543                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7544                         device, val, val2);
7545         }
7546
7547         return rc;
7548 }
7549
7550 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7551 {
7552         switch (hbm_event_type) {
7553         case GAUDI_EVENT_HBM0_SPI_0:
7554         case GAUDI_EVENT_HBM0_SPI_1:
7555                 return 0;
7556         case GAUDI_EVENT_HBM1_SPI_0:
7557         case GAUDI_EVENT_HBM1_SPI_1:
7558                 return 1;
7559         case GAUDI_EVENT_HBM2_SPI_0:
7560         case GAUDI_EVENT_HBM2_SPI_1:
7561                 return 2;
7562         case GAUDI_EVENT_HBM3_SPI_0:
7563         case GAUDI_EVENT_HBM3_SPI_1:
7564                 return 3;
7565         default:
7566                 break;
7567         }
7568
7569         /* Should never happen */
7570         return 0;
7571 }
7572
7573 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7574                                         char *interrupt_name)
7575 {
7576         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7577         bool soft_reset_required = false;
7578
7579         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7580                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7581
7582         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7583                 if (tpc_interrupts_cause & BIT(i)) {
7584                         dev_err_ratelimited(hdev->dev,
7585                                         "TPC%d_%s interrupt cause: %s\n",
7586                                         tpc_id, interrupt_name,
7587                                         gaudi_tpc_interrupts_cause[i]);
7588                         /* If this is a QM error, we need to soft-reset */
7589                         if (i == 15)
7590                                 soft_reset_required = true;
7591                 }
7592
7593         /* Clear interrupts */
7594         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7595
7596         return soft_reset_required;
7597 }
7598
7599 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7600 {
7601         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7602 }
7603
7604 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7605 {
7606         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7607 }
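     /*
      * Editor's note (an assumption, not from the original source): the
      * divisors above reflect the event table layout - TPC DEC events appear
      * to be spaced two entries apart per TPC and TPC KRN_ERR events six
      * entries apart, so the conversion back to a TPC index is a simple
      * shift/division.
      */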
7608
7609 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type)
7610 {
7611         ktime_t zero_time = ktime_set(0, 0);
7612
7613         mutex_lock(&hdev->clk_throttling.lock);
7614
7615         switch (event_type) {
7616         case GAUDI_EVENT_FIX_POWER_ENV_S:
7617                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7618                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7619                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7620                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7621                 dev_info_ratelimited(hdev->dev,
7622                         "Clock throttling due to power consumption\n");
7623                 break;
7624
7625         case GAUDI_EVENT_FIX_POWER_ENV_E:
7626                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7627                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7628                 dev_info_ratelimited(hdev->dev,
7629                         "Power envelope is safe, back to optimal clock\n");
7630                 break;
7631
7632         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7633                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7634                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7635                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7636                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7637                 dev_info_ratelimited(hdev->dev,
7638                         "Clock throttling due to overheating\n");
7639                 break;
7640
7641         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7642                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7643                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7644                 dev_info_ratelimited(hdev->dev,
7645                         "Thermal envelope is safe, back to optimal clock\n");
7646                 break;
7647
7648         default:
7649                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7650                         event_type);
7651                 break;
7652         }
7653
7654         mutex_unlock(&hdev->clk_throttling.lock);
7655 }
7656
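/*
 * Main event queue entry handler: dispatch on the event type, print and
 * collect the relevant error information, and either unmask the interrupt
 * in the firmware or go to a device reset for fatal events.
 */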
7657 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7658 {
7659         struct gaudi_device *gaudi = hdev->asic_specific;
7660         u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7661         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7662         u32 fw_fatal_err_flag = 0, flags = 0;
7663         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7664                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7665         bool reset_required, reset_direct = false;
7666         u8 cause;
7667         int rc;
7668
7669         if (event_type >= GAUDI_EVENT_SIZE) {
7670                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
7671                                 event_type, GAUDI_EVENT_SIZE - 1);
7672                 return;
7673         }
7674
7675         gaudi->events_stat[event_type]++;
7676         gaudi->events_stat_aggregate[event_type]++;
7677
7678         switch (event_type) {
7679         case GAUDI_EVENT_PCIE_CORE_DERR:
7680         case GAUDI_EVENT_PCIE_IF_DERR:
7681         case GAUDI_EVENT_PCIE_PHY_DERR:
7682         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7683         case GAUDI_EVENT_MME0_ACC_DERR:
7684         case GAUDI_EVENT_MME0_SBAB_DERR:
7685         case GAUDI_EVENT_MME1_ACC_DERR:
7686         case GAUDI_EVENT_MME1_SBAB_DERR:
7687         case GAUDI_EVENT_MME2_ACC_DERR:
7688         case GAUDI_EVENT_MME2_SBAB_DERR:
7689         case GAUDI_EVENT_MME3_ACC_DERR:
7690         case GAUDI_EVENT_MME3_SBAB_DERR:
7691         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7692                 fallthrough;
7693         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7694         case GAUDI_EVENT_PSOC_MEM_DERR:
7695         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7696         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7697         case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7698         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7699         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7700         case GAUDI_EVENT_MMU_DERR:
7701         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7702                 gaudi_print_irq_info(hdev, event_type, true);
7703                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7704                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7705                 goto reset_device;
7706
7707         case GAUDI_EVENT_GIC500:
7708         case GAUDI_EVENT_AXI_ECC:
7709         case GAUDI_EVENT_L2_RAM_ECC:
7710         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7711                 gaudi_print_irq_info(hdev, event_type, false);
7712                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7713                 goto reset_device;
7714
7715         case GAUDI_EVENT_HBM0_SPI_0:
7716         case GAUDI_EVENT_HBM1_SPI_0:
7717         case GAUDI_EVENT_HBM2_SPI_0:
7718         case GAUDI_EVENT_HBM3_SPI_0:
7719                 gaudi_print_irq_info(hdev, event_type, false);
7720                 gaudi_hbm_read_interrupts(hdev,
7721                                 gaudi_hbm_event_to_dev(event_type),
7722                                 &eq_entry->hbm_ecc_data);
7723                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7724                 goto reset_device;
7725
7726         case GAUDI_EVENT_HBM0_SPI_1:
7727         case GAUDI_EVENT_HBM1_SPI_1:
7728         case GAUDI_EVENT_HBM2_SPI_1:
7729         case GAUDI_EVENT_HBM3_SPI_1:
7730                 gaudi_print_irq_info(hdev, event_type, false);
7731                 gaudi_hbm_read_interrupts(hdev,
7732                                 gaudi_hbm_event_to_dev(event_type),
7733                                 &eq_entry->hbm_ecc_data);
7734                 hl_fw_unmask_irq(hdev, event_type);
7735                 break;
7736
7737         case GAUDI_EVENT_TPC0_DEC:
7738         case GAUDI_EVENT_TPC1_DEC:
7739         case GAUDI_EVENT_TPC2_DEC:
7740         case GAUDI_EVENT_TPC3_DEC:
7741         case GAUDI_EVENT_TPC4_DEC:
7742         case GAUDI_EVENT_TPC5_DEC:
7743         case GAUDI_EVENT_TPC6_DEC:
7744         case GAUDI_EVENT_TPC7_DEC:
7745                 gaudi_print_irq_info(hdev, event_type, true);
7746                 reset_required = gaudi_tpc_read_interrupts(hdev,
7747                                         tpc_dec_event_to_tpc_id(event_type),
7748                                         "AXI_SLV_DEC_Error");
7749                 if (reset_required) {
7750                         dev_err(hdev->dev, "reset required due to %s\n",
7751                                 gaudi_irq_map_table[event_type].name);
7752
7753                         reset_direct = true;
7754                         goto reset_device;
7755                 } else {
7756                         hl_fw_unmask_irq(hdev, event_type);
7757                 }
7758                 break;
7759
7760         case GAUDI_EVENT_TPC0_KRN_ERR:
7761         case GAUDI_EVENT_TPC1_KRN_ERR:
7762         case GAUDI_EVENT_TPC2_KRN_ERR:
7763         case GAUDI_EVENT_TPC3_KRN_ERR:
7764         case GAUDI_EVENT_TPC4_KRN_ERR:
7765         case GAUDI_EVENT_TPC5_KRN_ERR:
7766         case GAUDI_EVENT_TPC6_KRN_ERR:
7767         case GAUDI_EVENT_TPC7_KRN_ERR:
7768                 gaudi_print_irq_info(hdev, event_type, true);
7769                 reset_required = gaudi_tpc_read_interrupts(hdev,
7770                                         tpc_krn_event_to_tpc_id(event_type),
7771                                         "KRN_ERR");
7772                 if (reset_required) {
7773                         dev_err(hdev->dev, "reset required due to %s\n",
7774                                 gaudi_irq_map_table[event_type].name);
7775
7776                         reset_direct = true;
7777                         goto reset_device;
7778                 } else {
7779                         hl_fw_unmask_irq(hdev, event_type);
7780                 }
7781                 break;
7782
7783         case GAUDI_EVENT_PCIE_CORE_SERR:
7784         case GAUDI_EVENT_PCIE_IF_SERR:
7785         case GAUDI_EVENT_PCIE_PHY_SERR:
7786         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7787         case GAUDI_EVENT_MME0_ACC_SERR:
7788         case GAUDI_EVENT_MME0_SBAB_SERR:
7789         case GAUDI_EVENT_MME1_ACC_SERR:
7790         case GAUDI_EVENT_MME1_SBAB_SERR:
7791         case GAUDI_EVENT_MME2_ACC_SERR:
7792         case GAUDI_EVENT_MME2_SBAB_SERR:
7793         case GAUDI_EVENT_MME3_ACC_SERR:
7794         case GAUDI_EVENT_MME3_SBAB_SERR:
7795         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7796         case GAUDI_EVENT_CPU_IF_ECC_SERR:
7797         case GAUDI_EVENT_PSOC_MEM_SERR:
7798         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7799         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7800         case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7801         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7802         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7803                 fallthrough;
7804         case GAUDI_EVENT_MMU_SERR:
7805                 gaudi_print_irq_info(hdev, event_type, true);
7806                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7807                 hl_fw_unmask_irq(hdev, event_type);
7808                 break;
7809
7810         case GAUDI_EVENT_PCIE_DEC:
7811         case GAUDI_EVENT_MME0_WBC_RSP:
7812         case GAUDI_EVENT_MME0_SBAB0_RSP:
7813         case GAUDI_EVENT_MME1_WBC_RSP:
7814         case GAUDI_EVENT_MME1_SBAB0_RSP:
7815         case GAUDI_EVENT_MME2_WBC_RSP:
7816         case GAUDI_EVENT_MME2_SBAB0_RSP:
7817         case GAUDI_EVENT_MME3_WBC_RSP:
7818         case GAUDI_EVENT_MME3_SBAB0_RSP:
7819         case GAUDI_EVENT_CPU_AXI_SPLITTER:
7820         case GAUDI_EVENT_PSOC_AXI_DEC:
7821         case GAUDI_EVENT_PSOC_PRSTN_FALL:
7822         case GAUDI_EVENT_MMU_PAGE_FAULT:
7823         case GAUDI_EVENT_MMU_WR_PERM:
7824         case GAUDI_EVENT_RAZWI_OR_ADC:
7825         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7826         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7827                 fallthrough;
7828         case GAUDI_EVENT_NIC0_QM0:
7829         case GAUDI_EVENT_NIC0_QM1:
7830         case GAUDI_EVENT_NIC1_QM0:
7831         case GAUDI_EVENT_NIC1_QM1:
7832         case GAUDI_EVENT_NIC2_QM0:
7833         case GAUDI_EVENT_NIC2_QM1:
7834         case GAUDI_EVENT_NIC3_QM0:
7835         case GAUDI_EVENT_NIC3_QM1:
7836         case GAUDI_EVENT_NIC4_QM0:
7837         case GAUDI_EVENT_NIC4_QM1:
7838         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7839         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7840                 gaudi_print_irq_info(hdev, event_type, true);
7841                 gaudi_handle_qman_err(hdev, event_type, &event_mask);
7842                 hl_fw_unmask_irq(hdev, event_type);
7843                 break;
7844
7845         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7846                 gaudi_print_irq_info(hdev, event_type, true);
7847                 goto reset_device;
7848
7849         case GAUDI_EVENT_TPC0_BMON_SPMU:
7850         case GAUDI_EVENT_TPC1_BMON_SPMU:
7851         case GAUDI_EVENT_TPC2_BMON_SPMU:
7852         case GAUDI_EVENT_TPC3_BMON_SPMU:
7853         case GAUDI_EVENT_TPC4_BMON_SPMU:
7854         case GAUDI_EVENT_TPC5_BMON_SPMU:
7855         case GAUDI_EVENT_TPC6_BMON_SPMU:
7856         case GAUDI_EVENT_TPC7_BMON_SPMU:
7857         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7858                 gaudi_print_irq_info(hdev, event_type, false);
7859                 hl_fw_unmask_irq(hdev, event_type);
7860                 break;
7861
7862         case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7863                 gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7864                 hl_fw_unmask_irq(hdev, event_type);
7865                 break;
7866
7867         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7868                 gaudi_print_irq_info(hdev, event_type, false);
7869                 gaudi_print_sm_sei_info(hdev, event_type,
7870                                         &eq_entry->sm_sei_data);
7871                 rc = hl_state_dump(hdev);
7872                 if (rc)
7873                         dev_err(hdev->dev,
7874                                 "Error during system state dump %d\n", rc);
7875                 hl_fw_unmask_irq(hdev, event_type);
7876                 break;
7877
7878         case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7879                 break;
7880
7881         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7882                 gaudi_print_clk_change_info(hdev, event_type);
7883                 hl_fw_unmask_irq(hdev, event_type);
7884                 break;
7885
7886         case GAUDI_EVENT_PSOC_GPIO_U16_0:
7887                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7888                 dev_err(hdev->dev,
7889                         "Received high temp H/W interrupt %d (cause %d)\n",
7890                         event_type, cause);
7891                 break;
7892
7893         case GAUDI_EVENT_DEV_RESET_REQ:
7894                 gaudi_print_irq_info(hdev, event_type, false);
7895                 goto reset_device;
7896
7897         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7898                 gaudi_print_irq_info(hdev, event_type, false);
7899                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7900                 goto reset_device;
7901
7902         case GAUDI_EVENT_FW_ALIVE_S:
7903                 gaudi_print_irq_info(hdev, event_type, false);
7904                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7905                 goto reset_device;
7906
7907         default:
7908                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7909                                 event_type);
7910                 break;
7911         }
7912
7913         if (event_mask)
7914                 hl_notifier_event_send_all(hdev, event_mask);
7915
7916         return;
7917
7918 reset_device:
7919         reset_required = true;
7920
7921         if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7922                 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7923
7924                 /* notify on device unavailable while the reset is triggered by FW */
7925                 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7926                                         HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7927         } else if (hdev->hard_reset_on_fw_events) {
7928                 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7929                 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7930         } else {
7931                 reset_required = false;
7932         }
7933
7934         /* Even if the reset does not execute, a notification on the
7935          * occurred event still needs to be sent here
7936          */
7937         hl_notifier_event_send_all(hdev, event_mask);
7938         if (reset_required)
7939                 hl_device_reset(hdev, flags);
7940         else
7941                 hl_fw_unmask_irq(hdev, event_type);
7942 }
7943
7944 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7945 {
7946         struct gaudi_device *gaudi = hdev->asic_specific;
7947
7948         if (aggregate) {
7949                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
7950                 return gaudi->events_stat_aggregate;
7951         }
7952
7953         *size = (u32) sizeof(gaudi->events_stat);
7954         return gaudi->events_stat;
7955 }
7956
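/*
 * Invalidate the whole MMU cache (L0 & L1) and poll the STLB invalidation
 * status register until the operation completes or times out.
 */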
7957 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7958 {
7959         struct gaudi_device *gaudi = hdev->asic_specific;
7960         u32 status, timeout_usec;
7961         int rc;
7962
7963         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7964                 hdev->reset_info.hard_reset_pending)
7965                 return 0;
7966
7967         if (hdev->pldm)
7968                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7969         else
7970                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7971
7972         /* L0 & L1 invalidation */
7973         WREG32(mmSTLB_INV_PS, 3);
7974         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7975         WREG32(mmSTLB_INV_PS, 2);
7976
7977         rc = hl_poll_timeout(
7978                 hdev,
7979                 mmSTLB_INV_PS,
7980                 status,
7981                 !status,
7982                 1000,
7983                 timeout_usec);
7984
7985         WREG32(mmSTLB_INV_SET, 0);
7986
7987         return rc;
7988 }
7989
7990 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7991                                                 bool is_hard, u32 flags,
7992                                                 u32 asid, u64 va, u64 size)
7993 {
7994         /* Treat as invalidate all because there is no range invalidation
7995          * in Gaudi
7996          */
7997         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
7998 }
7999
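/*
 * Program the hop0 page table physical address for the given ASID and poll
 * until the MMU clears its busy bit, i.e. acknowledges the configuration.
 */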
8000 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
8001 {
8002         u32 status, timeout_usec;
8003         int rc;
8004
8005         if (hdev->pldm)
8006                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8007         else
8008                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8009
8010         WREG32(MMU_ASID, asid);
8011         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8012         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8013         WREG32(MMU_BUSY, 0x80000000);
8014
8015         rc = hl_poll_timeout(
8016                 hdev,
8017                 MMU_BUSY,
8018                 status,
8019                 !(status & 0x80000000),
8020                 1000,
8021                 timeout_usec);
8022
8023         if (rc) {
8024                 dev_err(hdev->dev,
8025                         "Timeout during MMU hop0 config of asid %d\n", asid);
8026                 return rc;
8027         }
8028
8029         return 0;
8030 }
8031
8032 static int gaudi_send_heartbeat(struct hl_device *hdev)
8033 {
8034         struct gaudi_device *gaudi = hdev->asic_specific;
8035
8036         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8037                 return 0;
8038
8039         return hl_fw_send_heartbeat(hdev);
8040 }
8041
8042 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8043 {
8044         struct gaudi_device *gaudi = hdev->asic_specific;
8045         struct asic_fixed_properties *prop = &hdev->asic_prop;
8046         int rc;
8047
8048         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8049                 return 0;
8050
8051         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8052                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8053                                         mmCPU_BOOT_ERR1);
8054         if (rc)
8055                 return rc;
8056
8057         if (!strlen(prop->cpucp_info.card_name))
8058                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8059                                 CARD_NAME_MAX_LEN);
8060
8061         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8062
8063         set_default_power_values(hdev);
8064
8065         return 0;
8066 }
8067
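/*
 * Check whether all DMA, TPC, MME and NIC engines are idle. Busy engines are
 * marked in the caller-supplied mask, and when an engines_data buffer is
 * provided a human-readable per-engine status table is appended to it.
 */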
8068 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8069                 struct engines_data *e)
8070 {
8071         struct gaudi_device *gaudi = hdev->asic_specific;
8072         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8073         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8074         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8075         unsigned long *mask = (unsigned long *)mask_arr;
8076         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8077         bool is_idle = true, is_eng_idle, is_slave;
8078         u64 offset;
8079         int i, dma_id, port;
8080
8081         if (e)
8082                 hl_engine_data_sprintf(e,
8083                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8084                         "---  -------  ------------  ----------  -------------\n");
8085
8086         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8087                 dma_id = gaudi_dma_assignment[i];
8088                 offset = dma_id * DMA_QMAN_OFFSET;
8089
8090                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8091                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8092                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8093                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8094                                 IS_DMA_IDLE(dma_core_sts0);
8095                 is_idle &= is_eng_idle;
8096
8097                 if (mask && !is_eng_idle)
8098                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8099                 if (e)
8100                         hl_engine_data_sprintf(e, fmt, dma_id,
8101                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8102                                 qm_cgm_sts, dma_core_sts0);
8103         }
8104
8105         if (e)
8106                 hl_engine_data_sprintf(e,
8107                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8108                         "---  -------  ------------  ----------  ----------\n");
8109
8110         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8111                 offset = i * TPC_QMAN_OFFSET;
8112                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8113                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8114                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8115                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8116                                 IS_TPC_IDLE(tpc_cfg_sts);
8117                 is_idle &= is_eng_idle;
8118
8119                 if (mask && !is_eng_idle)
8120                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8121                 if (e)
8122                         hl_engine_data_sprintf(e, fmt, i,
8123                                 is_eng_idle ? "Y" : "N",
8124                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8125         }
8126
8127         if (e)
8128                 hl_engine_data_sprintf(e,
8129                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8130                         "---  -------  ------------  ----------  -----------\n");
8131
8132         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8133                 offset = i * MME_QMAN_OFFSET;
8134                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8135                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8136
8137                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8138                 is_slave = i % 2;
8139                 if (!is_slave) {
8140                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8141                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8142                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8143                 }
8144
8145                 is_idle &= is_eng_idle;
8146
8147                 if (mask && !is_eng_idle)
8148                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8149                 if (e) {
8150                         if (!is_slave)
8151                                 hl_engine_data_sprintf(e, fmt, i,
8152                                         is_eng_idle ? "Y" : "N",
8153                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8154                         else
8155                                 hl_engine_data_sprintf(e, mme_slave_fmt, i,
8156                                         is_eng_idle ? "Y" : "N", "-",
8157                                         "-", mme_arch_sts);
8158                 }
8159         }
8160
8161         if (e)
8162                 hl_engine_data_sprintf(e,
8163                                 "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8164                                 "---  -------  ------------  ----------\n");
8165
8166         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8167                 offset = i * NIC_MACRO_QMAN_OFFSET;
8168                 port = 2 * i;
8169                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8170                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8171                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8172                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8173                         is_idle &= is_eng_idle;
8174
8175                         if (mask && !is_eng_idle)
8176                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8177                         if (e)
8178                                 hl_engine_data_sprintf(e, nic_fmt, port,
8179                                                 is_eng_idle ? "Y" : "N",
8180                                                 qm_glbl_sts0, qm_cgm_sts);
8181                 }
8182
8183                 port = 2 * i + 1;
8184                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8185                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8186                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8187                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8188                         is_idle &= is_eng_idle;
8189
8190                         if (mask && !is_eng_idle)
8191                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8192                         if (e)
8193                                 hl_engine_data_sprintf(e, nic_fmt, port,
8194                                                 is_eng_idle ? "Y" : "N",
8195                                                 qm_glbl_sts0, qm_cgm_sts);
8196                 }
8197         }
8198
8199         if (e)
8200                 hl_engine_data_sprintf(e, "\n");
8201
8202         return is_idle;
8203 }
8204
8205 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8206         __acquires(&gaudi->hw_queues_lock)
8207 {
8208         struct gaudi_device *gaudi = hdev->asic_specific;
8209
8210         spin_lock(&gaudi->hw_queues_lock);
8211 }
8212
8213 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8214         __releases(&gaudi->hw_queues_lock)
8215 {
8216         struct gaudi_device *gaudi = hdev->asic_specific;
8217
8218         spin_unlock(&gaudi->hw_queues_lock);
8219 }
8220
8221 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8222 {
8223         return hdev->pdev->device;
8224 }
8225
8226 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8227                                 size_t max_size)
8228 {
8229         struct gaudi_device *gaudi = hdev->asic_specific;
8230
8231         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8232                 return 0;
8233
8234         return hl_fw_get_eeprom_data(hdev, data, max_size);
8235 }
8236
8237 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8238 {
8239         struct gaudi_device *gaudi = hdev->asic_specific;
8240
8241         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8242                 return 0;
8243
8244         return hl_fw_get_monitor_dump(hdev, data);
8245 }
8246
8247 /*
8248  * this function should be used only during initialization and/or after reset,
8249  * when there are no active users.
8250  */
8251 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8252 {
8253         u64 kernel_timeout;
8254         u32 status, offset;
8255         int rc;
8256
8257         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8258
8259         if (hdev->pldm)
8260                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8261         else
8262                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8263
8264         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8265                         lower_32_bits(tpc_kernel));
8266         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8267                         upper_32_bits(tpc_kernel));
8268
8269         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8270                         lower_32_bits(tpc_kernel));
8271         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8272                         upper_32_bits(tpc_kernel));
8273         /* set a valid LUT pointer, content is of no significance */
8274         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8275                         lower_32_bits(tpc_kernel));
8276         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8277                         upper_32_bits(tpc_kernel));
8278
8279         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8280                         lower_32_bits(CFG_BASE +
8281                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8282
8283         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8284                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8285                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8286         /* wait a bit for the engine to start executing */
8287         usleep_range(1000, 1500);
8288
8289         /* wait until engine has finished executing */
8290         rc = hl_poll_timeout(
8291                 hdev,
8292                 mmTPC0_CFG_STATUS + offset,
8293                 status,
8294                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8295                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8296                 1000,
8297                 kernel_timeout);
8298
8299         if (rc) {
8300                 dev_err(hdev->dev,
8301                         "Timeout while waiting for TPC%d icache prefetch\n",
8302                         tpc_id);
8303                 return -EIO;
8304         }
8305
8306         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8307                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8308
8309         /* wait a bit for the engine to start executing */
8310         usleep_range(1000, 1500);
8311
8312         /* wait until engine has finished executing */
8313         rc = hl_poll_timeout(
8314                 hdev,
8315                 mmTPC0_CFG_STATUS + offset,
8316                 status,
8317                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8318                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8319                 1000,
8320                 kernel_timeout);
8321
8322         if (rc) {
8323                 dev_err(hdev->dev,
8324                         "Timeout while waiting for TPC%d vector pipe\n",
8325                         tpc_id);
8326                 return -EIO;
8327         }
8328
8329         rc = hl_poll_timeout(
8330                 hdev,
8331                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8332                 status,
8333                 (status == 0),
8334                 1000,
8335                 kernel_timeout);
8336
8337         if (rc) {
8338                 dev_err(hdev->dev,
8339                         "Timeout while waiting for TPC%d kernel to execute\n",
8340                         tpc_id);
8341                 return -EIO;
8342         }
8343
8344         return 0;
8345 }
8346
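/*
 * Allocate the host-resident internal CB pool for this context: DMA-coherent
 * memory managed by a gen_pool allocator, reserved in the host VA range and
 * mapped through the device MMU.
 */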
8347 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8348                 struct hl_ctx *ctx)
8349 {
8350         struct gaudi_device *gaudi = hdev->asic_specific;
8351         int min_alloc_order, rc, collective_cb_size;
8352
8353         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8354                 return 0;
8355
8356         hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8357                                                         HOST_SPACE_INTERNAL_CB_SZ,
8358                                                         &hdev->internal_cb_pool_dma_addr,
8359                                                         GFP_KERNEL | __GFP_ZERO);
8360
8361         if (!hdev->internal_cb_pool_virt_addr)
8362                 return -ENOMEM;
8363
8364         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8365                         sizeof(struct packet_fence);
8366         min_alloc_order = ilog2(collective_cb_size);
8367
8368         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8369         if (!hdev->internal_cb_pool) {
8370                 dev_err(hdev->dev,
8371                         "Failed to create internal CB pool\n");
8372                 rc = -ENOMEM;
8373                 goto free_internal_cb_pool;
8374         }
8375
8376         rc = gen_pool_add(hdev->internal_cb_pool,
8377                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8378                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8379         if (rc) {
8380                 dev_err(hdev->dev,
8381                         "Failed to add memory to internal CB pool\n");
8382                 rc = -EFAULT;
8383                 goto destroy_internal_cb_pool;
8384         }
8385
8386         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8387                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8388                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8389
8390         if (!hdev->internal_cb_va_base) {
8391                 rc = -ENOMEM;
8392                 goto destroy_internal_cb_pool;
8393         }
8394
8395         mutex_lock(&ctx->mmu_lock);
8396         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8397                         hdev->internal_cb_pool_dma_addr,
8398                         HOST_SPACE_INTERNAL_CB_SZ);
8399
8400         hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8401         mutex_unlock(&ctx->mmu_lock);
8402
8403         if (rc)
8404                 goto unreserve_internal_cb_pool;
8405
8406         return 0;
8407
8408 unreserve_internal_cb_pool:
8409         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8410                         HOST_SPACE_INTERNAL_CB_SZ);
8411 destroy_internal_cb_pool:
8412         gen_pool_destroy(hdev->internal_cb_pool);
8413 free_internal_cb_pool:
8414         hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8415                                         hdev->internal_cb_pool_dma_addr);
8416
8417         return rc;
8418 }
8419
8420 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8421                 struct hl_ctx *ctx)
8422 {
8423         struct gaudi_device *gaudi = hdev->asic_specific;
8424
8425         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8426                 return;
8427
8428         mutex_lock(&ctx->mmu_lock);
8429         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8430                         HOST_SPACE_INTERNAL_CB_SZ);
8431         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8432                         HOST_SPACE_INTERNAL_CB_SZ);
8433         hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8434         mutex_unlock(&ctx->mmu_lock);
8435
8436         gen_pool_destroy(hdev->internal_cb_pool);
8437
8438         hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8439                                         hdev->internal_cb_pool_dma_addr);
8440 }
8441
8442 static int gaudi_ctx_init(struct hl_ctx *ctx)
8443 {
8444         int rc;
8445
8446         if (ctx->asid == HL_KERNEL_ASID_ID)
8447                 return 0;
8448
8449         rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8450         if (rc)
8451                 return rc;
8452
8453         rc = gaudi_restore_user_registers(ctx->hdev);
8454         if (rc)
8455                 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8456
8457         return rc;
8458 }
8459
8460 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8461 {
8462         if (ctx->asid == HL_KERNEL_ASID_ID)
8463                 return;
8464
8465         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8466 }
8467
8468 static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8469 {
8470         return 0;
8471 }
8472
8473 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8474 {
8475         return gaudi_cq_assignment[cq_idx];
8476 }
8477
8478 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8479 {
8480         return sizeof(struct packet_msg_short) +
8481                         sizeof(struct packet_msg_prot) * 2;
8482 }
8483
8484 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8485 {
8486         return sizeof(struct packet_msg_short) * 4 +
8487                         sizeof(struct packet_fence) +
8488                         sizeof(struct packet_msg_prot) * 2;
8489 }
8490
8491 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8492 {
8493         return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8494 }
8495
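/*
 * Build a signal CB: a single MSG_SHORT packet that adds 1 to the given
 * sync object in the W_S sync manager.
 */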
8496 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8497                                 u32 size, bool eb)
8498 {
8499         struct hl_cb *cb = (struct hl_cb *) data;
8500         struct packet_msg_short *pkt;
8501         u32 value, ctl, pkt_size = sizeof(*pkt);
8502
8503         pkt = cb->kernel_address + size;
8504         memset(pkt, 0, pkt_size);
8505
8506         /* Inc by 1, Mode ADD */
8507         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8508         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8509
8510         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8511         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8512         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8513         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8514         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8515         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8516         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8517
8518         pkt->value = cpu_to_le32(value);
8519         pkt->ctl = cpu_to_le32(ctl);
8520
8521         return size + pkt_size;
8522 }
8523
8524 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8525                                         u16 addr)
8526 {
8527         u32 ctl, pkt_size = sizeof(*pkt);
8528
8529         memset(pkt, 0, pkt_size);
8530
8531         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8532         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8533         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8534         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8535         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8536         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8537
8538         pkt->value = cpu_to_le32(value);
8539         pkt->ctl = cpu_to_le32(ctl);
8540
8541         return pkt_size;
8542 }
8543
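/*
 * Build the monitor-arm MSG_SHORT packet: bind monitor mon_id to the sync
 * object group of sob_base with the given mask, so that it triggers when the
 * monitored value is greater than or equal to sob_val.
 */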
8544 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8545                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8546                 u16 sob_val, u16 mon_id)
8547 {
8548         u64 monitor_base;
8549         u32 ctl, value, pkt_size = sizeof(*pkt);
8550         u16 msg_addr_offset;
8551         u8 mask;
8552
8553         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8554                 dev_err(hdev->dev,
8555                         "sob_base %u (mask %#x) is not valid\n",
8556                         sob_base, sob_mask);
8557                 return 0;
8558         }
8559
8560         /*
8561          * monitor_base should be the content of the base0 address registers,
8562          * so it will be added to the msg short offsets
8563          */
8564         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8565
8566         msg_addr_offset =
8567                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8568                                 monitor_base;
8569
8570         memset(pkt, 0, pkt_size);
8571
8572         /* Monitor config packet: bind the monitor to a sync object */
8573         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8574         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8575         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8576                         0); /* GREATER OR EQUAL */
8577         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8578
8579         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8580         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8581         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8582         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8583         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8584         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8585         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8586
8587         pkt->value = cpu_to_le32(value);
8588         pkt->ctl = cpu_to_le32(ctl);
8589
8590         return pkt_size;
8591 }
8592
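/*
 * Build a FENCE packet that waits for fence ID 2 to reach a target value of 1
 * and then decrements it by 1.
 */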
8593 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8594 {
8595         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8596
8597         memset(pkt, 0, pkt_size);
8598
8599         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8600         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8601         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8602
8603         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8604         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8605         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8606         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8607
8608         pkt->cfg = cpu_to_le32(cfg);
8609         pkt->ctl = cpu_to_le32(ctl);
8610
8611         return pkt_size;
8612 }
8613
8614 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8615 {
8616         u32 offset, nic_index;
8617
8618         switch (queue_id) {
8619         case GAUDI_QUEUE_ID_DMA_0_0:
8620                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8621                 break;
8622         case GAUDI_QUEUE_ID_DMA_0_1:
8623                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8624                 break;
8625         case GAUDI_QUEUE_ID_DMA_0_2:
8626                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8627                 break;
8628         case GAUDI_QUEUE_ID_DMA_0_3:
8629                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8630                 break;
8631         case GAUDI_QUEUE_ID_DMA_1_0:
8632                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8633                 break;
8634         case GAUDI_QUEUE_ID_DMA_1_1:
8635                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8636                 break;
8637         case GAUDI_QUEUE_ID_DMA_1_2:
8638                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8639                 break;
8640         case GAUDI_QUEUE_ID_DMA_1_3:
8641                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8642                 break;
8643         case GAUDI_QUEUE_ID_DMA_5_0:
8644                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8645                 break;
8646         case GAUDI_QUEUE_ID_DMA_5_1:
8647                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8648                 break;
8649         case GAUDI_QUEUE_ID_DMA_5_2:
8650                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8651                 break;
8652         case GAUDI_QUEUE_ID_DMA_5_3:
8653                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8654                 break;
8655         case GAUDI_QUEUE_ID_TPC_7_0:
8656                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8657                 break;
8658         case GAUDI_QUEUE_ID_TPC_7_1:
8659                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8660                 break;
8661         case GAUDI_QUEUE_ID_TPC_7_2:
8662                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8663                 break;
8664         case GAUDI_QUEUE_ID_TPC_7_3:
8665                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8666                 break;
8667         case GAUDI_QUEUE_ID_NIC_0_0:
8668         case GAUDI_QUEUE_ID_NIC_1_0:
8669         case GAUDI_QUEUE_ID_NIC_2_0:
8670         case GAUDI_QUEUE_ID_NIC_3_0:
8671         case GAUDI_QUEUE_ID_NIC_4_0:
8672         case GAUDI_QUEUE_ID_NIC_5_0:
8673         case GAUDI_QUEUE_ID_NIC_6_0:
8674         case GAUDI_QUEUE_ID_NIC_7_0:
8675         case GAUDI_QUEUE_ID_NIC_8_0:
8676         case GAUDI_QUEUE_ID_NIC_9_0:
8677                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8678                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8679                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8680                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8681                 break;
8682         case GAUDI_QUEUE_ID_NIC_0_1:
8683         case GAUDI_QUEUE_ID_NIC_1_1:
8684         case GAUDI_QUEUE_ID_NIC_2_1:
8685         case GAUDI_QUEUE_ID_NIC_3_1:
8686         case GAUDI_QUEUE_ID_NIC_4_1:
8687         case GAUDI_QUEUE_ID_NIC_5_1:
8688         case GAUDI_QUEUE_ID_NIC_6_1:
8689         case GAUDI_QUEUE_ID_NIC_7_1:
8690         case GAUDI_QUEUE_ID_NIC_8_1:
8691         case GAUDI_QUEUE_ID_NIC_9_1:
8692                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8693                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8694                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8695                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8696                 break;
8697         case GAUDI_QUEUE_ID_NIC_0_2:
8698         case GAUDI_QUEUE_ID_NIC_1_2:
8699         case GAUDI_QUEUE_ID_NIC_2_2:
8700         case GAUDI_QUEUE_ID_NIC_3_2:
8701         case GAUDI_QUEUE_ID_NIC_4_2:
8702         case GAUDI_QUEUE_ID_NIC_5_2:
8703         case GAUDI_QUEUE_ID_NIC_6_2:
8704         case GAUDI_QUEUE_ID_NIC_7_2:
8705         case GAUDI_QUEUE_ID_NIC_8_2:
8706         case GAUDI_QUEUE_ID_NIC_9_2:
8707                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8708                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8709                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8710                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8711                 break;
8712         case GAUDI_QUEUE_ID_NIC_0_3:
8713         case GAUDI_QUEUE_ID_NIC_1_3:
8714         case GAUDI_QUEUE_ID_NIC_2_3:
8715         case GAUDI_QUEUE_ID_NIC_3_3:
8716         case GAUDI_QUEUE_ID_NIC_4_3:
8717         case GAUDI_QUEUE_ID_NIC_5_3:
8718         case GAUDI_QUEUE_ID_NIC_6_3:
8719         case GAUDI_QUEUE_ID_NIC_7_3:
8720         case GAUDI_QUEUE_ID_NIC_8_3:
8721         case GAUDI_QUEUE_ID_NIC_9_3:
8722                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8723                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8724                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8725                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8726                 break;
8727         default:
8728                 return -EINVAL;
8729         }
8730
8731         *addr = CFG_BASE + offset;
8732
8733         return 0;
8734 }
8735
8736 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8737 {
8738         u64 monitor_base;
8739         u32 size = 0;
8740         u16 msg_addr_offset;
8741
8742         /*
8743          * monitor_base should be the content of the base0 address registers,
8744          * so it will be added to the msg short offsets
8745          */
8746         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8747
8748         /* First monitor config packet: low address of the sync */
8749         msg_addr_offset =
8750                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8751                                 monitor_base;
8752
8753         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8754                                         msg_addr_offset);
8755
8756         /* Second monitor config packet: high address of the sync */
8757         msg_addr_offset =
8758                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8759                                 monitor_base;
8760
8761         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8762                                         msg_addr_offset);
8763
8764         /*
8765          * Third monitor config packet: the payload, i.e. what to write when the
8766          * sync triggers
8767          */
8768         msg_addr_offset =
8769                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8770                                 monitor_base;
8771
8772         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8773
8774         return size;
8775 }
8776
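/*
 * Build a wait CB: three MSG_SHORT packets that configure the monitor payload
 * (fence address and data), one MSG_SHORT packet that arms the monitor on the
 * requested sync objects, and a final FENCE packet on which the queue waits.
 */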
8777 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8778                                 struct hl_gen_wait_properties *prop)
8779 {
8780         struct hl_cb *cb = (struct hl_cb *) prop->data;
8781         void *buf = cb->kernel_address;
8782         u64 fence_addr = 0;
8783         u32 size = prop->size;
8784
8785         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8786                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8787                                 prop->q_idx);
8788                 return 0;
8789         }
8790
8791         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8792         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8793                         prop->sob_mask, prop->sob_val, prop->mon_id);
8794         size += gaudi_add_fence_pkt(buf + size);
8795
8796         return size;
8797 }
8798
8799 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8800 {
8801         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8802
8803         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8804                 hw_sob->sob_id);
8805
8806         WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8807                         hw_sob->sob_id * 4, 0);
8808
8809         kref_init(&hw_sob->kref);
8810 }
8811
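/* Read the 64-bit PSOC timestamp counter (high then low 32-bit registers) */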
8812 static u64 gaudi_get_device_time(struct hl_device *hdev)
8813 {
8814         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8815
8816         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8817 }
8818
8819 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8820                                 u32 *block_size, u32 *block_id)
8821 {
8822         return -EPERM;
8823 }
8824
8825 static int gaudi_block_mmap(struct hl_device *hdev,
8826                                 struct vm_area_struct *vma,
8827                                 u32 block_id, u32 block_size)
8828 {
8829         return -EPERM;
8830 }
8831
8832 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8833 {
8834         struct cpu_dyn_regs *dyn_regs =
8835                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8836         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8837                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8838                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
8839
8840         WREG32(irq_handler_offset,
8841                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8842 }
8843
8844 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8845 {
8846         return -EINVAL;
8847 }
8848
8849 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8850 {
8851         switch (pll_idx) {
8852         case HL_GAUDI_CPU_PLL: return CPU_PLL;
8853         case HL_GAUDI_PCI_PLL: return PCI_PLL;
8854         case HL_GAUDI_NIC_PLL: return NIC_PLL;
8855         case HL_GAUDI_DMA_PLL: return DMA_PLL;
8856         case HL_GAUDI_MESH_PLL: return MESH_PLL;
8857         case HL_GAUDI_MME_PLL: return MME_PLL;
8858         case HL_GAUDI_TPC_PLL: return TPC_PLL;
8859         case HL_GAUDI_IF_PLL: return IF_PLL;
8860         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8861         case HL_GAUDI_HBM_PLL: return HBM_PLL;
8862         default: return -EINVAL;
8863         }
8864 }
8865
8866 static int gaudi_add_sync_to_engine_map_entry(
8867         struct hl_sync_to_engine_map *map, u32 reg_value,
8868         enum hl_sync_engine_type engine_type, u32 engine_id)
8869 {
8870         struct hl_sync_to_engine_map_entry *entry;
8871
8872         /* The register value represents a partial address of a sync
8873          * object and is used as a unique identifier. For this we need
8874          * to subtract the lower CFG base bits from the value.
8875          */
8876         if (reg_value == 0 || reg_value == 0xffffffff)
8877                 return 0;
8878         reg_value -= lower_32_bits(CFG_BASE);
8879
8880         /* create a new hash entry */
8881         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8882         if (!entry)
8883                 return -ENOMEM;
8884         entry->engine_type = engine_type;
8885         entry->engine_id = engine_id;
8886         entry->sync_id = reg_value;
8887         hash_add(map->tb, &entry->node, reg_value);
8888
8889         return 0;
8890 }
8891
8892 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8893                                 struct hl_sync_to_engine_map *map)
8894 {
8895         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8896         int i, j, rc;
8897         u32 reg_value;
8898
8899         /* Iterate over TPC engines */
8900         for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8901
8902                 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8903                                         sds->props[SP_NEXT_TPC] * i);
8904
8905                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8906                                                         ENGINE_TPC, i);
8907                 if (rc)
8908                         goto free_sync_to_engine_map;
8909         }
8910
8911         /* Iterate over MME engines */
8912         for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8913                 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8914
8915                         reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8916                                                 sds->props[SP_NEXT_MME] * i +
8917                                                 j * sizeof(u32));
8918
8919                         rc = gaudi_add_sync_to_engine_map_entry(
8920                                 map, reg_value, ENGINE_MME,
8921                                 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8922                         if (rc)
8923                                 goto free_sync_to_engine_map;
8924                 }
8925         }
8926
8927         /* Iterate over DMA engines */
8928         for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8929                 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8930                                         sds->props[SP_DMA_QUEUES_OFFSET] * i);
8931                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8932                                                         ENGINE_DMA, i);
8933                 if (rc)
8934                         goto free_sync_to_engine_map;
8935         }
8936
8937         return 0;
8938
8939 free_sync_to_engine_map:
8940         hl_state_dump_free_sync_to_engine_map(map);
8941
8942         return rc;
8943 }
8944
8945 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8946 {
8947         return FIELD_GET(
8948                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8949                 mon->status);
8950 }
8951
8952 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8953 {
8954         const size_t max_write = 10;
8955         u32 gid, mask, sob;
8956         int i, offset;
8957
8958         /* Sync object ID is calculated as follows:
8959          * (8 * group_id + cleared bits in mask)
8960          */
8961         gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8962                         mon->arm_data);
8963         mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8964                         mon->arm_data);
8965
8966         for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8967                 max_write; mask >>= 1, i++) {
8968                 if (!(mask & 1)) {
8969                         sob = gid * MONITOR_MAX_SOBS + i;
8970
8971                         if (offset > 0)
8972                                 offset += snprintf(sobs + offset, max_write,
8973                                                         ", ");
8974
8975                         offset += snprintf(sobs + offset, max_write, "%u", sob);
8976                 }
8977         }
8978 }
8979
8980 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8981                                 struct hl_device *hdev,
8982                                 struct hl_mon_state_dump *mon)
8983 {
8984         const char *name;
8985         char scratch_buf1[BIN_REG_STRING_SIZE],
8986                 scratch_buf2[BIN_REG_STRING_SIZE];
8987         char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
8988
8989         name = hl_state_dump_get_monitor_name(hdev, mon);
8990         if (!name)
8991                 name = "";
8992
8993         gaudi_fill_sobs_from_mon(monitored_sobs, mon);
8994
8995         return hl_snprintf_resize(
8996                 buf, size, offset,
8997                 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
8998                 mon->id, name,
8999                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9000                                 mon->arm_data),
9001                 hl_format_as_binary(
9002                         scratch_buf1, sizeof(scratch_buf1),
9003                         FIELD_GET(
9004                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9005                                 mon->arm_data)),
9006                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9007                                 mon->arm_data),
9008                 mon->wr_data,
9009                 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9010                 hl_format_as_binary(
9011                         scratch_buf2, sizeof(scratch_buf2),
9012                         FIELD_GET(
9013                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9014                                 mon->status)),
9015                 monitored_sobs);
9016 }
9017
9018
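     /* Dump the fence state of a single queue-manager engine. base_offset
      * points at the engine's fence counter registers and status_base_offset
      * at its CP status registers; only queues with a fence wait in progress
      * are printed.
      */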
9019 static int gaudi_print_fences_single_engine(
9020         struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9021         enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9022         size_t *size, size_t *offset)
9023 {
9024         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9025         int rc = -ENOMEM, i;
9026         u32 *statuses, *fences;
9027
9028         statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9029                         sizeof(*statuses), GFP_KERNEL);
9030         if (!statuses)
9031                 goto out;
9032
9033         fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9034                                 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9035                          sizeof(*fences), GFP_KERNEL);
9036         if (!fences)
9037                 goto free_status;
9038
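             /* Snapshot the CP status of every queue and all of the fence
              * counters before formatting the output
              */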
9039         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9040                 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9041
9042         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9043                                 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9044                 fences[i] = RREG32(base_offset + i * sizeof(u32));
9045
9046         /* Print a line for each queue that has a fence wait in progress */
9047         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9048                 u32 fence_id;
9049                 u64 fence_cnt, fence_rdata;
9050                 const char *engine_name;
9051
9052                 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9053                         statuses[i]))
9054                         continue;
9055
9056                 fence_id =
9057                         FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9058                 fence_cnt = base_offset + CFG_BASE +
9059                         sizeof(u32) *
9060                         (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9061                 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9062                                 sds->props[SP_FENCE0_RDATA_OFFSET];
9063                 engine_name = hl_sync_engine_to_string(engine_type);
9064
9065                 rc = hl_snprintf_resize(
9066                         buf, size, offset,
9067                         "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9068                         engine_name, engine_id,
9069                         i, fence_id,
9070                         fence_cnt, engine_name, engine_id, fence_id, i,
9071                         fence_rdata, engine_name, engine_id, fence_id, i,
9072                         fences[i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]],
9073                         statuses[i]);
9074                 if (rc)
9075                         goto free_fences;
9076         }
9077
9078         rc = 0;
9079
9080 free_fences:
9081         kfree(fences);
9082 free_status:
9083         kfree(statuses);
9084 out:
9085         return rc;
9086 }
9087
9088
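     /* State dump callbacks invoked by the common habanalabs code when
      * generating a device state dump for Gaudi
      */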
9089 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9090         .monitor_valid = gaudi_monitor_valid,
9091         .print_single_monitor = gaudi_print_single_monitor,
9092         .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9093         .print_fences_single_engine = gaudi_print_fences_single_engine,
9094 };
9095
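     /* Register the state dump specs: hash tables translating sync object
      * and monitor IDs to names, the Gaudi register properties and the
      * callback table above
      */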
9096 static void gaudi_state_dump_init(struct hl_device *hdev)
9097 {
9098         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9099         int i;
9100
9101         for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9102                 hash_add(sds->so_id_to_str_tb,
9103                         &gaudi_so_id_to_str[i].node,
9104                         gaudi_so_id_to_str[i].id);
9105
9106         for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9107                 hash_add(sds->monitor_id_to_str_tb,
9108                         &gaudi_monitor_id_to_str[i].node,
9109                         gaudi_monitor_id_to_str[i].id);
9110
9111         sds->props = gaudi_state_dump_specs_props;
9112
9113         sds->sync_namager_names = gaudi_sync_manager_names;
9114
9115         sds->funcs = gaudi_state_dump_funcs;
9116 }
9117
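     /* Expose the array of stream master queue IDs to the common code */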
9118 static u32 *gaudi_get_stream_master_qid_arr(void)
9119 {
9120         return gaudi_stream_master;
9121 }
9122
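     /* No-op on Gaudi; provided only to satisfy the common ASIC interface */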
9123 static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
9124 {
9125 }
9126
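     /* sysfs attribute exposing the Infineon voltage regulator version
      * reported by the CPU-CP firmware
      */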
9127 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9128 {
9129         struct hl_device *hdev = dev_get_drvdata(dev);
9130         struct cpucp_info *cpucp_info;
9131
9132         cpucp_info = &hdev->asic_prop.cpucp_info;
9133
9134         return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9135 }
9136
9137 static DEVICE_ATTR_RO(infineon_ver);
9138
9139 static struct attribute *gaudi_vrm_dev_attrs[] = {
9140         &dev_attr_infineon_ver.attr,
9141         NULL,
9142 };
9143
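     /* Register the Gaudi-specific sysfs attributes: the common clock
      * attributes plus the VRM group holding the Infineon version
      */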
9144 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9145                                         struct attribute_group *dev_vrm_attr_grp)
9146 {
9147         hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9148         dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9149 }
9150
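     /* ASIC function table through which the common habanalabs driver calls
      * into the Gaudi-specific implementation
      */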
9151 static const struct hl_asic_funcs gaudi_funcs = {
9152         .early_init = gaudi_early_init,
9153         .early_fini = gaudi_early_fini,
9154         .late_init = gaudi_late_init,
9155         .late_fini = gaudi_late_fini,
9156         .sw_init = gaudi_sw_init,
9157         .sw_fini = gaudi_sw_fini,
9158         .hw_init = gaudi_hw_init,
9159         .hw_fini = gaudi_hw_fini,
9160         .halt_engines = gaudi_halt_engines,
9161         .suspend = gaudi_suspend,
9162         .resume = gaudi_resume,
9163         .mmap = gaudi_mmap,
9164         .ring_doorbell = gaudi_ring_doorbell,
9165         .pqe_write = gaudi_pqe_write,
9166         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9167         .asic_dma_free_coherent = gaudi_dma_free_coherent,
9168         .scrub_device_mem = gaudi_scrub_device_mem,
9169         .scrub_device_dram = gaudi_scrub_device_dram,
9170         .get_int_queue_base = gaudi_get_int_queue_base,
9171         .test_queues = gaudi_test_queues,
9172         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9173         .asic_dma_pool_free = gaudi_dma_pool_free,
9174         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9175         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9176         .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
9177         .cs_parser = gaudi_cs_parser,
9178         .asic_dma_map_sgtable = hl_dma_map_sgtable,
9179         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9180         .update_eq_ci = gaudi_update_eq_ci,
9181         .context_switch = gaudi_context_switch,
9182         .restore_phase_topology = gaudi_restore_phase_topology,
9183         .debugfs_read_dma = gaudi_debugfs_read_dma,
9184         .add_device_attr = gaudi_add_device_attr,
9185         .handle_eqe = gaudi_handle_eqe,
9186         .get_events_stat = gaudi_get_events_stat,
9187         .read_pte = gaudi_read_pte,
9188         .write_pte = gaudi_write_pte,
9189         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9190         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9191         .mmu_prefetch_cache_range = NULL,
9192         .send_heartbeat = gaudi_send_heartbeat,
9193         .debug_coresight = gaudi_debug_coresight,
9194         .is_device_idle = gaudi_is_device_idle,
9195         .compute_reset_late_init = gaudi_compute_reset_late_init,
9196         .hw_queues_lock = gaudi_hw_queues_lock,
9197         .hw_queues_unlock = gaudi_hw_queues_unlock,
9198         .get_pci_id = gaudi_get_pci_id,
9199         .get_eeprom_data = gaudi_get_eeprom_data,
9200         .get_monitor_dump = gaudi_get_monitor_dump,
9201         .send_cpu_message = gaudi_send_cpu_message,
9202         .pci_bars_map = gaudi_pci_bars_map,
9203         .init_iatu = gaudi_init_iatu,
9204         .rreg = hl_rreg,
9205         .wreg = hl_wreg,
9206         .halt_coresight = gaudi_halt_coresight,
9207         .ctx_init = gaudi_ctx_init,
9208         .ctx_fini = gaudi_ctx_fini,
9209         .pre_schedule_cs = gaudi_pre_schedule_cs,
9210         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9211         .load_firmware_to_device = gaudi_load_firmware_to_device,
9212         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9213         .get_signal_cb_size = gaudi_get_signal_cb_size,
9214         .get_wait_cb_size = gaudi_get_wait_cb_size,
9215         .gen_signal_cb = gaudi_gen_signal_cb,
9216         .gen_wait_cb = gaudi_gen_wait_cb,
9217         .reset_sob = gaudi_reset_sob,
9218         .reset_sob_group = gaudi_reset_sob_group,
9219         .get_device_time = gaudi_get_device_time,
9220         .pb_print_security_errors = NULL,
9221         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9222         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9223         .get_dec_base_addr = NULL,
9224         .scramble_addr = hl_mmu_scramble_addr,
9225         .descramble_addr = hl_mmu_descramble_addr,
9226         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9227         .get_hw_block_id = gaudi_get_hw_block_id,
9228         .hw_block_mmap = gaudi_block_mmap,
9229         .enable_events_from_fw = gaudi_enable_events_from_fw,
9230         .ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9231         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9232         .init_firmware_preload_params = gaudi_init_firmware_preload_params,
9233         .init_firmware_loader = gaudi_init_firmware_loader,
9234         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9235         .state_dump_init = gaudi_state_dump_init,
9236         .get_sob_addr = gaudi_get_sob_addr,
9237         .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9238         .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9239         .check_if_razwi_happened = gaudi_check_if_razwi_happened,
9240         .mmu_get_real_page_size = hl_mmu_get_real_page_size,
9241         .access_dev_mem = hl_access_dev_mem,
9242         .set_dram_bar_base = gaudi_set_hbm_bar_base,
9243 };
9244
9245 /**
9246  * gaudi_set_asic_funcs - set GAUDI function pointers
9247  *
9248  * @hdev: pointer to hl_device structure
9249  *
9250  */
9251 void gaudi_set_asic_funcs(struct hl_device *hdev)
9252 {
9253         hdev->asic_funcs = &gaudi_funcs;
9254 }